map_chars: deal with invalid chars in paths on windows

This commit is contained in:
Thomas Waldmann 2026-02-08 15:43:42 +01:00
parent d6d5ce5cb0
commit 50f4e54462
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
4 changed files with 57 additions and 13 deletions

View file

@ -109,9 +109,14 @@ class HelpMixIn:
**Windows path handling**: All paths in Borg archives use forward slashes (``/``)
as path separators, regardless of the platform. When creating archives on Windows,
backslashes from filesystem paths are automatically converted to forward slashes.
When extracting archives created on POSIX systems that contain literal backslashes
in filenames (which is rare, but possible), the backslash character is replaced
with ``%`` on Windows to prevent misinterpretation as a path separator.
.. note::
**Windows reserved characters**: On Windows, when extracting archives created on
POSIX systems, paths may contain characters that are not allowed in
file or directory names (``< > : " \\ | ? *``).
These are replaced by characters from the Unicode Private Use Area (``U+F0xx``),
similar to what the CIFS mapchars feature does. It won't be pretty, but at least it works.
Exclusions can be passed via the command line option ``--exclude``. When used
from within a shell, the patterns should be quoted to protect them from

View file

@ -252,7 +252,7 @@ def make_path_safe(path):
if "\\.." in path or "..\\" in path:
raise ValueError(f"unexpected '..' element in path {path!r}")
path = percentify(path)
path = map_chars(path)
path = path.lstrip("/")
if path.startswith("../") or "/../" in path or path.endswith("/..") or path == "..":
@ -270,15 +270,32 @@ def slashify(path):
return path.replace("\\", "/") if is_win32 else path
def percentify(path):
"""
Replace backslashes with percent signs if running on Windows.
# Translation table for characters that are reserved in Windows file names.
# Each one is moved up by 0xF000 into the Unicode Private Use Area (like cifs mapchars),
# so every reserved character has exactly one distinct replacement.
WINDOWS_MAP_CHARS = str.maketrans({char: chr(0xF000 + ord(char)) for char in '<>:"\\|?*'})
Use case: if an archived path contains backslashes (which is not a path separator on POSIX
and could appear as a normal character in POSIX paths), we need to replace them with percent
signs to make the path usable on Windows.
def map_chars(path):
    """
    Translate characters that are reserved on Windows; do nothing on other platforms.

    Use case: an archived path may contain characters that are fine on POSIX but reserved
    on Windows. When running on Windows, they are substituted via WINDOWS_MAP_CHARS so the
    path becomes usable there; otherwise *path* is returned unchanged.
    """
    return path.translate(WINDOWS_MAP_CHARS) if is_win32 else path
def get_strip_prefix(path):

View file

@ -7,7 +7,7 @@ from cpython.bytes cimport PyBytes_AsStringAndSize
from .constants import ITEM_KEYS, ARCHIVE_KEYS
from .helpers import StableDict
from .helpers import format_file_size
from .helpers.fs import assert_sanitized_path, to_sanitized_path, percentify, slashify
from .helpers.fs import assert_sanitized_path, to_sanitized_path, map_chars, slashify
from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp
from .helpers.time import OutputTimestamp, safe_timestamp
@ -265,7 +265,7 @@ cdef class Item(PropDict):
path = PropDictProperty(str, 'surrogate-escaped str', encode=assert_sanitized_path, decode=to_sanitized_path)
source = PropDictProperty(str, 'surrogate-escaped str') # legacy borg 1.x. borg 2: see .target
target = PropDictProperty(str, 'surrogate-escaped str', encode=slashify, decode=percentify)
target = PropDictProperty(str, 'surrogate-escaped str', encode=slashify, decode=map_chars)
user = PropDictProperty(str, 'surrogate-escaped str')
group = PropDictProperty(str, 'surrogate-escaped str')

View file

@ -20,6 +20,7 @@ from ...helpers.fs import (
safe_unlink,
remove_dotdot_prefixes,
make_path_safe,
map_chars,
)
from ...platform import is_win32, is_darwin, is_haiku
from .. import are_hardlinks_supported
@ -441,3 +442,24 @@ def test_dir_is_tagged(tmpdir):
assert dir_is_tagged(dir_fd=fd, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == [".NOBACKUP"]
with open_dir(str(normal_dir)) as fd:
assert dir_is_tagged(dir_fd=fd, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == []
def test_map_chars(monkeypatch):
    """map_chars leaves paths alone off Windows and remaps reserved characters on Windows."""
    # Not on Windows: paths come back unchanged, even with Windows-reserved characters in them.
    monkeypatch.setattr("borg.helpers.fs.is_win32", False)
    for untouched in ("foo/bar", "foo\\bar", "foo:bar"):
        assert map_chars(untouched) == untouched

    # On Windows: every reserved character is replaced by its private use area counterpart.
    monkeypatch.setattr("borg.helpers.fs.is_win32", True)
    expected_by_input = {
        "foo:bar": "foo\uf03abar",
        "foo<bar": "foo\uf03cbar",
        "foo>bar": "foo\uf03ebar",
        'foo"bar': "foo\uf022bar",
        "foo\\bar": "foo\uf05cbar",
        "foo|bar": "foo\uf07cbar",
        "foo?bar": "foo\uf03fbar",
        "foo*bar": "foo\uf02abar",
    }
    for given, wanted in expected_by_input.items():
        assert map_chars(given) == wanted