From 50f4e54462f9cd3efa4e869a8edf56717f510126 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 8 Feb 2026 15:43:42 +0100 Subject: [PATCH] map_chars: deal with invalid chars in paths on windows --- src/borg/archiver/help_cmd.py | 11 ++++++--- src/borg/helpers/fs.py | 33 ++++++++++++++++++++------- src/borg/item.pyx | 4 ++-- src/borg/testsuite/helpers/fs_test.py | 22 ++++++++++++++++++ 4 files changed, 57 insertions(+), 13 deletions(-) diff --git a/src/borg/archiver/help_cmd.py b/src/borg/archiver/help_cmd.py index 32523e30e..a73ef7e90 100644 --- a/src/borg/archiver/help_cmd.py +++ b/src/borg/archiver/help_cmd.py @@ -109,9 +109,14 @@ class HelpMixIn: **Windows path handling**: All paths in Borg archives use forward slashes (``/``) as path separators, regardless of the platform. When creating archives on Windows, backslashes from filesystem paths are automatically converted to forward slashes. - When extracting archives created on POSIX systems that contain literal backslashes - in filenames (which is rare, but possible), the backslash character is replaced - with ``%`` on Windows to prevent misinterpretation as a path separator. + + .. note:: + + **Windows reserved characters**: On Windows, when extracting archives created on + POSIX systems, paths may contain characters that are reserved and cannot be used in + file or directory names (such as ``< > : " \\ | ? *``). + These are replaced by characters in the Unicode Private Use Area (``U+F0xx``), similar to + what the CIFS mapchars feature does. It won't be pretty, but at least it works. Exclusions can be passed via the command line option ``--exclude``. When used from within a shell, the patterns should be quoted to protect them from diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index fa95dbf24..8a47b1bd1 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -252,7 +252,7 @@ def make_path_safe(path): if "\\.." in path or "..\\" in path: raise ValueError(f"unexpected '..'
element in path {path!r}") - path = percentify(path) + path = map_chars(path) path = path.lstrip("/") if path.startswith("../") or "/../" in path or path.endswith("/..") or path == "..": @@ -270,15 +270,32 @@ def slashify(path): return path.replace("\\", "/") if is_win32 else path -def percentify(path): - """ - Replace backslashes with percent signs if running on Windows. +# Bijective mapping to Unicode Private Use Area (like cifs mapchars) +WINDOWS_MAP_CHARS = str.maketrans( + { + "<": "\uF03C", + ">": "\uF03E", + ":": "\uF03A", + '"': "\uF022", + "\\": "\uF05C", + "|": "\uF07C", + "?": "\uF03F", + "*": "\uF02A", + } +) - Use case: if an archived path contains backslashes (which is not a path separator on POSIX - and could appear as a normal character in POSIX paths), we need to replace them with percent - signs to make the path usable on Windows. + +def map_chars(path): """ - return path.replace("\\", "%") if is_win32 else path + Map reserved characters if running on Windows. + + Use case: if an archived path contains reserved characters (that are not reserved on POSIX) + we need to replace them with substitute characters to make the path usable on Windows.
+ """ + if not is_win32: + return path + + return path.translate(WINDOWS_MAP_CHARS) def get_strip_prefix(path): diff --git a/src/borg/item.pyx b/src/borg/item.pyx index 2ee622864..7b1011659 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -7,7 +7,7 @@ from cpython.bytes cimport PyBytes_AsStringAndSize from .constants import ITEM_KEYS, ARCHIVE_KEYS from .helpers import StableDict from .helpers import format_file_size -from .helpers.fs import assert_sanitized_path, to_sanitized_path, percentify, slashify +from .helpers.fs import assert_sanitized_path, to_sanitized_path, map_chars, slashify from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp from .helpers.time import OutputTimestamp, safe_timestamp @@ -265,7 +265,7 @@ cdef class Item(PropDict): path = PropDictProperty(str, 'surrogate-escaped str', encode=assert_sanitized_path, decode=to_sanitized_path) source = PropDictProperty(str, 'surrogate-escaped str') # legacy borg 1.x. borg 2: see .target - target = PropDictProperty(str, 'surrogate-escaped str', encode=slashify, decode=percentify) + target = PropDictProperty(str, 'surrogate-escaped str', encode=slashify, decode=map_chars) user = PropDictProperty(str, 'surrogate-escaped str') group = PropDictProperty(str, 'surrogate-escaped str') diff --git a/src/borg/testsuite/helpers/fs_test.py b/src/borg/testsuite/helpers/fs_test.py index ae6dd71c9..cf089698e 100644 --- a/src/borg/testsuite/helpers/fs_test.py +++ b/src/borg/testsuite/helpers/fs_test.py @@ -20,6 +20,7 @@ from ...helpers.fs import ( safe_unlink, remove_dotdot_prefixes, make_path_safe, + map_chars, ) from ...platform import is_win32, is_darwin, is_haiku from .. 
import are_hardlinks_supported @@ -441,3 +442,24 @@ def test_dir_is_tagged(tmpdir): assert dir_is_tagged(dir_fd=fd, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == [".NOBACKUP"] with open_dir(str(normal_dir)) as fd: assert dir_is_tagged(dir_fd=fd, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == [] + + +def test_map_chars(monkeypatch): + # Test behavior on non-Windows (should return path unchanged) + monkeypatch.setattr("borg.helpers.fs.is_win32", False) + assert map_chars("foo/bar") == "foo/bar" + assert map_chars("foo\\bar") == "foo\\bar" + assert map_chars("foo:bar") == "foo:bar" + + # Test behavior on Windows + monkeypatch.setattr("borg.helpers.fs.is_win32", True) + + # Reserved characters replacement + assert map_chars("foo:bar") == "foo\uf03abar" + assert map_chars("foo<bar") == "foo\uf03cbar" + assert map_chars("foo>bar") == "foo\uf03ebar" + assert map_chars('foo"bar') == "foo\uf022bar" + assert map_chars("foo\\bar") == "foo\uf05cbar" + assert map_chars("foo|bar") == "foo\uf07cbar" + assert map_chars("foo?bar") == "foo\uf03fbar" + assert map_chars("foo*bar") == "foo\uf02abar"