Improve Windows path handling with forward slash standardization

This commit implements a comprehensive approach to Windows path compatibility
by standardizing on forward slashes (/) for all internal path representations
while maintaining cross-platform archive compatibility.

Core Strategy:
- All internal paths now use forward slashes as separators on all platforms
- Boundary normalization: backslashes converted to forward slashes at entry
  points on Windows (filesystem paths only, not user patterns)
- Literal backslashes in paths from POSIX archives are replaced with % when
  extracting on Windows

Key Changes:

Path Handling (helpers/fs.py):
- Added slashify(): converts backslashes to forward slashes on Windows
- Added percentify(): replaces backslashes with % for POSIX-to-Windows extraction
- Updated make_path_safe() to check for Windows-style .. patterns
- Changed get_strip_prefix() to use posixpath.normpath instead of os.path.normpath
- Updated remove_dotdot_prefixes() to use forward slashes consistently

Pattern Matching (patterns.py):
- Replaced os.path with posixpath throughout for consistent separator handling
- Updated PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
- All pattern matching now uses / as separator regardless of platform
- Removed platform-specific os.sep usage

Archive Operations (archive.py, item.pyx):
- Applied slashify() to paths during archive creation on Windows
- Symlink targets are now encoded with slashify() and decoded with percentify()
- Ensures archived paths always use forward slashes

Command Line (archiver/create_cmd.py, extract_cmd.py):
- Replaced os.path.join/normpath with posixpath equivalents
- Added slashify() for stdin-provided paths on Windows
- Updated strip_components to use / separator
- Changed PathSpec to FilesystemPathSpec for proper path handling

Repository (repository.py, legacyrepository.py):
- Replaced custom _local_abspath_to_file_url() with Path.as_uri()

Documentation (archiver/help_cmd.py):
- Clarified that all archived paths use forward slashes
- Added note about Windows absolute paths in archives (e.g., C/Windows/System32)
- Documented backslash-to-percent replacement for POSIX archives on Windows

Impact:
- Windows users can now create and extract archives with consistent path handling
- Cross-platform archives remain compatible
- Pattern matching works identically on all platforms
This commit is contained in:
Thomas Waldmann 2026-01-25 11:01:25 +01:00
parent 0feb1da03f
commit 88581d1bb8
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
13 changed files with 109 additions and 77 deletions

View file

@ -2,6 +2,7 @@ import base64
import errno
import json
import os
import posixpath
import stat
import sys
import time
@ -1243,8 +1244,8 @@ class FilesystemObjectProcessors:
@contextmanager
def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None):
if strip_prefix is not None:
assert not path.endswith(os.sep)
if strip_prefix.startswith(path + os.sep):
assert not path.endswith("/")
if strip_prefix.startswith(path + "/"):
# still on a directory level that shall be stripped - do not create an item for this!
yield None, "x", False, None
return
@ -1547,7 +1548,7 @@ class TarfileObjectProcessors:
# if the tar has names starting with "./", normalize them like borg create also does.
# ./dir/file must become dir/file in the borg archive.
normalized_path = os.path.normpath(tarinfo.name)
normalized_path = posixpath.normpath(tarinfo.name)
item = Item(
path=make_path_safe(normalized_path),
mode=tarinfo.mode | type,
@ -1608,7 +1609,7 @@ class TarfileObjectProcessors:
def process_hardlink(self, *, tarinfo, status, type):
with self.create_helper(tarinfo, status, type) as (item, status):
# create a not hardlinked borg item, reusing the chunks, see HardLinkManager.__doc__
normalized_path = os.path.normpath(tarinfo.linkname)
normalized_path = posixpath.normpath(tarinfo.linkname)
safe_path = make_path_safe(normalized_path)
chunks = self.hlm.retrieve(safe_path)
if chunks is not None:

View file

@ -3,6 +3,7 @@ import sys
import argparse
import logging
import os
import posixpath
import stat
import subprocess
import time
@ -16,11 +17,11 @@ from ..archive import FilesystemObjectProcessors, MetadataCollector, ChunksProce
from ..cache import Cache
from ..constants import * # NOQA
from ..compress import CompressionSpec
from ..helpers import comment_validator, ChunkerParams, PathSpec
from ..helpers import comment_validator, ChunkerParams, FilesystemPathSpec
from ..helpers import archivename_validator, FilesCacheMode
from ..helpers import eval_escapes
from ..helpers import timestamp, archive_ts_now
from ..helpers import get_cache_dir, os_stat, get_strip_prefix
from ..helpers import get_cache_dir, os_stat, get_strip_prefix, slashify
from ..helpers import dir_is_tagged
from ..helpers import log_multi
from ..helpers import basic_json_data, json_print
@ -106,8 +107,9 @@ class CreateMixIn:
pipe_bin = sys.stdin.buffer
pipe = TextIOWrapper(pipe_bin, errors="surrogateescape")
for path in iter_separated(pipe, paths_sep):
path = slashify(path)
strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
path = posixpath.normpath(path)
try:
with backup_io("stat"):
st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
@ -160,7 +162,7 @@ class CreateMixIn:
continue
strip_prefix = get_strip_prefix(path)
path = os.path.normpath(path)
path = posixpath.normpath(path)
try:
with backup_io("stat"):
st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
@ -489,7 +491,7 @@ class CreateMixIn:
path=path, fd=child_fd, st=st, strip_prefix=strip_prefix
)
for tag_name in tag_names:
tag_path = os.path.join(path, tag_name)
tag_path = posixpath.join(path, tag_name)
self._rec_walk(
path=tag_path,
parent_fd=child_fd,
@ -523,7 +525,7 @@ class CreateMixIn:
with backup_io("scandir"):
entries = helpers.scandir_inorder(path=path, fd=child_fd)
for dirent in entries:
normpath = os.path.normpath(os.path.join(path, dirent.name))
normpath = posixpath.normpath(posixpath.join(path, dirent.name))
self._rec_walk(
path=normpath,
parent_fd=child_fd,
@ -962,5 +964,5 @@ class CreateMixIn:
subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
subparser.add_argument(
"paths", metavar="PATH", nargs="*", type=PathSpec, action="extend", help="paths to archive"
"paths", metavar="PATH", nargs="*", type=FilesystemPathSpec, action="extend", help="paths to archive"
)

View file

@ -1,7 +1,6 @@
import sys
import argparse
import logging
import os
import stat
from ._common import with_repository, with_archive
@ -60,7 +59,7 @@ class ExtractMixIn:
for item in archive.iter_items():
orig_path = item.path
if strip_components:
stripped_path = os.sep.join(orig_path.split(os.sep)[strip_components:])
stripped_path = "/".join(orig_path.split("/")[strip_components:])
if not stripped_path:
continue
item.path = stripped_path

View file

@ -35,11 +35,14 @@ class HelpMixIn:
start with ``src``.
- When you back up relative paths like ``../../src``, the archived paths
start with ``src``.
- On native Windows, archived absolute paths look like ``C/Windows/System32``.
Borg supports different pattern styles. To define a non-default
style for a specific pattern, prefix it with two characters followed
by a colon ':' (i.e. ``fm:path/*``, ``sh:path/**``).
Note: Windows users must only use forward slashes in patterns, not backslashes.
The default pattern style for ``--exclude`` differs from ``--pattern``, see below.
`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector ``fm:``
@ -48,8 +51,8 @@ class HelpMixIn:
any number of characters, '?' matching any single character, '[...]'
matching any single character specified, including ranges, and '[!...]'
matching any character not specified. For the purpose of these patterns,
the path separator (backslash for Windows and '/' on other systems) is not
treated specially. Wrap meta-characters in brackets for a literal
the path separator (forward slash '/') is not treated specially.
Wrap meta-characters in brackets for a literal
match (i.e. ``[?]`` to match the literal character '?'). For a path
to match a pattern, the full path must match, or it must match
from the start of the full path to just before a path separator. Except
@ -69,9 +72,7 @@ class HelpMixIn:
`Regular expressions <https://docs.python.org/3/library/re.html>`_, selector ``re:``
Unlike shell patterns, regular expressions are not required to match the full
path and any substring match is sufficient. It is strongly recommended to
anchor patterns to the start ('^'), to the end ('$') or both. Path
separators (backslash for Windows and '/' on other systems) in paths are
always normalized to a forward slash '/' before applying a pattern.
anchor patterns to the start ('^'), to the end ('$') or both.
Path prefix, selector ``pp:``
This pattern style is useful to match whole subdirectories. The pattern
@ -103,6 +104,15 @@ class HelpMixIn:
cannot supply ``re:`` patterns. Further, ensure that ``sh:`` and
``fm:`` patterns only contain a handful of wildcards at most.
.. note::
**Windows path handling**: All paths in Borg archives use forward slashes (``/``)
as path separators, regardless of the platform. When creating archives on Windows,
backslashes from filesystem paths are automatically converted to forward slashes.
When extracting archives created on POSIX systems that contain literal backslashes
in filenames (which is rare, but possible), the backslash character is replaced
with ``%`` on Windows to prevent misinterpretation as a path separator.
Exclusions can be passed via the command line option ``--exclude``. When used
from within a shell, the patterns should be quoted to protect them from
expansion.

View file

@ -20,7 +20,7 @@ from .errors import BackupPermissionError, BackupIOError, BackupFileNotFoundErro
from .fs import ensure_dir, join_base_dir, get_socket_filename
from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount, slashify
from .fs import O_, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime
from .fs import HardLinkManager
from .misc import sysinfo, log_multi, consume
@ -28,7 +28,15 @@ from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated,
from .parseformat import bin_to_hex, hex_to_bin, safe_encode, safe_decode
from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd
from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval
from .parseformat import PathSpec, SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper
from .parseformat import (
PathSpec,
FilesystemPathSpec,
SortBySpec,
ChunkerParams,
FilesCacheMode,
partial_format,
DatetimeWrapper,
)
from .parseformat import format_file_size, parse_file_size, FileSize
from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Location, text_validator
from .parseformat import format_line, replace_placeholders, PlaceholderError, relative_time_marker_validator

View file

@ -249,6 +249,11 @@ def make_path_safe(path):
For reasons of security, a ValueError is raised should
`path` contain any '..' elements.
"""
if "\\.." in path or "..\\" in path:
raise ValueError(f"unexpected '..' element in path {path!r}")
path = percentify(path)
path = path.lstrip("/")
if path.startswith("../") or "/../" in path or path.endswith("/..") or path == "..":
raise ValueError(f"unexpected '..' element in path {path!r}")
@ -256,6 +261,26 @@ def make_path_safe(path):
return path
def slashify(path):
    """
    Return `path` with every backslash turned into a forward slash on Windows.

    On any other platform (``is_win32`` is false), `path` is returned unchanged.
    Use case: internally, forward slashes are wanted as separators, even on Windows.
    """
    if not is_win32:
        # not on Windows: leave the path untouched.
        return path
    return path.replace("\\", "/")
def percentify(path):
    """
    Return `path` with every backslash turned into a percent sign on Windows.

    On any other platform (``is_win32`` is false), `path` is returned unchanged.
    Use case: an archived path may contain backslashes (not a path separator on POSIX,
    so it could appear as a normal character in POSIX paths); replacing them with
    percent signs makes such a path usable on Windows.
    """
    if is_win32:
        return path.replace("\\", "%")
    # not on Windows: leave the path untouched.
    return path
def get_strip_prefix(path):
# similar to how rsync does it, we allow users to give paths like:
# /this/gets/stripped/./this/is/kept
@ -265,7 +290,7 @@ def get_strip_prefix(path):
pos = path.find("/./") # detect slashdot hack
if pos > 0:
# found a prefix to strip! make sure it ends with one "/"!
return os.path.normpath(path[:pos]) + os.sep
return posixpath.normpath(path[:pos]) + "/"
else:
# no or empty prefix, nothing to strip!
return None
@ -276,15 +301,14 @@ _dotdot_re = re.compile(r"^(\.\./)+")
def remove_dotdot_prefixes(path):
"""
Remove '../'s at the beginning of `path`. Additionally,
the path is made relative.
Remove '../'s at the beginning of `path`. Additionally, the path is made relative.
`path` is expected to be normalized already (e.g. via `os.path.normpath()`).
`path` is expected to be normalized already (e.g. via `posixpath.normpath()`).
"""
assert "\\" not in path
if is_win32:
if len(path) > 1 and path[1] == ":":
path = path.replace(":", "", 1)
path = path.replace("\\", "/")
path = path.lstrip("/")
path = _dotdot_re.sub("", path)

View file

@ -22,12 +22,13 @@ from ..logger import create_logger
logger = create_logger()
from .errors import Error
from .fs import get_keys_dir, make_path_safe
from .fs import get_keys_dir, make_path_safe, slashify
from .msgpack import Timestamp
from .time import OutputTimestamp, format_time, safe_timestamp
from .. import __version__ as borg_version
from .. import __version_tuple__ as borg_version_tuple
from ..constants import * # NOQA
from ..platformflags import is_win32
if TYPE_CHECKING:
from ..item import ItemDiff
@ -334,6 +335,12 @@ def PathSpec(text):
return text
def FilesystemPathSpec(text):
    """
    Validate a filesystem path given as a command line argument.

    Returns `text` passed through `slashify()`.
    Raises argparse.ArgumentTypeError if `text` is empty.
    """
    if text:
        return slashify(text)
    raise argparse.ArgumentTypeError("Empty strings are not accepted as paths.")
def SortBySpec(text):
from ..manifest import AI_HUMAN_SORT_KEYS
@ -557,7 +564,8 @@ class Location:
m = self.local_re.match(text)
if m:
self.proto = "file"
self.path = os.path.abspath(os.path.normpath(m.group("path")))
path = m.group("path")
self.path = slashify(os.path.abspath(path)) if is_win32 else os.path.abspath(path)
return True
return False

View file

@ -7,7 +7,7 @@ from cpython.bytes cimport PyBytes_AsStringAndSize
from .constants import ITEM_KEYS, ARCHIVE_KEYS
from .helpers import StableDict
from .helpers import format_file_size
from .helpers.fs import assert_sanitized_path, to_sanitized_path
from .helpers.fs import assert_sanitized_path, to_sanitized_path, percentify, slashify
from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp
from .helpers.time import OutputTimestamp, safe_timestamp
@ -265,7 +265,7 @@ cdef class Item(PropDict):
path = PropDictProperty(str, 'surrogate-escaped str', encode=assert_sanitized_path, decode=to_sanitized_path)
source = PropDictProperty(str, 'surrogate-escaped str') # legacy borg 1.x. borg 2: see .target
target = PropDictProperty(str, 'surrogate-escaped str')
target = PropDictProperty(str, 'surrogate-escaped str', encode=slashify, decode=percentify)
user = PropDictProperty(str, 'surrogate-escaped str')
group = PropDictProperty(str, 'surrogate-escaped str')

View file

@ -5,6 +5,7 @@ import shutil
import stat
import struct
import time
from pathlib import Path
from collections import defaultdict
from configparser import ConfigParser
from functools import partial
@ -27,7 +28,6 @@ from .platform import SaveFile, SyncFile, sync_dir, safe_fadvise
from .repoobj import RepoObj
from .checksums import crc32, StreamingXXH64
from .crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
from .repository import _local_abspath_to_file_url
logger = create_logger(__name__)
@ -191,8 +191,9 @@ class LegacyRepository:
exit_mcode = 21
def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, send_log_cb=None):
self.path = os.path.abspath(path)
self._location = Location(_local_abspath_to_file_url(self.path))
p = Path(path).absolute()
self.path = str(p)
self._location = Location(p.as_uri())
self.version = None
# long-running repository methods which emit log or progress output are responsible for calling
# the ._send_log method periodically to get log and progress output transferred to the borg client

View file

@ -1,6 +1,6 @@
import argparse
import fnmatch
import os.path
import posixpath
import re
import sys
import unicodedata
@ -142,7 +142,7 @@ class PatternMatcher:
in self.fallback is returned (defaults to None).
"""
path = normalize_path(path).lstrip(os.path.sep)
path = normalize_path(path).lstrip("/")
# do a fast lookup for full path matches (note: we do not count such matches):
non_existent = object()
value = self._path_full_patterns.get(path, non_existent)
@ -215,7 +215,7 @@ class PathFullPattern(PatternBase):
PREFIX = "pf"
def _prepare(self, pattern):
self.pattern = os.path.normpath(pattern).lstrip(os.path.sep) # sep at beginning is removed
self.pattern = posixpath.normpath(pattern).lstrip("/") # / at beginning is removed
def _match(self, path):
return path == self.pattern
@ -236,12 +236,10 @@ class PathPrefixPattern(PatternBase):
PREFIX = "pp"
def _prepare(self, pattern):
sep = os.path.sep
self.pattern = (os.path.normpath(pattern).rstrip(sep) + sep).lstrip(sep) # sep at beginning is removed
self.pattern = (posixpath.normpath(pattern).rstrip("/") + "/").lstrip("/") # / at beginning is removed
def _match(self, path):
return (path + os.path.sep).startswith(self.pattern)
return (path + "/").startswith(self.pattern)
class FnmatchPattern(PatternBase):
@ -252,19 +250,19 @@ class FnmatchPattern(PatternBase):
PREFIX = "fm"
def _prepare(self, pattern):
if pattern.endswith(os.path.sep):
pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + "*" + os.path.sep
if pattern.endswith("/"):
pattern = posixpath.normpath(pattern).rstrip("/") + "/*/"
else:
pattern = os.path.normpath(pattern) + os.path.sep + "*"
pattern = posixpath.normpath(pattern) + "/*"
self.pattern = pattern.lstrip(os.path.sep) # sep at beginning is removed
self.pattern = pattern.lstrip("/") # / at beginning is removed
# fnmatch and re.match both cache compiled regular expressions.
# Nevertheless, this is about 10 times faster.
self.regex = re.compile(fnmatch.translate(self.pattern))
def _match(self, path):
return self.regex.match(path + os.path.sep) is not None
return self.regex.match(path + "/") is not None
class ShellPattern(PatternBase):
@ -275,18 +273,16 @@ class ShellPattern(PatternBase):
PREFIX = "sh"
def _prepare(self, pattern):
sep = os.path.sep
if pattern.endswith(sep):
pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep
if pattern.endswith("/"):
pattern = posixpath.normpath(pattern).rstrip("/") + "/**/*/"
else:
pattern = os.path.normpath(pattern) + sep + "**" + sep + "*"
pattern = posixpath.normpath(pattern) + "/**/*"
self.pattern = pattern.lstrip(sep) # sep at beginning is removed
self.pattern = pattern.lstrip("/") # / at beginning is removed
self.regex = re.compile(shellpattern.translate(self.pattern))
def _match(self, path):
return self.regex.match(path + os.path.sep) is not None
return self.regex.match(path + "/") is not None
class RegexPattern(PatternBase):
@ -295,14 +291,11 @@ class RegexPattern(PatternBase):
PREFIX = "re"
def _prepare(self, pattern):
self.pattern = pattern # sep at beginning is NOT removed
self.pattern = pattern # / at beginning is NOT removed
self.regex = re.compile(pattern)
def _match(self, path):
# Normalize path separators
if os.path.sep != "/":
path = path.replace(os.path.sep, "/")
assert "\\" not in path
return self.regex.search(path) is not None

View file

@ -1,6 +1,6 @@
import os
import sys
import time
from pathlib import Path
from borgstore.store import Store
from borgstore.store import ObjectNotFound as StoreObjectNotFound
@ -106,11 +106,11 @@ class Repository:
if isinstance(path_or_location, Location):
location = path_or_location
if location.proto == "file":
url = _local_abspath_to_file_url(location.path)
url = Path(location.path).as_uri()
else:
url = location.processed # location as given by user, processed placeholders
else:
url = _local_abspath_to_file_url(os.path.abspath(path_or_location))
url = Path(path_or_location).absolute().as_uri()
location = Location(url)
self._location = location
self.url = url
@ -566,16 +566,3 @@ class Repository:
def store_move(self, name, new_name=None, *, delete=False, undelete=False, deleted=False):
self._lock_refresh()
return self.store.move(name, new_name, delete=delete, undelete=undelete, deleted=deleted)
def _local_abspath_to_file_url(path: str) -> str:
"""Create a file URL from a local, absolute path.
Expects `path` to be an absolute path on the local filesystem, e.g.:
- POSIX: `/foo/bar`
- Windows: `c:/foo/bar` (or `c:\foo\bar`)
The easiest way to ensure this is for the caller to pass `path` through `os.path.abspath` first.
"""
if sys.platform in ("win32", "msys", "cygwin"):
path = "/" + path.replace("\\", "/")
return "file://%s" % path

View file

@ -2,6 +2,7 @@ import os
import subprocess
import sys
import time
from pathlib import Path
import pytest
@ -9,7 +10,6 @@ from ...constants import * # NOQA
from . import cmd, generate_archiver_tests, RK_ENCRYPTION
from ...helpers import CommandError
from ...platformflags import is_haiku, is_win32
from ...repository import _local_abspath_to_file_url
pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA
@ -24,7 +24,7 @@ def test_break_lock(archivers, request):
def test_with_lock(tmp_path):
repo_path = tmp_path / "repo"
env = os.environ.copy()
env["BORG_REPO"] = _local_abspath_to_file_url(str(repo_path.absolute()))
env["BORG_REPO"] = Path(repo_path).as_uri()
# test debug output:
print("sys.path: %r" % sys.path)
print("PYTHONPATH: %s" % env.get("PYTHONPATH", ""))

View file

@ -1,10 +1,10 @@
import time
from pathlib import Path
import pytest
from borgstore.store import Store
from ..repository import _local_abspath_to_file_url
from ..storelocking import Lock, NotLocked, LockTimeout
ID1 = "foo", 1, 1
@ -13,8 +13,7 @@ ID2 = "bar", 2, 2
@pytest.fixture()
def lockstore(tmp_path):
lockstore_path = tmp_path / "lockstore"
store = Store(_local_abspath_to_file_url(str(lockstore_path.absolute())), levels={"locks/": [0]})
store = Store(Path(tmp_path / "lockstore").as_uri(), levels={"locks/": [0]})
store.create()
with store:
yield store