From 5b96d5acc30fec766a076ec367a154497b5d52e4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 23 Jan 2024 18:06:55 +0100 Subject: [PATCH] create: add the slashdot hack, fixes #4685 --- docs/usage/create.rst | 3 ++ src/borg/archive.py | 48 ++++++++++++++++------- src/borg/archiver.py | 71 +++++++++++++++++++++++----------- src/borg/helpers/fs.py | 15 +++++++ src/borg/testsuite/archiver.py | 25 ++++++++++++ 5 files changed, 125 insertions(+), 37 deletions(-) diff --git a/docs/usage/create.rst b/docs/usage/create.rst index 16906d7ee..488169333 100644 --- a/docs/usage/create.rst +++ b/docs/usage/create.rst @@ -10,6 +10,9 @@ Examples # same, but list all files as we process them $ borg create --list /path/to/repo::my-documents ~/Documents + # Backup /mnt/disk/docs, but strip path prefix using the slashdot hack + $ borg create /path/to/repo::docs /mnt/disk/./docs + # Backup ~/Documents and ~/src but exclude pyc files $ borg create /path/to/repo::my-files \ ~/Documents \ diff --git a/src/borg/archive.py b/src/borg/archive.py index 7f7da35a2..4b1bd6dc7 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1299,7 +1299,16 @@ class FilesystemObjectProcessors: self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse) @contextmanager - def create_helper(self, path, st, status=None, hardlinkable=True): + def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None): + if strip_prefix is not None: + assert not path.endswith(os.sep) + if strip_prefix.startswith(path + os.sep): + # still on a directory level that shall be stripped - do not create an item for this! + yield None, 'x', False, False + return + # adjust path, remove stripped directory levels + path = path.removeprefix(strip_prefix) + safe_path = make_path_safe(path) item = Item(path=safe_path) hardlink_master = False @@ -1318,13 +1327,16 @@ class FilesystemObjectProcessors: if hardlink_master: self.hard_links[(st.st_ino, st.st_dev)] = safe_path - def process_dir_with_fd(self, *, path, fd, st): - with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master): - item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) + def process_dir_with_fd(self, *, path, fd, st, strip_prefix): + with self.create_helper(path, st, 'd', hardlinkable=False, strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master): + if item is not None: + item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) return status - def process_dir(self, *, path, parent_fd, name, st): - with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master): + def process_dir(self, *, path, parent_fd, name, st, strip_prefix): + with self.create_helper(path, st, 'd', hardlinkable=False, strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master): + if item is None: + return status with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op='dir_open') as fd: # fd is None for directories on windows, in that case a race condition check is not possible. @@ -1334,8 +1346,10 @@ class FilesystemObjectProcessors: item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) return status - def process_fifo(self, *, path, parent_fd, name, st): - with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master): # fifo + def process_fifo(self, *, path, parent_fd, name, st, strip_prefix): + with self.create_helper(path, st, 'f', strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master): # fifo + if item is None: + return status with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: with backup_io('fstat'): st = stat_update_check(st, os.fstat(fd)) @@ -1344,9 +1358,11 @@ class FilesystemObjectProcessors: item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) return status - def process_dev(self, *, path, parent_fd, name, st, dev_type): - with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master): # char/block device + def process_dev(self, *, path, parent_fd, name, st, dev_type, strip_prefix): + with self.create_helper(path, st, dev_type, strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master): # char/block device # looks like we can not work fd-based here without causing issues when trying to open/close the device + if item is None: + return status with backup_io('stat'): st = stat_update_check(st, os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)) item.rdev = st.st_rdev @@ -1355,11 +1371,13 @@ class FilesystemObjectProcessors: item.update(self.metadata_collector.stat_attrs(st, path)) return status - def process_symlink(self, *, path, parent_fd, name, st): + def process_symlink(self, *, path, parent_fd, name, st, strip_prefix): # note: using hardlinkable=False because we can not support hardlinked symlinks, # due to the dual-use of item.source, see issue #2343: # hardlinked symlinks will be archived [and extracted] as non-hardlinked symlinks. - with self.create_helper(path, st, 's', hardlinkable=False) as (item, status, hardlinked, hardlink_master): + with self.create_helper(path, st, 's', hardlinkable=False, strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master): + if item is None: + return status fname = name if name is not None and parent_fd is not None else path with backup_io('readlink'): source = os.readlink(fname, dir_fd=parent_fd) @@ -1392,8 +1410,10 @@ class FilesystemObjectProcessors: self.add_item(item, stats=self.stats) return status - def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal): - with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet + def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal, strip_prefix): + with self.create_helper(path, st, None, strip_prefix=strip_prefix) as (item, status, hardlinked, hardlink_master): # no status yet + if item is None: + return status with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=True) as fd: with backup_io('fstat'): st = stat_update_check(st, os.fstat(fd)) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index e670df0fd..d8f6ef344 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -55,7 +55,7 @@ try: from .helpers import safe_encode, remove_surrogates, bin_to_hex, hex_to_bin, prepare_dump_dict, eval_escapes from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS from .helpers import timestamp, utcnow - from .helpers import get_cache_dir, os_stat + from .helpers import get_cache_dir, os_stat, get_strip_prefix from .helpers import Manifest, AI_HUMAN_SORT_KEYS from .helpers import hardlinkable from .helpers import StableDict @@ -565,12 +565,14 @@ class Archiver: pipe_bin = sys.stdin.buffer pipe = TextIOWrapper(pipe_bin, errors='surrogateescape') for path in iter_separated(pipe, paths_sep): + strip_prefix = get_strip_prefix(path) path = os.path.normpath(path) try: with backup_io('stat'): st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False) status = self._process_any(path=path, parent_fd=None, name=None, st=st, fso=fso, - cache=cache, read_special=args.read_special, dry_run=dry_run) + cache=cache, read_special=args.read_special, dry_run=dry_run, + strip_prefix=strip_prefix) except BackupError as e: self.print_warning_instance(BackupWarning(path, e)) status = 'E' @@ -598,6 +600,8 @@ class Archiver: status = '-' self.print_file_status(status, path) continue + + strip_prefix = get_strip_prefix(path) path = os.path.normpath(path) try: with backup_io('stat'): @@ -607,7 +611,8 @@ class Archiver: fso=fso, cache=cache, matcher=matcher, exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run) + restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run, + strip_prefix=strip_prefix) # if we get back here, we've finished recursing into , # we do not ever want to get back in there (even if path is given twice as recursion root) skip_inodes.add((st.st_ino, st.st_dev)) @@ -674,7 +679,7 @@ class Archiver: else: create_inner(None, None, None) - def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run): + def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run, strip_prefix): """ Call the right method on the given FilesystemObjectProcessor. """ @@ -682,12 +687,12 @@ class Archiver: if dry_run: return '-' elif stat.S_ISREG(st.st_mode): - return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) + return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache, strip_prefix=strip_prefix) elif stat.S_ISDIR(st.st_mode): - return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st) + return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix) elif stat.S_ISLNK(st.st_mode): if not read_special: - return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) + return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix) else: try: st_target = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=True) @@ -697,27 +702,27 @@ class Archiver: special = is_special(st_target.st_mode) if special: return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target, - cache=cache, flags=flags_special_follow) + cache=cache, flags=flags_special_follow, strip_prefix=strip_prefix) else: - return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) + return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix) elif stat.S_ISFIFO(st.st_mode): if not read_special: - return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st) + return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st, strip_prefix=strip_prefix) else: return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, - cache=cache, flags=flags_special) + cache=cache, flags=flags_special, strip_prefix=strip_prefix) elif stat.S_ISCHR(st.st_mode): if not read_special: - return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c') + return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c', strip_prefix=strip_prefix) else: return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, - cache=cache, flags=flags_special) + cache=cache, flags=flags_special, strip_prefix=strip_prefix) elif stat.S_ISBLK(st.st_mode): if not read_special: - return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b') + return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b', strip_prefix=strip_prefix) else: return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, - cache=cache, flags=flags_special) + cache=cache, flags=flags_special, strip_prefix=strip_prefix) elif stat.S_ISSOCK(st.st_mode): # Ignore unix sockets return @@ -733,7 +738,7 @@ class Archiver: def _rec_walk(self, *, path, parent_fd, name, fso, cache, matcher, exclude_caches, exclude_if_present, keep_exclude_tags, - skip_inodes, restrict_dev, read_special, dry_run): + skip_inodes, restrict_dev, read_special, dry_run, strip_prefix): """ Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters. @@ -781,7 +786,7 @@ class Archiver: # directories cannot go in this branch because they can be excluded based on tag # files they might contain status = self._process_any(path=path, parent_fd=parent_fd, name=name, st=st, fso=fso, cache=cache, - read_special=read_special, dry_run=dry_run) + read_special=read_special, dry_run=dry_run, strip_prefix=strip_prefix) else: with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op='dir_open') as child_fd: @@ -797,19 +802,19 @@ class Archiver: if not recurse_excluded_dir: if keep_exclude_tags: if not dry_run: - fso.process_dir_with_fd(path=path, fd=child_fd, st=st) + fso.process_dir_with_fd(path=path, fd=child_fd, st=st, strip_prefix=strip_prefix) for tag_name in tag_names: tag_path = os.path.join(path, tag_name) self._rec_walk( path=tag_path, parent_fd=child_fd, name=tag_name, fso=fso, cache=cache, matcher=matcher, exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) + restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run, strip_prefix=strip_prefix) self.print_file_status('x', path) return if not recurse_excluded_dir: if not dry_run: - status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st) + status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st, strip_prefix=strip_prefix) else: status = '-' if recurse: @@ -821,7 +826,7 @@ class Archiver: path=normpath, parent_fd=child_fd, name=dirent.name, fso=fso, cache=cache, matcher=matcher, exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, restrict_dev=restrict_dev, - read_special=read_special, dry_run=dry_run) + read_special=read_special, dry_run=dry_run, strip_prefix=strip_prefix) except BackupError as e: self.print_warning_instance(BackupWarning(path, e)) status = 'E' @@ -3391,6 +3396,11 @@ class Archiver: that means if relative paths are desired, the command has to be run from the correct directory. + The slashdot hack in paths (recursion roots) is triggered by using ``/./``: + ``/this/gets/stripped/./this/gets/archived`` means to process that fs object, but + strip the prefix on the left side of ``./`` from the archived items (in this case, + ``this/gets/archived`` will be the path in the archived item). + When giving '-' as path, borg will read data from standard input and create a file 'stdin' in the created archive from that data. In some cases it's more appropriate to use --content-from-command, however. See section *Reading from @@ -3530,8 +3540,8 @@ class Archiver: - 'x' = excluded, item was *not* backed up - '?' = missing status code (if you see this, please file a bug report!) - Reading from stdin - ++++++++++++++++++ + Reading backup data from stdin + ++++++++++++++++++++++++++++++ There are two methods to read from stdin. Either specify ``-`` as path and pipe directly to borg:: @@ -3562,6 +3572,21 @@ class Archiver: By default, the content read from stdin is stored in a file called 'stdin'. Use ``--stdin-name`` to change the name. + + Feeding all file paths from externally + ++++++++++++++++++++++++++++++++++++++ + + Usually, you give a starting path (recursion root) to borg and then borg + automatically recurses, finds and backs up all fs objects contained in + there (optionally considering include/exclude rules). + + If you need more control and you want to give every single fs object path + to borg (maybe implementing your own recursion or your own rules), you can use + ``--paths-from-stdin`` or ``--paths-from-command`` (with the latter, borg will + fail to create an archive should the command fail). + + Borg supports paths with the slashdot hack to strip path prefixes here also. + So, be careful not to unintentionally trigger that. """) subparser = subparsers.add_parser('create', parents=[common_parser], add_help=False, diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index acb16dd83..a880bb0fd 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -162,6 +162,21 @@ def make_path_safe(path): return _safe_re.sub('', path) or '.' +def get_strip_prefix(path): + # similar to how rsync does it, we allow users to give paths like: + # /this/gets/stripped/./this/is/kept + # the whole path is what is used to read from the fs, + # the strip_prefix will be /this/gets/stripped/ and + # this/is/kept is the path being archived. + pos = path.find('/./') # detect slashdot hack + if pos > 0: + # found a prefix to strip! make sure it ends with one "/"! + return os.path.normpath(path[:pos]) + os.sep + else: + # no or empty prefix, nothing to strip! + return None + + def hardlinkable(mode): """return True if we support hardlinked items of this type""" return stat.S_ISREG(mode) or stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index f16062f9f..3eb5a78ba 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -2172,6 +2172,31 @@ class ArchiverTestCase(ArchiverTestCaseBase): output = self.cmd('list', archive) assert 'input/link -> somewhere does not exist' in output + def test_create_dotslash_hack(self): + os.makedirs(os.path.join(self.input_path, 'first', 'secondA', 'thirdA')) + os.makedirs(os.path.join(self.input_path, 'first', 'secondB', 'thirdB')) + self.cmd('init', '--encryption=none', self.repository_location) + archive = self.repository_location + '::test' + self.cmd('create', archive, 'input/first/./') # hack! + output = self.cmd('list', archive) + # dir levels left of slashdot (= input, first) not in archive: + assert 'input' not in output + assert 'input/first' not in output + assert 'input/first/secondA' not in output + assert 'input/first/secondA/thirdA' not in output + assert 'input/first/secondB' not in output + assert 'input/first/secondB/thirdB' not in output + assert 'first' not in output + assert 'first/secondA' not in output + assert 'first/secondA/thirdA' not in output + assert 'first/secondB' not in output + assert 'first/secondB/thirdB' not in output + # dir levels right of slashdot are in archive: + assert 'secondA' in output + assert 'secondA/thirdA' in output + assert 'secondB' in output + assert 'secondB/thirdB' in output + # def test_cmdline_compatibility(self): # self.create_regular_file('file1', size=1024 * 80) # self.cmd('init', '--encryption=repokey', self.repository_location)