diff --git a/scripts/shell_completions/fish/borg.fish b/scripts/shell_completions/fish/borg.fish index 423ac6e2d..1da1b1a21 100644 --- a/scripts/shell_completions/fish/borg.fish +++ b/scripts/shell_completions/fish/borg.fish @@ -122,6 +122,7 @@ complete -c borg -f -l 'exclude-caches' -d 'Exclude directories tagg complete -c borg -l 'exclude-if-present' -d 'Exclude directories that contain FILENAME' -n "__fish_seen_subcommand_from create" complete -c borg -f -l 'keep-exclude-tags' -d 'Keep tag files of excluded directories' -n "__fish_seen_subcommand_from create" complete -c borg -f -l 'exclude-nodump' -d 'Exclude files flagged NODUMP' -n "__fish_seen_subcommand_from create" +complete -c borg -f -l 'exclude-dataless' -d 'Exclude files flagged DATALESS (macOS)' -n "__fish_seen_subcommand_from create" # Filesystem options complete -c borg -f -s x -l 'one-file-system' -d 'Stay in the same file system' -n "__fish_seen_subcommand_from create" complete -c borg -f -l 'numeric-ids' -d 'Only store numeric user:group identifiers' -n "__fish_seen_subcommand_from create" diff --git a/src/borg/archive.py b/src/borg/archive.py index ad07fec7c..063bfa513 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -53,6 +53,11 @@ from .platform import acl_get, acl_set, set_flags, get_flags, swidth from .repository import Repository, NoManifestError from .repoobj import RepoObj +# macOS: SF_DATALESS marks dataless placeholder files (e.g. cloud files not materialized locally). +# Reading such files triggers downloading their content. stat.SF_DATALESS is only available +# from Python 3.13 on, thus we fall back to the value from macOS' sys/stat.h. +SF_DATALESS = getattr(stat, "SF_DATALESS", 0x40000000) + has_link = hasattr(os, "link") diff --git a/src/borg/archiver/create_cmd.py b/src/borg/archiver/create_cmd.py index 9bf4d4f68..a2087ff1a 100644 --- a/src/borg/archiver/create_cmd.py +++ b/src/borg/archiver/create_cmd.py @@ -10,7 +10,7 @@ from io import TextIOWrapper from ._common import with_repository, Highlander from .. import helpers -from ..archive import Archive, is_special +from ..archive import Archive, is_special, SF_DATALESS from ..archive import BackupError, BackupOSError, BackupItemExcluded, backup_io, OsOpen, stat_update_check from ..archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor from ..cache import Cache @@ -32,7 +32,7 @@ from ..helpers import Error, CommandError, BackupWarning, FileChangedWarning from ..helpers.argparsing import ArgumentParser from ..manifest import Manifest from ..patterns import PatternMatcher -from ..platform import is_win32 +from ..platform import is_win32, get_flags from ..logger import create_logger @@ -225,6 +225,7 @@ class CreateMixIn: self.noflags = args.noflags self.noacls = args.noacls self.noxattrs = args.noxattrs + self.exclude_dataless = args.exclude_dataless dry_run = args.dry_run self.start_backup = time.time_ns() t0 = archive_ts_now() @@ -476,6 +477,15 @@ class CreateMixIn: # directory of the mounted filesystem that shadows the mountpoint dir). recurse = restrict_dev is None or st.st_dev == restrict_dev + if self.exclude_dataless: + # this needs to be done BEFORE opening the file, as opening + # would otherwise materialize the file contents. + with backup_io("flags"): + flags = get_flags(path=path, st=st) + if flags & SF_DATALESS: + self.print_file_status("x", path) + return + if not stat.S_ISDIR(st.st_mode): # directories cannot go in this branch because they can be excluded based on tag # files they might contain @@ -886,7 +896,14 @@ class CreateMixIn: help="set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: ``\\n``) ", ) - define_exclusion_group(subparser, tag_files=True) + exclude_group = define_exclusion_group(subparser, tag_files=True) + exclude_group.add_argument( + "--exclude-dataless", + dest="exclude_dataless", + action="store_true", + help="exclude files flagged DATALESS (macOS: placeholder files whose content " + "is not materialized locally, e.g. not-downloaded cloud storage files)", + ) fs_group = subparser.add_argument_group("Filesystem options") fs_group.add_argument( diff --git a/src/borg/testsuite/archiver/create_cmd_test.py b/src/borg/testsuite/archiver/create_cmd_test.py index 07db6ee51..a65d9f999 100644 --- a/src/borg/testsuite/archiver/create_cmd_test.py +++ b/src/borg/testsuite/archiver/create_cmd_test.py @@ -1172,6 +1172,33 @@ def test_create_with_compression_algorithms(archivers, request): assert_dirs_equal(archiver.input_path, os.path.join(extract_path, "input")) +def test_create_exclude_dataless(archivers, request, monkeypatch): + """Files flagged SF_DATALESS are excluded with --exclude-dataless.""" + from ...archive import SF_DATALESS + import borg.archiver.create_cmd as create_cmd_module + + archiver = request.getfixturevalue(archivers) + if archiver.EXE: + pytest.skip("Skipping binary test due to patch objects") + create_regular_file(archiver.input_path, "file1", size=1024 * 80) + create_regular_file(archiver.input_path, "cloudfile", size=1024 * 80) + + # SF_DATALESS cannot be set from userspace, so fake the flags lookup. + def fake_get_flags(path, st, fd=None): + return SF_DATALESS if path.endswith("cloudfile") else 0 + + cmd(archiver, "repo-create", RK_ENCRYPTION) + + monkeypatch.setattr(create_cmd_module, "get_flags", fake_get_flags) + output = cmd(archiver, "create", "--list", "--exclude-dataless", "test", "input") + assert "A input/file1" in output + assert "x input/cloudfile" in output + + # without --exclude-dataless, the file is backed up + output = cmd(archiver, "create", "--list", "test2", "input") + assert "A input/cloudfile" in output + + def test_exclude_nodump_dir_with_file(archivers, request): """A directory flagged NODUMP and its contents must not be archived.""" archiver = request.getfixturevalue(archivers)