From f484741fddc4ab612a332328f63bdf55ed6557af Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Thu, 11 Jun 2026 00:30:37 +0200
Subject: [PATCH] create: add --exclude-dataless to skip cloud files not materialized locally

Ports #9755 (1.4-maint) to master.

macOS flags files whose content lives in cloud storage (e.g. iCloud Drive)
and is not present locally with SF_DATALESS. Reading such a file triggers
downloading its content.

This adds --exclude-dataless to borg create: the flags are checked right
after stat() and before any open(), so excluded files/dirs never get
materialized. Skipped paths are reported with the usual 'x' status,
analogous to --exclude-nodump.

Notes:
- stat.SF_DATALESS only exists from Python 3.13 on, so there is a fallback
  to the value from macOS' sys/stat.h (0x40000000).
- The check covers directories too (they can also be dataless), and we skip
  before opening/recursing into them.
- Test fakes get_flags via monkeypatch since SF_DATALESS cannot be set from
  userspace.
- Fish shell completion updated; bash/zsh completions don't exist in master.

Co-Authored-By: Claude Sonnet 4.6 --- scripts/shell_completions/fish/borg.fish | 1 + src/borg/archive.py | 5 ++++ src/borg/archiver/create_cmd.py | 23 +++++++++++++--- .../testsuite/archiver/create_cmd_test.py | 27 +++++++++++++++++++ 4 files changed, 53 insertions(+), 3 deletions(-) diff --git a/scripts/shell_completions/fish/borg.fish b/scripts/shell_completions/fish/borg.fish index 423ac6e2d..1da1b1a21 100644 --- a/scripts/shell_completions/fish/borg.fish +++ b/scripts/shell_completions/fish/borg.fish @@ -122,6 +122,7 @@ complete -c borg -f -l 'exclude-caches' -d 'Exclude directories tagg complete -c borg -l 'exclude-if-present' -d 'Exclude directories that contain FILENAME' -n "__fish_seen_subcommand_from create" complete -c borg -f -l 'keep-exclude-tags' -d 'Keep tag files of excluded directories' -n "__fish_seen_subcommand_from create" complete -c borg -f -l 'exclude-nodump' -d 'Exclude files flagged NODUMP' -n "__fish_seen_subcommand_from create" +complete -c borg -f -l 'exclude-dataless' -d 'Exclude files flagged DATALESS (macOS)' -n "__fish_seen_subcommand_from create" # Filesystem options complete -c borg -f -s x -l 'one-file-system' -d 'Stay in the same file system' -n "__fish_seen_subcommand_from create" complete -c borg -f -l 'numeric-ids' -d 'Only store numeric user:group identifiers' -n "__fish_seen_subcommand_from create" diff --git a/src/borg/archive.py b/src/borg/archive.py index ad07fec7c..063bfa513 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -53,6 +53,11 @@ from .platform import acl_get, acl_set, set_flags, get_flags, swidth from .repository import Repository, NoManifestError from .repoobj import RepoObj +# macOS: SF_DATALESS marks dataless placeholder files (e.g. cloud files not materialized locally). +# Reading such files triggers downloading their content. stat.SF_DATALESS is only available +# from Python 3.13 on, thus we fall back to the value from macOS' sys/stat.h. 
+SF_DATALESS = getattr(stat, "SF_DATALESS", 0x40000000) + has_link = hasattr(os, "link") diff --git a/src/borg/archiver/create_cmd.py b/src/borg/archiver/create_cmd.py index 9bf4d4f68..a2087ff1a 100644 --- a/src/borg/archiver/create_cmd.py +++ b/src/borg/archiver/create_cmd.py @@ -10,7 +10,7 @@ from io import TextIOWrapper from ._common import with_repository, Highlander from .. import helpers -from ..archive import Archive, is_special +from ..archive import Archive, is_special, SF_DATALESS from ..archive import BackupError, BackupOSError, BackupItemExcluded, backup_io, OsOpen, stat_update_check from ..archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor from ..cache import Cache @@ -32,7 +32,7 @@ from ..helpers import Error, CommandError, BackupWarning, FileChangedWarning from ..helpers.argparsing import ArgumentParser from ..manifest import Manifest from ..patterns import PatternMatcher -from ..platform import is_win32 +from ..platform import is_win32, get_flags from ..logger import create_logger @@ -225,6 +225,7 @@ class CreateMixIn: self.noflags = args.noflags self.noacls = args.noacls self.noxattrs = args.noxattrs + self.exclude_dataless = args.exclude_dataless dry_run = args.dry_run self.start_backup = time.time_ns() t0 = archive_ts_now() @@ -476,6 +477,15 @@ class CreateMixIn: # directory of the mounted filesystem that shadows the mountpoint dir). recurse = restrict_dev is None or st.st_dev == restrict_dev + if self.exclude_dataless: + # this needs to be done BEFORE opening the file, as opening + # would otherwise materialize the file contents. 
+ with backup_io("flags"): + flags = get_flags(path=path, st=st) + if flags & SF_DATALESS: + self.print_file_status("x", path) + return + if not stat.S_ISDIR(st.st_mode): # directories cannot go in this branch because they can be excluded based on tag # files they might contain @@ -886,7 +896,14 @@ class CreateMixIn: help="set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: ``\\n``) ", ) - define_exclusion_group(subparser, tag_files=True) + exclude_group = define_exclusion_group(subparser, tag_files=True) + exclude_group.add_argument( + "--exclude-dataless", + dest="exclude_dataless", + action="store_true", + help="exclude files flagged DATALESS (macOS: placeholder files whose content " + "is not materialized locally, e.g. not-downloaded cloud storage files)", + ) fs_group = subparser.add_argument_group("Filesystem options") fs_group.add_argument( diff --git a/src/borg/testsuite/archiver/create_cmd_test.py b/src/borg/testsuite/archiver/create_cmd_test.py index 07db6ee51..a65d9f999 100644 --- a/src/borg/testsuite/archiver/create_cmd_test.py +++ b/src/borg/testsuite/archiver/create_cmd_test.py @@ -1172,6 +1172,33 @@ def test_create_with_compression_algorithms(archivers, request): assert_dirs_equal(archiver.input_path, os.path.join(extract_path, "input")) +def test_create_exclude_dataless(archivers, request, monkeypatch): + """Files flagged SF_DATALESS are excluded with --exclude-dataless.""" + from ...archive import SF_DATALESS + import borg.archiver.create_cmd as create_cmd_module + + archiver = request.getfixturevalue(archivers) + if archiver.EXE: + pytest.skip("Skipping binary test due to patch objects") + create_regular_file(archiver.input_path, "file1", size=1024 * 80) + create_regular_file(archiver.input_path, "cloudfile", size=1024 * 80) + + # SF_DATALESS cannot be set from userspace, so fake the flags lookup. 
+ def fake_get_flags(path, st, fd=None): + return SF_DATALESS if path.endswith("cloudfile") else 0 + + cmd(archiver, "repo-create", RK_ENCRYPTION) + + monkeypatch.setattr(create_cmd_module, "get_flags", fake_get_flags) + output = cmd(archiver, "create", "--list", "--exclude-dataless", "test", "input") + assert "A input/file1" in output + assert "x input/cloudfile" in output + + # without --exclude-dataless, the file is backed up + output = cmd(archiver, "create", "--list", "test2", "input") + assert "A input/cloudfile" in output + + def test_exclude_nodump_dir_with_file(archivers, request): """A directory flagged NODUMP and its contents must not be archived.""" archiver = request.getfixturevalue(archivers)