Merge pull request #9756 from ThomasWaldmann/exclude-dataless-master

create: add --exclude-dataless to skip cloud files not materialized locally
This commit is contained in:
TW 2026-06-11 09:39:23 +02:00 committed by GitHub
commit 23d5152840
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 53 additions and 3 deletions

View file

@ -122,6 +122,7 @@ complete -c borg -f -l 'exclude-caches' -d 'Exclude directories tagg
complete -c borg -l 'exclude-if-present' -d 'Exclude directories that contain FILENAME' -n "__fish_seen_subcommand_from create"
complete -c borg -f -l 'keep-exclude-tags' -d 'Keep tag files of excluded directories' -n "__fish_seen_subcommand_from create"
complete -c borg -f -l 'exclude-nodump' -d 'Exclude files flagged NODUMP' -n "__fish_seen_subcommand_from create"
complete -c borg -f -l 'exclude-dataless' -d 'Exclude files flagged DATALESS (macOS)' -n "__fish_seen_subcommand_from create"
# Filesystem options
complete -c borg -f -s x -l 'one-file-system' -d 'Stay in the same file system' -n "__fish_seen_subcommand_from create"
complete -c borg -f -l 'numeric-ids' -d 'Only store numeric user:group identifiers' -n "__fish_seen_subcommand_from create"

View file

@ -53,6 +53,11 @@ from .platform import acl_get, acl_set, set_flags, get_flags, swidth
from .repository import Repository, NoManifestError
from .repoobj import RepoObj
# macOS: SF_DATALESS marks dataless placeholder files (e.g. cloud files not materialized locally).
# Reading such files triggers downloading their content. stat.SF_DATALESS is only available
# from Python 3.13 on, thus we fall back to the value from macOS' sys/stat.h.
SF_DATALESS = getattr(stat, "SF_DATALESS", 0x40000000)
has_link = hasattr(os, "link")

View file

@ -10,7 +10,7 @@ from io import TextIOWrapper
from ._common import with_repository, Highlander
from .. import helpers
from ..archive import Archive, is_special
from ..archive import Archive, is_special, SF_DATALESS
from ..archive import BackupError, BackupOSError, BackupItemExcluded, backup_io, OsOpen, stat_update_check
from ..archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
from ..cache import Cache
@ -32,7 +32,7 @@ from ..helpers import Error, CommandError, BackupWarning, FileChangedWarning
from ..helpers.argparsing import ArgumentParser
from ..manifest import Manifest
from ..patterns import PatternMatcher
from ..platform import is_win32
from ..platform import is_win32, get_flags
from ..logger import create_logger
@ -225,6 +225,7 @@ class CreateMixIn:
self.noflags = args.noflags
self.noacls = args.noacls
self.noxattrs = args.noxattrs
self.exclude_dataless = args.exclude_dataless
dry_run = args.dry_run
self.start_backup = time.time_ns()
t0 = archive_ts_now()
@ -476,6 +477,15 @@ class CreateMixIn:
# directory of the mounted filesystem that shadows the mountpoint dir).
recurse = restrict_dev is None or st.st_dev == restrict_dev
if self.exclude_dataless:
# this needs to be done BEFORE opening the file, as opening
# would otherwise materialize the file contents.
with backup_io("flags"):
flags = get_flags(path=path, st=st)
if flags & SF_DATALESS:
self.print_file_status("x", path)
return
if not stat.S_ISDIR(st.st_mode):
# directories cannot go in this branch because they can be excluded based on tag
# files they might contain
@ -886,7 +896,14 @@ class CreateMixIn:
help="set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: ``\\n``) ",
)
define_exclusion_group(subparser, tag_files=True)
exclude_group = define_exclusion_group(subparser, tag_files=True)
exclude_group.add_argument(
"--exclude-dataless",
dest="exclude_dataless",
action="store_true",
help="exclude files flagged DATALESS (macOS: placeholder files whose content "
"is not materialized locally, e.g. not-downloaded cloud storage files)",
)
fs_group = subparser.add_argument_group("Filesystem options")
fs_group.add_argument(

View file

@ -1172,6 +1172,33 @@ def test_create_with_compression_algorithms(archivers, request):
assert_dirs_equal(archiver.input_path, os.path.join(extract_path, "input"))
def test_create_exclude_dataless(archivers, request, monkeypatch):
    """With --exclude-dataless, create skips files whose flags include SF_DATALESS.

    The flags lookup referenced by the create command module is replaced with a
    stub, so the test does not need a real dataless file on disk. A second run
    without the option checks that the same file is then archived.
    """
    from ...archive import SF_DATALESS
    import borg.archiver.create_cmd as create_cmd_module

    archiver = request.getfixturevalue(archivers)
    if archiver.EXE:
        pytest.skip("Skipping binary test due to patch objects")

    # one ordinary file and one file that the stub below reports as dataless
    for name in ("file1", "cloudfile"):
        create_regular_file(archiver.input_path, name, size=1024 * 80)

    def stub_get_flags(path, st, fd=None):
        # SF_DATALESS cannot be set from userspace, so the flag is faked here:
        # only the path ending in "cloudfile" is reported as dataless.
        if path.endswith("cloudfile"):
            return SF_DATALESS
        return 0

    cmd(archiver, "repo-create", RK_ENCRYPTION)
    monkeypatch.setattr(create_cmd_module, "get_flags", stub_get_flags)

    # with the option: regular file is added, flagged file is listed as excluded
    listing = cmd(archiver, "create", "--list", "--exclude-dataless", "test", "input")
    assert "A input/file1" in listing
    assert "x input/cloudfile" in listing

    # without --exclude-dataless, the file is backed up
    listing = cmd(archiver, "create", "--list", "test2", "input")
    assert "A input/cloudfile" in listing
def test_exclude_nodump_dir_with_file(archivers, request):
"""A directory flagged NODUMP and its contents must not be archived."""
archiver = request.getfixturevalue(archivers)