Merge pull request #8849 from ThomasWaldmann/fd-based-dir-is-tagged

dir_is_tagged/_is_cachedir: add fd-based operations
This commit is contained in:
TW 2025-05-18 22:38:12 +02:00 committed by GitHub
commit b73af3642d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 169 additions and 24 deletions

View file

@ -484,7 +484,7 @@ class CreateMixIn:
with backup_io("fstat"):
st = stat_update_check(st, os.fstat(child_fd))
if recurse:
tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present)
tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present, dir_fd=child_fd)
if tag_names:
# if we are already recursing in an excluded dir, we do not need to do anything else than
# returning (we do not need to archive or recurse into tagged directories), see #3991:

View file

@ -178,40 +178,60 @@ def get_config_dir(*, legacy=False, create=True):
return config_dir
def dir_is_cachedir(path=None, dir_fd=None):
    """Determine whether the specified directory is a cache directory (and
    therefore should potentially be excluded from the backup) according to
    the CACHEDIR.TAG protocol (http://www.bford.info/cachedir/spec.html).

    If dir_fd is provided, operations will be based on the directory file descriptor.
    Otherwise (path is provided), operations will be based on the directory path.

    Returns True only if the tag file could be opened and its first
    len(CACHE_TAG_CONTENTS) bytes are equal to CACHE_TAG_CONTENTS.
    Any OSError while opening or reading the tag file yields False.
    """
    try:
        if dir_fd is not None:
            # resolve the tag file name relative to the already opened directory
            tag_fd = os.open(CACHE_TAG_NAME, os.O_RDONLY, dir_fd=dir_fd)
        else:
            tag_fd = os.open(os.path.join(path, CACHE_TAG_NAME), os.O_RDONLY)
    except OSError:
        # FileNotFoundError is a subclass of OSError, no need to list it separately
        return False
    try:
        return os.read(tag_fd, len(CACHE_TAG_CONTENTS)) == CACHE_TAG_CONTENTS
    except OSError:
        return False
    finally:
        os.close(tag_fd)
def dir_is_tagged(path=None, exclude_caches=None, exclude_if_present=None, dir_fd=None):
    """Determine whether the specified directory is excluded by being a cache
    directory or containing user-specified tag files/directories.

    If dir_fd is provided, operations will be based on the directory file descriptor.
    Otherwise (path is provided), operations will be based on the directory path.

    :param path: directory path, used when dir_fd is None
    :param exclude_caches: if true, check the directory for a valid CACHEDIR.TAG
    :param exclude_if_present: iterable of tag file/directory names to look for, or None
    :param dir_fd: file descriptor of the opened directory, takes precedence over path
    :return: list of the names of the tag files/directories found (either
             CACHEDIR.TAG or the matching user-specified files/directories)
    """
    use_fd = dir_fd is not None
    tag_names = []
    if exclude_caches:
        is_cachedir = dir_is_cachedir(dir_fd=dir_fd) if use_fd else dir_is_cachedir(path=path)
        if is_cachedir:
            tag_names.append(CACHE_TAG_NAME)
    if exclude_if_present is not None:
        for tag in exclude_if_present:
            if use_fd:
                try:
                    os.stat(tag, dir_fd=dir_fd)
                except (OSError, ValueError):
                    # os.path.exists (used by the path-based branch) reports False for
                    # any OSError / ValueError, not only for a missing file - behave
                    # the same here, so both modes agree on what "present" means.
                    continue
            elif not os.path.exists(os.path.join(path, tag)):
                continue
            tag_names.append(tag)
    return tag_names

View file

@ -6,6 +6,7 @@ import os
import shutil
import sys
from argparse import ArgumentTypeError
from contextlib import contextmanager
from datetime import datetime, timezone, timedelta
from io import StringIO, BytesIO
@ -14,6 +15,8 @@ import pytest
from ..archiver.prune_cmd import prune_within, prune_split
from .. import platform
from ..constants import * # NOQA
from ..constants import CACHE_TAG_NAME, CACHE_TAG_CONTENTS
from ..helpers.fs import dir_is_tagged
from ..helpers import Location
from ..helpers import Buffer
from ..helpers import (
@ -1519,3 +1522,125 @@ def test_ec_invalid():
)
def test_max_ec(ec1, ec2, ec_max):
assert max_ec(ec1, ec2) == ec_max
def test_dir_is_tagged(tmpdir):
    """Exercise dir_is_tagged via directory paths and via directory file descriptors."""

    @contextmanager
    def open_dir(path):
        # hand out a read-only fd for the directory and always close it afterwards
        dir_fd = os.open(path, os.O_RDONLY)
        try:
            yield dir_fd
        finally:
            os.close(dir_fd)

    def by_path(directory, **kwargs):
        # path-based lookup
        return dir_is_tagged(path=str(directory), **kwargs)

    def by_fd(directory, **kwargs):
        # fd-based lookup, the directory fd only lives for this single call
        with open_dir(str(directory)) as dir_fd:
            return dir_is_tagged(dir_fd=dir_fd, **kwargs)

    # directories for the exclude_caches checks: valid and invalid CACHEDIR.TAG
    cache_dir = tmpdir.mkdir("cache_dir")
    cache_dir.join(CACHE_TAG_NAME).write_binary(CACHE_TAG_CONTENTS)
    invalid_cache_dir = tmpdir.mkdir("invalid_cache_dir")
    invalid_cache_dir.join(CACHE_TAG_NAME).write_binary(b"invalid signature")

    # directories for the exclude_if_present checks
    tagged_dir = tmpdir.mkdir("tagged_dir")
    tagged_dir.join(".NOBACKUP").write("test")
    other_tagged_dir = tmpdir.mkdir("other_tagged_dir")
    other_tagged_dir.join(".DONOTBACKUP").write("test")

    # directory having a CACHEDIR.TAG as well as a custom tag file
    both_dir = tmpdir.mkdir("both_dir")
    both_dir.join(CACHE_TAG_NAME).write_binary(CACHE_TAG_CONTENTS)
    both_dir.join(".NOBACKUP").write("test")

    # directory without any tag files
    normal_dir = tmpdir.mkdir("normal_dir")

    # edge cases: nothing to look for
    test_dir = tmpdir.mkdir("test_dir")
    assert by_path(test_dir, exclude_caches=None, exclude_if_present=None) == []
    assert by_path(test_dir, exclude_if_present=[]) == []

    # a non-existent directory must not raise an exception
    non_existent_dir = tmpdir.join("non_existent")
    assert by_path(non_existent_dir, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == []

    # Tests 1 + 2: exclude_caches, path-based first, then fd-based
    for lookup in (by_path, by_fd):
        assert lookup(cache_dir, exclude_caches=True) == [CACHE_TAG_NAME]
        assert lookup(invalid_cache_dir, exclude_caches=True) == []
        assert lookup(normal_dir, exclude_caches=True) == []
        for directory in (cache_dir, invalid_cache_dir, normal_dir):
            assert lookup(directory, exclude_caches=False) == []

    # Tests 3 + 4: exclude_if_present, path-based first, then fd-based
    for lookup in (by_path, by_fd):
        tags = [".NOBACKUP"]
        assert lookup(tagged_dir, exclude_if_present=tags) == [".NOBACKUP"]
        assert lookup(other_tagged_dir, exclude_if_present=tags) == []
        assert lookup(normal_dir, exclude_if_present=tags) == []

        tags = [".NOBACKUP", ".DONOTBACKUP"]
        assert lookup(tagged_dir, exclude_if_present=tags) == [".NOBACKUP"]
        assert lookup(other_tagged_dir, exclude_if_present=tags) == [".DONOTBACKUP"]
        assert lookup(normal_dir, exclude_if_present=tags) == []

    # Tests 5 + 6: both exclude types together, path-based first, then fd-based
    for lookup in (by_path, by_fd):
        result = lookup(both_dir, exclude_caches=True, exclude_if_present=[".NOBACKUP"])
        assert sorted(result) == [".NOBACKUP", CACHE_TAG_NAME]
        assert lookup(cache_dir, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == [CACHE_TAG_NAME]
        assert lookup(tagged_dir, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == [".NOBACKUP"]
        assert lookup(normal_dir, exclude_caches=True, exclude_if_present=[".NOBACKUP"]) == []