Merge pull request #9392 from mr-raj12/fix-syncfile-seek-tell
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (Haiku, false, haiku, r1beta5) (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OmniOS, false, omnios, r151056) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.7) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run

cache: add seek()/tell() to SyncFile, use SaveFile in _write_files_cache, fixes #9390
This commit is contained in:
TW 2026-02-28 11:43:01 +01:00 committed by GitHub
commit f3ac2e0d54
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 88 additions and 29 deletions

View file

@ -582,36 +582,39 @@ class FilesCacheMixin:
discard_after = min(newest_cmtime, start_backup_time)
ttl = int(os.environ.get("BORG_FILES_CACHE_TTL", 2))
files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
# TODO: use something like SaveFile here, but that didn't work due to SyncFile missing .seek().
with IntegrityCheckedFile(path=str(self.path / self.files_cache_name()), write=True) as fd:
entries = 0
age_discarded = 0
race_discarded = 0
for path_hash, entry in files.items():
entry = self.decompress_entry(entry)
if entry.age == 0: # current entries
if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
# Only keep files seen in this backup that old enough not to suffer race conditions relating
# to filesystem snapshots and ctime/mtime granularity or being modified while we read them.
keep = True
else:
keep = False
race_discarded += 1
else: # old entries
if entry.age < ttl:
# Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
keep = True
else:
keep = False
age_discarded += 1
if keep:
msgpack.pack((path_hash, entry), fd)
entries += 1
cache_path = str(self.path / self.files_cache_name())
with SaveFile(cache_path, binary=True) as sync_file:
with IntegrityCheckedFile(path=cache_path, write=True, override_fd=sync_file) as fd:
entries = 0
age_discarded = 0
race_discarded = 0
for path_hash, entry in files.items():
entry = self.decompress_entry(entry)
if entry.age == 0: # current entries
if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
# Only keep files seen in this backup that old enough not to suffer race conditions
# relating to filesystem snapshots and ctime/mtime granularity or being modified
# while we read them.
keep = True
else:
keep = False
race_discarded += 1
else: # old entries
if entry.age < ttl:
# Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
keep = True
else:
keep = False
age_discarded += 1
if keep:
msgpack.pack((path_hash, entry), fd)
entries += 1
integrity_data = fd.integrity_data
files_cache_logger.debug(f"FILES-CACHE-KILL: removed {age_discarded} entries with age >= TTL [{ttl}]")
t_str = datetime.fromtimestamp(discard_after / 1e9, timezone.utc).isoformat()
files_cache_logger.debug(f"FILES-CACHE-KILL: removed {race_discarded} entries with ctime/mtime >= {t_str}")
files_cache_logger.debug(f"FILES-CACHE-SAVE: finished, {entries} remaining entries saved.")
return fd.integrity_data
return integrity_data
def file_known_and_unchanged(self, hashed_path, path_hash, st):
"""

View file

@ -1,4 +1,5 @@
import errno
import io
import os
import socket
import unicodedata
@ -163,7 +164,7 @@ class SyncFile:
that corresponds to path (like from os.open(path, ...) or os.mkstemp(...))
:param binary: whether to open in binary mode, default is False.
"""
mode = "xb" if binary else "x" # x -> raise FileExists exception in open() if file exists already
mode = "x+b" if binary else "x+" # x -> raise FileExists exception in open() if file exists already
self.path = path
if fd is None:
self.f = open(str(path), mode=mode) # Python file object
@ -180,6 +181,15 @@ class SyncFile:
def write(self, data):
self.f.write(data)
def read(self, *args, **kwargs):
return self.f.read(*args, **kwargs)
def seek(self, offset, whence=io.SEEK_SET):
return self.f.seek(offset, whence)
def tell(self):
return self.f.tell()
def sync(self):
"""
Synchronize file contents. Everything written prior to sync() must become durable before anything written
@ -195,6 +205,8 @@ class SyncFile:
def close(self):
"""sync() and close."""
if self.f.closed:
return
from .. import platform
dirname = None

View file

@ -1,6 +1,7 @@
import pytest
from ...crypto.file_integrity import DetachedIntegrityCheckedFile, FileIntegrityError
from ...crypto.file_integrity import DetachedIntegrityCheckedFile, FileIntegrityError, IntegrityCheckedFile
from ...platform import SyncFile
class TestReadIntegrityFile:
@ -130,3 +131,19 @@ class TestDetachedIntegrityCheckedFileParts:
if not partial_read:
fd.read()
# But overall it explodes with the final digest. Neat, eh?
class TestIntegrityCheckedFileWithSyncFile:
def test_write_and_verify_with_syncfile(self, tmp_path):
"""IntegrityCheckedFile works correctly with SyncFile as override_fd."""
path = str(tmp_path / "testfile")
with SyncFile(path, binary=True) as sf:
with IntegrityCheckedFile(path=path, write=True, override_fd=sf) as fd:
fd.write(b"test data for integrity check")
integrity_data = fd.integrity_data
assert integrity_data is not None
# verify the written data can be read back with integrity check
with IntegrityCheckedFile(path=path, write=False, integrity_data=integrity_data) as fd:
assert fd.read() == b"test data for integrity check"

View file

@ -1,4 +1,6 @@
from ...platform import swidth
import io
from ...platform import swidth, SyncFile
def test_swidth_ascii():
@ -11,3 +13,28 @@ def test_swidth_cjk():
def test_swidth_mixed():
assert swidth("borgバックアップ") == 4 + 6 * 2
def test_syncfile_seek_tell(tmp_path):
"""SyncFile exposes seek() and tell() from the underlying file object."""
path = tmp_path / "testfile"
with SyncFile(path, binary=True) as sf:
sf.write(b"hello world")
assert sf.tell() == 11
sf.seek(0, io.SEEK_SET)
assert sf.tell() == 0
sf.seek(0, io.SEEK_END)
assert sf.tell() == 11
sf.seek(5, io.SEEK_SET)
assert sf.tell() == 5
assert sf.read() == b" world"
assert path.read_bytes() == b"hello world"
def test_syncfile_close_idempotent(tmp_path):
"""Calling SyncFile.close() twice does not raise."""
path = tmp_path / "testfile"
sf = SyncFile(path, binary=True)
sf.write(b"data")
sf.close()
sf.close() # must not raise