mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-10 09:21:44 -04:00
Merge pull request #9392 from mr-raj12/fix-syncfile-seek-tell
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (Haiku, false, haiku, r1beta5) (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OmniOS, false, omnios, r151056) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.7) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (Haiku, false, haiku, r1beta5) (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OmniOS, false, omnios, r151056) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.7) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run
cache: add seek()/tell() to SyncFile, use SaveFile in _write_files_cache, fixes #9390
This commit is contained in:
commit
f3ac2e0d54
4 changed files with 88 additions and 29 deletions
|
|
@ -582,36 +582,39 @@ class FilesCacheMixin:
|
|||
discard_after = min(newest_cmtime, start_backup_time)
|
||||
ttl = int(os.environ.get("BORG_FILES_CACHE_TTL", 2))
|
||||
files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
|
||||
# TODO: use something like SaveFile here, but that didn't work due to SyncFile missing .seek().
|
||||
with IntegrityCheckedFile(path=str(self.path / self.files_cache_name()), write=True) as fd:
|
||||
entries = 0
|
||||
age_discarded = 0
|
||||
race_discarded = 0
|
||||
for path_hash, entry in files.items():
|
||||
entry = self.decompress_entry(entry)
|
||||
if entry.age == 0: # current entries
|
||||
if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
|
||||
# Only keep files seen in this backup that old enough not to suffer race conditions relating
|
||||
# to filesystem snapshots and ctime/mtime granularity or being modified while we read them.
|
||||
keep = True
|
||||
else:
|
||||
keep = False
|
||||
race_discarded += 1
|
||||
else: # old entries
|
||||
if entry.age < ttl:
|
||||
# Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
|
||||
keep = True
|
||||
else:
|
||||
keep = False
|
||||
age_discarded += 1
|
||||
if keep:
|
||||
msgpack.pack((path_hash, entry), fd)
|
||||
entries += 1
|
||||
cache_path = str(self.path / self.files_cache_name())
|
||||
with SaveFile(cache_path, binary=True) as sync_file:
|
||||
with IntegrityCheckedFile(path=cache_path, write=True, override_fd=sync_file) as fd:
|
||||
entries = 0
|
||||
age_discarded = 0
|
||||
race_discarded = 0
|
||||
for path_hash, entry in files.items():
|
||||
entry = self.decompress_entry(entry)
|
||||
if entry.age == 0: # current entries
|
||||
if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
|
||||
# Only keep files seen in this backup that old enough not to suffer race conditions
|
||||
# relating to filesystem snapshots and ctime/mtime granularity or being modified
|
||||
# while we read them.
|
||||
keep = True
|
||||
else:
|
||||
keep = False
|
||||
race_discarded += 1
|
||||
else: # old entries
|
||||
if entry.age < ttl:
|
||||
# Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
|
||||
keep = True
|
||||
else:
|
||||
keep = False
|
||||
age_discarded += 1
|
||||
if keep:
|
||||
msgpack.pack((path_hash, entry), fd)
|
||||
entries += 1
|
||||
integrity_data = fd.integrity_data
|
||||
files_cache_logger.debug(f"FILES-CACHE-KILL: removed {age_discarded} entries with age >= TTL [{ttl}]")
|
||||
t_str = datetime.fromtimestamp(discard_after / 1e9, timezone.utc).isoformat()
|
||||
files_cache_logger.debug(f"FILES-CACHE-KILL: removed {race_discarded} entries with ctime/mtime >= {t_str}")
|
||||
files_cache_logger.debug(f"FILES-CACHE-SAVE: finished, {entries} remaining entries saved.")
|
||||
return fd.integrity_data
|
||||
return integrity_data
|
||||
|
||||
def file_known_and_unchanged(self, hashed_path, path_hash, st):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import errno
|
||||
import io
|
||||
import os
|
||||
import socket
|
||||
import unicodedata
|
||||
|
|
@ -163,7 +164,7 @@ class SyncFile:
|
|||
that corresponds to path (like from os.open(path, ...) or os.mkstemp(...))
|
||||
:param binary: whether to open in binary mode, default is False.
|
||||
"""
|
||||
mode = "xb" if binary else "x" # x -> raise FileExists exception in open() if file exists already
|
||||
mode = "x+b" if binary else "x+" # x -> raise FileExists exception in open() if file exists already
|
||||
self.path = path
|
||||
if fd is None:
|
||||
self.f = open(str(path), mode=mode) # Python file object
|
||||
|
|
@ -180,6 +181,15 @@ class SyncFile:
|
|||
def write(self, data):
|
||||
self.f.write(data)
|
||||
|
||||
def read(self, *args, **kwargs):
|
||||
return self.f.read(*args, **kwargs)
|
||||
|
||||
def seek(self, offset, whence=io.SEEK_SET):
|
||||
return self.f.seek(offset, whence)
|
||||
|
||||
def tell(self):
|
||||
return self.f.tell()
|
||||
|
||||
def sync(self):
|
||||
"""
|
||||
Synchronize file contents. Everything written prior to sync() must become durable before anything written
|
||||
|
|
@ -195,6 +205,8 @@ class SyncFile:
|
|||
|
||||
def close(self):
|
||||
"""sync() and close."""
|
||||
if self.f.closed:
|
||||
return
|
||||
from .. import platform
|
||||
|
||||
dirname = None
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import pytest
|
||||
|
||||
from ...crypto.file_integrity import DetachedIntegrityCheckedFile, FileIntegrityError
|
||||
from ...crypto.file_integrity import DetachedIntegrityCheckedFile, FileIntegrityError, IntegrityCheckedFile
|
||||
from ...platform import SyncFile
|
||||
|
||||
|
||||
class TestReadIntegrityFile:
|
||||
|
|
@ -130,3 +131,19 @@ class TestDetachedIntegrityCheckedFileParts:
|
|||
if not partial_read:
|
||||
fd.read()
|
||||
# But overall it explodes with the final digest. Neat, eh?
|
||||
|
||||
|
||||
class TestIntegrityCheckedFileWithSyncFile:
|
||||
def test_write_and_verify_with_syncfile(self, tmp_path):
|
||||
"""IntegrityCheckedFile works correctly with SyncFile as override_fd."""
|
||||
path = str(tmp_path / "testfile")
|
||||
with SyncFile(path, binary=True) as sf:
|
||||
with IntegrityCheckedFile(path=path, write=True, override_fd=sf) as fd:
|
||||
fd.write(b"test data for integrity check")
|
||||
integrity_data = fd.integrity_data
|
||||
|
||||
assert integrity_data is not None
|
||||
|
||||
# verify the written data can be read back with integrity check
|
||||
with IntegrityCheckedFile(path=path, write=False, integrity_data=integrity_data) as fd:
|
||||
assert fd.read() == b"test data for integrity check"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
from ...platform import swidth
|
||||
import io
|
||||
|
||||
from ...platform import swidth, SyncFile
|
||||
|
||||
|
||||
def test_swidth_ascii():
|
||||
|
|
@ -11,3 +13,28 @@ def test_swidth_cjk():
|
|||
|
||||
def test_swidth_mixed():
|
||||
assert swidth("borgバックアップ") == 4 + 6 * 2
|
||||
|
||||
|
||||
def test_syncfile_seek_tell(tmp_path):
|
||||
"""SyncFile exposes seek() and tell() from the underlying file object."""
|
||||
path = tmp_path / "testfile"
|
||||
with SyncFile(path, binary=True) as sf:
|
||||
sf.write(b"hello world")
|
||||
assert sf.tell() == 11
|
||||
sf.seek(0, io.SEEK_SET)
|
||||
assert sf.tell() == 0
|
||||
sf.seek(0, io.SEEK_END)
|
||||
assert sf.tell() == 11
|
||||
sf.seek(5, io.SEEK_SET)
|
||||
assert sf.tell() == 5
|
||||
assert sf.read() == b" world"
|
||||
assert path.read_bytes() == b"hello world"
|
||||
|
||||
|
||||
def test_syncfile_close_idempotent(tmp_path):
|
||||
"""Calling SyncFile.close() twice does not raise."""
|
||||
path = tmp_path / "testfile"
|
||||
sf = SyncFile(path, binary=True)
|
||||
sf.write(b"data")
|
||||
sf.close()
|
||||
sf.close() # must not raise
|
||||
|
|
|
|||
Loading…
Reference in a new issue