Merge pull request #9200 from ThomasWaldmann/fix-9199-legacyremote-raise-missing
Some checks failed
Lint / lint (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / security (push) Has been cancelled
CodeQL / Analyze (push) Has been cancelled
CI / asan_ubsan (push) Has been cancelled
CI / native_tests (push) Has been cancelled
CI / vm_tests (Haiku, false, haiku, r1beta5) (push) Has been cancelled
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Has been cancelled
CI / vm_tests (OpenBSD, false, openbsd, 7.7) (push) Has been cancelled
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Has been cancelled
CI / windows_tests (push) Has been cancelled

transfer and legacyremote fixes
This commit is contained in:
TW 2025-12-04 01:59:31 +01:00 committed by GitHub
commit 231bf26552
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 52 additions and 12 deletions

View file

@ -20,7 +20,16 @@ logger = create_logger()
def transfer_chunks(
upgrader, other_repository, other_manifest, other_chunks, archive, cache, recompress, dry_run, chunker_params=None
upgrader,
other_repository,
other_manifest,
other_chunks,
archive,
cache,
manifest,
recompress,
dry_run,
chunker_params=None,
):
"""
Transfer chunks from another repository to the current repository.
@ -41,7 +50,7 @@ def transfer_chunks(
file = ChunkIteratorFileWrapper(chunk_iterator)
# Create a chunker with the specified parameters
chunker = get_chunker(*chunker_params, key=archive.key, sparse=False)
chunker = get_chunker(*chunker_params, key=manifest.key, sparse=False)
for chunk in chunker.chunkify(file):
if not dry_run:
chunk_id, data = cached_hash(chunk, archive.key.id_hash)
@ -226,6 +235,7 @@ class TransferMixIn:
other_chunks,
archive,
cache,
manifest,
args.recompress,
dry_run,
args.chunker_params,

View file

@ -664,11 +664,12 @@ class LegacyRemoteRepository:
def list(self, limit=None, marker=None):
"""actual remoting is done via self.call in the @api decorator"""
def get(self, id, read_data=True):
for resp in self.get_many([id], read_data=read_data):
def get(self, id, read_data=True, raise_missing=True):
for resp in self.get_many([id], read_data=read_data, raise_missing=raise_missing):
return resp
def get_many(self, ids, read_data=True, is_preloaded=False):
def get_many(self, ids, read_data=True, is_preloaded=False, raise_missing=True):
# note: legacy remote protocol does not support raise_missing parameter, so we ignore it here
yield from self.call_many("get", [{"id": id, "read_data": read_data} for id in ids], is_preloaded=is_preloaded)
@api(since=parse_version("1.0.0"))
@ -747,11 +748,11 @@ class RepositoryNoCache:
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the context manager by calling ``self.close()``.

    The exception details passed in are not inspected. Nothing is returned
    (implicitly ``None``), so an exception raised inside the ``with`` body is
    not suppressed by this method.
    """
    self.close()
def get(self, key, read_data=True):
return next(self.get_many([key], read_data=read_data, cache=False))
def get(self, key, read_data=True, raise_missing=True):
return next(self.get_many([key], read_data=read_data, raise_missing=raise_missing, cache=False))
def get_many(self, keys, read_data=True, cache=True):
for key, data in zip(keys, self.repository.get_many(keys, read_data=read_data)):
def get_many(self, keys, read_data=True, cache=True, raise_missing=True):
for key, data in zip(keys, self.repository.get_many(keys, read_data=read_data, raise_missing=raise_missing)):
yield self.transform(key, data)
def log_instrumentation(self):
@ -856,10 +857,12 @@ class RepositoryCache(RepositoryNoCache):
self.cache.clear()
shutil.rmtree(self.basedir)
def get_many(self, keys, read_data=True, cache=True):
def get_many(self, keys, read_data=True, cache=True, raise_missing=True):
# It could use different cache keys depending on read_data and cache full vs. meta-only chunks.
unknown_keys = [key for key in keys if self.prefixed_key(key, complete=read_data) not in self.cache]
repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys, read_data=read_data))
repository_iterator = zip(
unknown_keys, self.repository.get_many(unknown_keys, read_data=read_data, raise_missing=raise_missing)
)
for key in keys:
pkey = self.prefixed_key(key, complete=read_data)
if pkey in self.cache:
@ -877,7 +880,7 @@ class RepositoryCache(RepositoryNoCache):
else:
# slow path: eviction during this get_many removed this key from the cache
t0 = time.perf_counter()
data = self.repository.get(key, read_data=read_data)
data = self.repository.get(key, read_data=read_data, raise_missing=raise_missing)
self.slow_lat += time.perf_counter() - t0
transformed = self.add_entry(key, data, cache, complete=read_data)
self.slow_misses += 1

View file

@ -473,6 +473,33 @@ def test_transfer_rechunk(archivers, request, monkeypatch):
assert dest_hash == source_file_hashes[item.path], f"Content hash mismatch for {item.path}"
def test_transfer_rechunk_dry_run(archivers, request, monkeypatch):
    """Check that ``transfer --dry-run`` can be combined with ``--chunker-params``.

    Regression guard for issue #9199: the re-chunking code path must not fail
    (for example with an AttributeError because archive is None) during a dry run.
    """
    archiver = request.getfixturevalue(archivers)

    block_size = 512
    src_params = "buzhash,19,23,21,4095"  # close to the default buzhash settings
    dst_params = f"fixed,{block_size}"  # fixed-size chunks, fully deterministic

    # Fill the source repository with a single archive holding one random file.
    with setup_repos(archiver, monkeypatch) as other_repo1:
        payload = random.randbytes(8 * block_size)
        create_regular_file(archiver.input_path, "file.bin", contents=payload)
        create_args = ["create", f"--chunker-params={src_params}", "arch", "input"]
        cmd(archiver, *create_args)

    # After the with-block we operate on the destination repo (setup_repos switches on exit).
    # The dry-run transfer with re-chunking only has to complete without raising.
    transfer_args = ["transfer", other_repo1, "--dry-run", f"--chunker-params={dst_params}"]
    cmd(archiver, *transfer_args)

    # A dry run must not leave the archive behind in the destination repository.
    assert "arch" not in cmd(archiver, "repo-list")
def test_issue_9022(archivers, request, monkeypatch):
"""
Regression test for borgbackup/borg#9022: After "borg transfer --from-borg1",