extract, diff, ...: use raise_missing=False

preloading: always use raise_missing=False, because
the behaviour is defined at preloading time.

fetch_many: use get_many with raise_missing=False.
if get_many yields None instead of the expected chunk
cdata bytes, create an all-zero replacement chunk of the
correct size on the fly (if the size is known; otherwise
None is yielded) and emit an error message about the
missing chunk id / size.

note: for borg recreate with re-chunking this is a bit
ugly, because it will transform a missing chunk into
a range of zero bytes in the target file in the recreated
archive. it will emit an error message at recreate time,
but afterwards the recreated archive will not "know"
about the problem any more and will just have that
zero-patched file.
so I guess borg recreate with re-chunking should
only be used on repos that are not missing any chunks.
This commit is contained in:
Thomas Waldmann 2025-03-01 21:51:28 +01:00
parent a9b2291281
commit 84fe9d2c67
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
3 changed files with 34 additions and 4 deletions

View file

@ -325,8 +325,14 @@ class DownloadPipeline:
sizes = [None] * len(ids)
else:
raise TypeError(f"unsupported or mixed element types: {chunks}")
for id, size, cdata in zip(ids, sizes, self.repository.get_many(ids, is_preloaded=is_preloaded)):
_, data = self.repo_objs.parse(id, cdata, ro_type=ro_type)
for id, size, cdata in zip(
ids, sizes, self.repository.get_many(ids, is_preloaded=is_preloaded, raise_missing=False)
):
if cdata is None:
logger.error(f"repository object {bin_to_hex(id)} missing, returning {size} zero bytes.")
data = zeros[:size] if size is not None else None
else:
_, data = self.repo_objs.parse(id, cdata, ro_type=ro_type)
assert size is None or len(data) == size
yield data

View file

@ -943,7 +943,9 @@ class RemoteRepository:
self.to_send.push_back(msgpack.packb({MSGID: self.msgid, MSG: cmd, ARGS: args}))
if not self.to_send and self.preload_ids:
chunk_id = self.preload_ids.pop(0)
args = {"id": chunk_id, "raise_missing": True}
# for preloading chunks, the raise_missing behaviour is defined HERE,
# not in the get_many / fetch_many call that later fetches the preloaded chunks.
args = {"id": chunk_id, "raise_missing": False}
self.msgid += 1
self.chunkid_to_msgids.setdefault(chunk_id, []).append(self.msgid)
self.to_send.push_back(msgpack.packb({MSGID: self.msgid, MSG: "get", ARGS: args}))

View file

@ -9,7 +9,7 @@ import pytest
from ... import xattr
from ...chunker import has_seek_hole
from ...constants import * # NOQA
from ...helpers import EXIT_WARNING, BackupPermissionError
from ...helpers import EXIT_WARNING, BackupPermissionError, bin_to_hex
from ...helpers import flags_noatime, flags_normal
from .. import changedir, same_ts_ns
from .. import are_symlinks_supported, are_hardlinks_supported, is_utime_fully_supported, is_birthtime_fully_supported
@ -24,6 +24,9 @@ from . import (
_extract_hardlinks_setup,
assert_creates_file,
generate_archiver_tests,
create_src_archive,
open_archive,
src_file,
)
pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA
@ -737,3 +740,22 @@ def test_dry_run_extraction_flags(archivers, request):
print(output)
assert not os.listdir("output"), "Output directory should be empty after dry-run"
def test_extract_file_with_missing_chunk(archivers, request):
    """Extract an archive after one chunk of a file was deleted from the repository.

    The last chunk of the ``src_file`` item is removed directly from the
    repository, then ``extract`` is run and its output is checked for the
    message naming the missing object id and the zero-byte replacement size.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    create_src_archive(archiver, "archive")
    # Get rid of a chunk
    archive, repository = open_archive(archiver.repository_path, "archive")
    with repository:
        # Only the first item whose path ends with src_file is damaged.
        damaged_item = next((entry for entry in archive.iter_items() if entry.path.endswith(src_file)), None)
        assert damaged_item is not None  # missed the file
        chunk = damaged_item.chunks[-1]
        repository.delete(chunk.id)
    output = cmd(archiver, "extract", "archive")
    # TODO: this is a bit dirty still: no warning/error rc, no filename output for the damaged file.
    expected_msg = f"repository object {bin_to_hex(chunk.id)} missing, returning {chunk.size} zero bytes."
    assert expected_msg in output