From 84fe9d2c67b3469f8d0ee7ad968ee49e2def54ed Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 1 Mar 2025 21:51:28 +0100 Subject: [PATCH] extract, diff, ...: use raise_missing=False preloading: always use raise_missing=False, because the behaviour is defined at preloading time. fetch_many: use get_many with raise_missing=False. if get_many yields None instead of the expected chunk cdata bytes, create an all-zero replacement chunk of the correct size on the fly (if the size is known; otherwise None is yielded) and emit an error message about the missing chunk id / size. note: for borg recreate with re-chunking this is not ideal, because it will transform a missing chunk into a range of zero bytes in the target file in the recreated archive. it will emit an error message at recreate time, but afterwards the recreated archive will not "know" about the problem any more and will just contain that zero-patched file. so borg recreate with re-chunking should only be used on repos that are not missing any chunks. 
--- src/borg/archive.py | 10 ++++++-- src/borg/remote.py | 4 +++- .../testsuite/archiver/extract_cmd_test.py | 24 ++++++++++++++++++- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 0228568c1..da5b74410 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -325,8 +325,14 @@ class DownloadPipeline: sizes = [None] * len(ids) else: raise TypeError(f"unsupported or mixed element types: {chunks}") - for id, size, cdata in zip(ids, sizes, self.repository.get_many(ids, is_preloaded=is_preloaded)): - _, data = self.repo_objs.parse(id, cdata, ro_type=ro_type) + for id, size, cdata in zip( + ids, sizes, self.repository.get_many(ids, is_preloaded=is_preloaded, raise_missing=False) + ): + if cdata is None: + logger.error(f"repository object {bin_to_hex(id)} missing, returning {size} zero bytes.") + data = zeros[:size] if size is not None else None + else: + _, data = self.repo_objs.parse(id, cdata, ro_type=ro_type) assert size is None or len(data) == size yield data diff --git a/src/borg/remote.py b/src/borg/remote.py index 2d642fdb7..7c0871c02 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -943,7 +943,9 @@ class RemoteRepository: self.to_send.push_back(msgpack.packb({MSGID: self.msgid, MSG: cmd, ARGS: args})) if not self.to_send and self.preload_ids: chunk_id = self.preload_ids.pop(0) - args = {"id": chunk_id, "raise_missing": True} + # for preloading chunks, the raise_missing behaviour is defined HERE, + # not in the get_many / fetch_many call that later fetches the preloaded chunks. 
+ args = {"id": chunk_id, "raise_missing": False} self.msgid += 1 self.chunkid_to_msgids.setdefault(chunk_id, []).append(self.msgid) self.to_send.push_back(msgpack.packb({MSGID: self.msgid, MSG: "get", ARGS: args})) diff --git a/src/borg/testsuite/archiver/extract_cmd_test.py b/src/borg/testsuite/archiver/extract_cmd_test.py index a6dd9632c..fc0226ed2 100644 --- a/src/borg/testsuite/archiver/extract_cmd_test.py +++ b/src/borg/testsuite/archiver/extract_cmd_test.py @@ -9,7 +9,7 @@ import pytest from ... import xattr from ...chunker import has_seek_hole from ...constants import * # NOQA -from ...helpers import EXIT_WARNING, BackupPermissionError +from ...helpers import EXIT_WARNING, BackupPermissionError, bin_to_hex from ...helpers import flags_noatime, flags_normal from .. import changedir, same_ts_ns from .. import are_symlinks_supported, are_hardlinks_supported, is_utime_fully_supported, is_birthtime_fully_supported @@ -24,6 +24,9 @@ from . import ( _extract_hardlinks_setup, assert_creates_file, generate_archiver_tests, + create_src_archive, + open_archive, + src_file, ) pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary") # NOQA @@ -737,3 +740,22 @@ def test_dry_run_extraction_flags(archivers, request): print(output) assert not os.listdir("output"), "Output directory should be empty after dry-run" + + +def test_extract_file_with_missing_chunk(archivers, request): + archiver = request.getfixturevalue(archivers) + cmd(archiver, "repo-create", RK_ENCRYPTION) + create_src_archive(archiver, "archive") + # Get rid of a chunk + archive, repository = open_archive(archiver.repository_path, "archive") + with repository: + for item in archive.iter_items(): + if item.path.endswith(src_file): + chunk = item.chunks[-1] + repository.delete(chunk.id) + break + else: + assert False # missed the file + output = cmd(archiver, "extract", "archive") + # TODO: this is a bit dirty still: no warning/error rc, no filename output for the 
damaged file. + assert f"repository object {bin_to_hex(chunk.id)} missing, returning {chunk.size} zero bytes." in output