From bb5cf96fe8a469ec3fbfe4c2184d587b86a8fe1f Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Sat, 14 Sep 2024 23:57:48 +0200
Subject: [PATCH] check: fix/enhance code, rewrite test

- we should always output name and id when talking about an archive
- no problem anymore if names in archives directory are "duplicate"
- use "by-id" archives directory entry delete function
- rewrite/simplify test for borg check --undelete-archives
---
 src/borg/archive.py                      | 40 +++++++++---------------
 src/borg/testsuite/archiver/check_cmd.py | 36 ++++++---------------
 2 files changed, 25 insertions(+), 51 deletions(-)

diff --git a/src/borg/archive.py b/src/borg/archive.py
index 3acc944c2..77673ea92 100644
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -1818,23 +1818,13 @@ class ArchiveChecker:
                 archive = self.key.unpack_archive(data)
                 archive = ArchiveItem(internal_dict=archive)
                 name = archive.name
-                logger.info(f"Found archive {name}, id {bin_to_hex(chunk_id)}.")
-                if self.manifest.archives.exists_name_and_id(name, chunk_id):
+                archive_id, archive_id_hex = chunk_id, bin_to_hex(chunk_id)
+                logger.info(f"Found archive {name} {archive_id_hex}.")
+                if self.manifest.archives.exists_name_and_id(name, archive_id):
                     logger.info("We already have an archives directory entry for this.")
-                elif not self.manifest.archives.exists(name):
-                    # no archives list entry yet and name is not taken yet, create an entry
-                    logger.warning(f"Creating archives directory entry for {name}.")
-                    self.manifest.archives.create(name, chunk_id, archive.time)
                 else:
-                    # we don't have an entry yet, but the name is taken by something else
-                    i = 1
-                    while True:
-                        new_name = "%s.%d" % (name, i)
-                        if not self.manifest.archives.exists(new_name):
-                            break
-                        i += 1
-                    logger.warning(f"Creating archives directory entry using {new_name}.")
-                    self.manifest.archives.create(new_name, chunk_id, archive.time)
+                    logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.")
+                    self.manifest.archives.create(name, archive_id, archive.time)
         pi.finish()
         logger.info("Rebuilding missing archives directory entries completed.")
 
@@ -2046,28 +2036,28 @@ class ArchiveChecker:
         with cache_if_remote(self.repository) as repository:
             for i, info in enumerate(archive_infos):
                 pi.show(i)
-                logger.info(f"Analyzing archive {info.name} ({i + 1}/{num_archives})")
-                archive_id = info.id
+                archive_id, archive_id_hex = info.id, bin_to_hex(info.id)
+                logger.info(f"Analyzing archive {info.name} {archive_id_hex} ({i + 1}/{num_archives})")
                 if archive_id not in self.chunks:
-                    logger.error("Archive metadata block %s is missing!", bin_to_hex(archive_id))
+                    logger.error(f"Archive metadata block {archive_id_hex} is missing!")
                     self.error_found = True
                     if self.repair:
-                        logger.error(f"Deleting broken archive {info.name}.")
-                        self.manifest.archives.delete(info.name)
+                        logger.error(f"Deleting broken archive {info.name} {archive_id_hex}.")
+                        self.manifest.archives.delete_by_id(archive_id)
                     else:
-                        logger.error(f"Would delete broken archive {info.name}.")
+                        logger.error(f"Would delete broken archive {info.name} {archive_id_hex}.")
                     continue
                 cdata = self.repository.get(archive_id)
                 try:
                     _, data = self.repo_objs.parse(archive_id, cdata, ro_type=ROBJ_ARCHIVE_META)
                 except IntegrityError as integrity_error:
-                    logger.error("Archive metadata block %s is corrupted: %s", bin_to_hex(archive_id), integrity_error)
+                    logger.error(f"Archive metadata block {archive_id_hex} is corrupted: {integrity_error}")
                     self.error_found = True
                     if self.repair:
-                        logger.error(f"Deleting broken archive {info.name}.")
-                        self.manifest.archives.delete(info.name)
+                        logger.error(f"Deleting broken archive {info.name} {archive_id_hex}.")
+                        self.manifest.archives.delete_by_id(archive_id)
                     else:
-                        logger.error(f"Would delete broken archive {info.name}.")
+                        logger.error(f"Would delete broken archive {info.name} {archive_id_hex}.")
                     continue
                 archive = self.key.unpack_archive(data)
                 archive = ArchiveItem(internal_dict=archive)
diff --git a/src/borg/testsuite/archiver/check_cmd.py b/src/borg/testsuite/archiver/check_cmd.py
index 623430b42..78742a01d 100644
--- a/src/borg/testsuite/archiver/check_cmd.py
+++ b/src/borg/testsuite/archiver/check_cmd.py
@@ -267,36 +267,20 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request):
     cmd(archiver, "check", exit_code=0)
 
 
-def test_manifest_rebuild_duplicate_archive(archivers, request):
+def test_check_undelete_archives(archivers, request):
     archiver = request.getfixturevalue(archivers)
-    check_cmd_setup(archiver)
-    archive, repository = open_archive(archiver.repository_path, "archive1")
-    repo_objs = archive.repo_objs
-    with repository:
-        manifest = repository.get_manifest()
-        corrupted_manifest = manifest[:123] + b"corrupted!" + manifest[123:]
-        repository.put_manifest(corrupted_manifest)
-        archive_dict = {
-            "command_line": "",
-            "item_ptrs": [],
-            "hostname": "foo",
-            "username": "bar",
-            "name": "archive1",
-            "time": "2016-12-15T18:49:51.849711",
-            "version": 2,
-        }
-        archive = repo_objs.key.pack_metadata(archive_dict)
-        archive_id = repo_objs.id_hash(archive)
-        repository.put(archive_id, repo_objs.format(archive_id, {}, archive, ro_type=ROBJ_ARCHIVE_META))
-    cmd(archiver, "check", exit_code=1)
-    # when undeleting archives, borg check will discover both the original archive1 as well as
-    # the fake archive1 we created above. for the fake one, a new archives directory entry
-    # named archive1.1 will be created because we request undeleting archives and there
-    # is no archives directory entry for the fake archive yet.
+    check_cmd_setup(archiver)  # creates archive1 and archive2
+    # borg delete does it rather quick and dirty: it only kills the archives directory entry
+    cmd(archiver, "delete", "archive1")
+    cmd(archiver, "delete", "archive2")
+    output = cmd(archiver, "repo-list")
+    assert "archive1" not in output
+    assert "archive2" not in output
+    # borg check will re-discover archive1 and archive2 and new archives directory entries
+    # will be created because we requested undeleting archives.
     cmd(archiver, "check", "--repair", "--undelete-archives", exit_code=0)
     output = cmd(archiver, "repo-list")
     assert "archive1" in output
-    assert "archive1.1" in output
     assert "archive2" in output
 
 