check: fix/enhance code, rewrite test

- always output both the name and the id when referring to an archive
- duplicate names in the archives directory are no longer a problem
- use the "by-id" function to delete archives directory entries
- rewrite/simplify the test for borg check --undelete-archives
This commit is contained in:
Thomas Waldmann 2024-09-14 23:57:48 +02:00
parent 81a27c1dbe
commit bb5cf96fe8
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
2 changed files with 25 additions and 51 deletions

View file

@ -1818,23 +1818,13 @@ class ArchiveChecker:
archive = self.key.unpack_archive(data)
archive = ArchiveItem(internal_dict=archive)
name = archive.name
logger.info(f"Found archive {name}, id {bin_to_hex(chunk_id)}.")
if self.manifest.archives.exists_name_and_id(name, chunk_id):
archive_id, archive_id_hex = chunk_id, bin_to_hex(chunk_id)
logger.info(f"Found archive {name} {archive_id_hex}.")
if self.manifest.archives.exists_name_and_id(name, archive_id):
logger.info("We already have an archives directory entry for this.")
elif not self.manifest.archives.exists(name):
# no archives list entry yet and name is not taken yet, create an entry
logger.warning(f"Creating archives directory entry for {name}.")
self.manifest.archives.create(name, chunk_id, archive.time)
else:
# we don't have an entry yet, but the name is taken by something else
i = 1
while True:
new_name = "%s.%d" % (name, i)
if not self.manifest.archives.exists(new_name):
break
i += 1
logger.warning(f"Creating archives directory entry using {new_name}.")
self.manifest.archives.create(new_name, chunk_id, archive.time)
logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.")
self.manifest.archives.create(name, archive_id, archive.time)
pi.finish()
logger.info("Rebuilding missing archives directory entries completed.")
@ -2046,28 +2036,28 @@ class ArchiveChecker:
with cache_if_remote(self.repository) as repository:
for i, info in enumerate(archive_infos):
pi.show(i)
logger.info(f"Analyzing archive {info.name} ({i + 1}/{num_archives})")
archive_id = info.id
archive_id, archive_id_hex = info.id, bin_to_hex(info.id)
logger.info(f"Analyzing archive {info.name} {archive_id_hex} ({i + 1}/{num_archives})")
if archive_id not in self.chunks:
logger.error("Archive metadata block %s is missing!", bin_to_hex(archive_id))
logger.error(f"Archive metadata block {archive_id_hex} is missing!")
self.error_found = True
if self.repair:
logger.error(f"Deleting broken archive {info.name}.")
self.manifest.archives.delete(info.name)
logger.error(f"Deleting broken archive {info.name} {archive_id_hex}.")
self.manifest.archives.delete_by_id(archive_id)
else:
logger.error(f"Would delete broken archive {info.name}.")
logger.error(f"Would delete broken archive {info.name} {archive_id_hex}.")
continue
cdata = self.repository.get(archive_id)
try:
_, data = self.repo_objs.parse(archive_id, cdata, ro_type=ROBJ_ARCHIVE_META)
except IntegrityError as integrity_error:
logger.error("Archive metadata block %s is corrupted: %s", bin_to_hex(archive_id), integrity_error)
logger.error(f"Archive metadata block {archive_id_hex} is corrupted: {integrity_error}")
self.error_found = True
if self.repair:
logger.error(f"Deleting broken archive {info.name}.")
self.manifest.archives.delete(info.name)
logger.error(f"Deleting broken archive {info.name} {archive_id_hex}.")
self.manifest.archives.delete_by_id(archive_id)
else:
logger.error(f"Would delete broken archive {info.name}.")
logger.error(f"Would delete broken archive {info.name} {archive_id_hex}.")
continue
archive = self.key.unpack_archive(data)
archive = ArchiveItem(internal_dict=archive)

View file

@ -267,36 +267,20 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request):
cmd(archiver, "check", exit_code=0)
def test_manifest_rebuild_duplicate_archive(archivers, request):
def test_check_undelete_archives(archivers, request):
archiver = request.getfixturevalue(archivers)
check_cmd_setup(archiver)
archive, repository = open_archive(archiver.repository_path, "archive1")
repo_objs = archive.repo_objs
with repository:
manifest = repository.get_manifest()
corrupted_manifest = manifest[:123] + b"corrupted!" + manifest[123:]
repository.put_manifest(corrupted_manifest)
archive_dict = {
"command_line": "",
"item_ptrs": [],
"hostname": "foo",
"username": "bar",
"name": "archive1",
"time": "2016-12-15T18:49:51.849711",
"version": 2,
}
archive = repo_objs.key.pack_metadata(archive_dict)
archive_id = repo_objs.id_hash(archive)
repository.put(archive_id, repo_objs.format(archive_id, {}, archive, ro_type=ROBJ_ARCHIVE_META))
cmd(archiver, "check", exit_code=1)
# when undeleting archives, borg check will discover both the original archive1 as well as
# the fake archive1 we created above. for the fake one, a new archives directory entry
# named archive1.1 will be created because we request undeleting archives and there
# is no archives directory entry for the fake archive yet.
check_cmd_setup(archiver) # creates archive1 and archive2
# borg delete does it rather quick and dirty: it only kills the archives directory entry
cmd(archiver, "delete", "archive1")
cmd(archiver, "delete", "archive2")
output = cmd(archiver, "repo-list")
assert "archive1" not in output
assert "archive2" not in output
# borg check will re-discover archive1 and archive2 and new archives directory entries
# will be created because we requested undeleting archives.
cmd(archiver, "check", "--repair", "--undelete-archives", exit_code=0)
output = cmd(archiver, "repo-list")
assert "archive1" in output
assert "archive1.1" in output
assert "archive2" in output