diff --git a/src/borg/archive.py b/src/borg/archive.py index f3b10e588..c07cec08c 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1635,7 +1635,7 @@ class ArchiveChecker: *, verify_data=False, repair=False, - undelete_archives=False, + find_lost_archives=False, match=None, sort_by="", first=0, @@ -1648,7 +1648,7 @@ class ArchiveChecker: """Perform a set of checks on 'repository' :param repair: enable repair mode, write updated or corrected data into repository - :param undelete_archives: create archive directory entries that are missing + :param find_lost_archives: create archive directory entries that are missing :param first/last/sort_by: only check this number of first/last archives ordered by sort_by :param match: only check archives matching this pattern :param older/newer: only check archives older/newer than timedelta from now @@ -1685,7 +1685,7 @@ class ArchiveChecker: rebuild_manifest = True if rebuild_manifest: self.manifest = self.rebuild_manifest() - if undelete_archives: + if find_lost_archives: self.rebuild_archives_directory() self.rebuild_archives( match=match, first=first, last=last, sort_by=sort_by, older=older, oldest=oldest, newer=newer, newest=newest @@ -1815,8 +1815,10 @@ class ArchiveChecker: """Rebuild the archives directory, undeleting archives. Iterates through all objects in the repository looking for archive metadata blocks. - When finding some that do not have a corresponding archives directory entry, it will - create that entry (undeleting all archives). + When finding some that do not have a corresponding archives directory entry (either + a normal entry for an "existing" archive, or a soft-deleted entry for a "deleted" + archive), it will create that entry (making the archives directory consistent with + the repository). """ def valid_archive(obj): @@ -1862,12 +1864,18 @@ class ArchiveChecker: archive = ArchiveItem(internal_dict=archive) name = archive.name archive_id, archive_id_hex = chunk_id, bin_to_hex(chunk_id) - logger.info(f"Found archive {name} {archive_id_hex}.") - if self.manifest.archives.exists_name_and_id(name, archive_id): - logger.info("We already have an archives directory entry for this.") + if self.manifest.archives.exists_id(archive_id, deleted=False): + logger.debug(f"We already have an archives directory entry for {name} {archive_id_hex}.") + elif self.manifest.archives.exists_id(archive_id, deleted=True): + logger.debug(f"We already have a deleted archives directory entry for {name} {archive_id_hex}.") else: - logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.") - self.manifest.archives.create(name, archive_id, archive.time) + self.error_found = True + if self.repair: + logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.") + self.manifest.archives.create(name, archive_id, archive.time) + else: + logger.warning(f"Would create archives directory entry for {name} {archive_id_hex}.") + pi.finish() logger.info("Rebuilding missing archives directory entries completed.") diff --git a/src/borg/archiver/check_cmd.py b/src/borg/archiver/check_cmd.py index a7d0ea990..7fe962bff 100644 --- a/src/borg/archiver/check_cmd.py +++ b/src/borg/archiver/check_cmd.py @@ -35,10 +35,10 @@ class CheckMixIn: raise CommandError( "--repository-only contradicts --first, --last, -a / --match-archives and --verify-data arguments." ) + if args.repo_only and args.find_lost_archives: + raise CommandError("--repository-only contradicts the --find-lost-archives option.") if args.repair and args.max_duration: raise CommandError("--repair does not allow --max-duration argument.") - if args.undelete_archives and not args.repair: - raise CommandError("--undelete-archives requires --repair argument.") if args.max_duration and not args.repo_only: # when doing a partial repo check, we can only check xxh64 hashes in repository files. # archives check requires that a full repo check was done before and has built/cached a ChunkIndex. @@ -51,7 +51,7 @@ class CheckMixIn: repository, verify_data=args.verify_data, repair=args.repair, - undelete_archives=args.undelete_archives, + find_lost_archives=args.find_lost_archives, match=args.match_archives, sort_by=args.sort_by or "ts", first=args.first, @@ -180,11 +180,12 @@ class CheckMixIn: Consequently, if lost chunks were repaired earlier, it is advised to run ``--repair`` a second time after creating some new backups. - If ``--repair --undelete-archives`` is given, Borg will scan the repository + If ``--repair --find-lost-archives`` is given, Borg will scan the repository for archive metadata and if it finds some where no corresponding archives - directory entry exists, it will create the entries. This is basically undoing - ``borg delete archive`` or ``borg prune ...`` commands and only possible before - ``borg compact`` would remove the archives' data completely. + directory entry exists, it will create one. + This will make archives reappear for which the directory entry was lost. + This is only possible before ``borg compact`` would remove the archives' + data completely. """ ) subparser = subparsers.add_parser( @@ -213,10 +214,7 @@ class CheckMixIn: "--repair", dest="repair", action="store_true", help="attempt to repair any inconsistencies found" ) subparser.add_argument( - "--undelete-archives", - dest="undelete_archives", - action="store_true", - help="attempt to undelete archives (use with --repair)", + "--find-lost-archives", dest="find_lost_archives", action="store_true", help="attempt to find lost archives" ) subparser.add_argument( "--max-duration", diff --git a/src/borg/manifest.py b/src/borg/manifest.py index 9dc9e818c..608bfcaab 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -222,6 +222,14 @@ class Archives: else: return name in self._archives + def exists_id(self, id, *, deleted=False): + # check if an archive with this id exists + assert isinstance(id, bytes) + if not self.legacy: + return id in self.ids(deleted=deleted) + else: + raise NotImplementedError + def exists_name_and_id(self, name, id): # check if an archive with this name AND id exists assert isinstance(name, str) diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py index 0eb19e7f1..cc2ee31e5 100644 --- a/src/borg/testsuite/archiver/check_cmd_test.py +++ b/src/borg/testsuite/archiver/check_cmd_test.py @@ -1,4 +1,5 @@ from datetime import datetime, timezone, timedelta +from pathlib import Path import shutil from unittest.mock import patch @@ -270,18 +271,21 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request): def test_check_undelete_archives(archivers, request): archiver = request.getfixturevalue(archivers) check_cmd_setup(archiver) # creates archive1 and archive2 - # borg delete does it rather quick and dirty: it only kills the archives directory entry - cmd(archiver, "delete", "archive1") - cmd(archiver, "delete", "archive2") - output = cmd(archiver, "repo-list") - assert "archive1" not in output - assert "archive2" not in output - # borg check will re-discover archive1 and archive2 and new archives directory entries - # will be created because we requested undeleting archives. - cmd(archiver, "check", "--repair", "--undelete-archives", exit_code=0) + existing_archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines()) + create_src_archive(archiver, "archive3") + archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines()) + new_archive_id_hex = (archive_ids - existing_archive_ids).pop() + (Path(archiver.repository_path) / "archives" / new_archive_id_hex).unlink() # lose the entry for archive3 output = cmd(archiver, "repo-list") assert "archive1" in output assert "archive2" in output + assert "archive3" not in output + # borg check will re-discover archive3 and create a new archives directory entry. + cmd(archiver, "check", "--repair", "--find-lost-archives", exit_code=0) + output = cmd(archiver, "repo-list") + assert "archive1" in output + assert "archive2" in output + assert "archive3" in output def test_spoofed_archive(archivers, request):