diff --git a/src/borg/archiver/compact_cmd.py b/src/borg/archiver/compact_cmd.py
index 4c9ab74f3..2785c19a7 100644
--- a/src/borg/archiver/compact_cmd.py
+++ b/src/borg/archiver/compact_cmd.py
@@ -65,7 +65,9 @@ class ArchiveGarbageCollector:
         # and also remove all older cached chunk indexes.
         # write_chunkindex_to_repo now removes all flags and size infos.
         # we need this, as we put the wrong size in there to support --stats computations.
-        write_chunkindex_to_repo_cache(self.repository, self.chunks, clear=True, force_write=True, delete_other=True)
+        write_chunkindex_to_repo_cache(
+            self.repository, self.chunks, incremental=False, clear=True, force_write=True, delete_other=True
+        )
         self.chunks = None  # nothing there (cleared!)
 
     def analyze_archives(self) -> Tuple[Set, int, int, int]:
@@ -113,6 +115,8 @@ class ArchiveGarbageCollector:
     def report_and_delete(self):
         if self.missing_chunks:
             logger.error(f"Repository has {len(self.missing_chunks)} missing objects!")
+            for id in sorted(self.missing_chunks):
+                logger.debug(f"Missing object {bin_to_hex(id)}")
             set_ec(EXIT_ERROR)
 
         logger.info("Cleaning archives directory from soft-deleted archives...")
diff --git a/src/borg/cache.py b/src/borg/cache.py
index c5d8e0907..1aeded8ae 100644
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@@ -705,20 +705,24 @@ CHUNKINDEX_HASH_SEED = 3
 
 
 def write_chunkindex_to_repo_cache(
-    repository, chunks, *, clear=False, force_write=False, delete_other=False, delete_these=None
+    repository, chunks, *, incremental=True, clear=False, force_write=False, delete_other=False, delete_these=None
 ):
-    # the borghash code has no means to only serialize the F_NEW table entries,
-    # thus we copy only the new entries to a temporary table:
-    new_chunks = ChunkIndex()
     # for now, we don't want to serialize the flags or the size, just the keys (chunk IDs):
     cleaned_value = ChunkIndexEntry(flags=ChunkIndex.F_NONE, size=0)
-    for key, _ in chunks.iteritems(only_new=True):
-        new_chunks[key] = cleaned_value
+    chunks_to_write = ChunkIndex()
+    # incremental==True:
+    # the borghash code has no means to only serialize the F_NEW table entries,
+    # thus we copy only the new entries to a temporary table.
+    # incremental==False:
+    # maybe copying the stuff into a new ChunkIndex is not needed here,
+    # but for simplicity, we do it anyway.
+    for key, _ in chunks.iteritems(only_new=incremental):
+        chunks_to_write[key] = cleaned_value
     with io.BytesIO() as f:
-        new_chunks.write(f)
+        chunks_to_write.write(f)
         data = f.getvalue()
-    logger.debug(f"caching {len(new_chunks)} new chunks.")
-    new_chunks.clear()  # free memory of the temporary table
+    logger.debug(f"caching {len(chunks_to_write)} chunks (incremental={incremental}).")
+    chunks_to_write.clear()  # free memory of the temporary table
     if clear:
         # if we don't need the in-memory chunks index anymore:
         chunks.clear()  # free memory, immediately
diff --git a/src/borg/repository.py b/src/borg/repository.py
index 4d2d74ce0..4218e38a3 100644
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@@ -362,7 +362,9 @@ class Repository:
                     # if we did a full pass in one go, we built a complete, uptodate ChunkIndex, cache it!
                     from .cache import write_chunkindex_to_repo_cache
 
-                    write_chunkindex_to_repo_cache(self, chunks, clear=True, force_write=True, delete_other=True)
+                    write_chunkindex_to_repo_cache(
+                        self, chunks, incremental=False, clear=True, force_write=True, delete_other=True
+                    )
         except StoreObjectNotFound:
             # it can be that there is no "data/" at all, then it crashes when iterating infos.
             pass
diff --git a/src/borg/testsuite/archiver/compact_cmd_test.py b/src/borg/testsuite/archiver/compact_cmd_test.py
index 66ae73372..65524fc90 100644
--- a/src/borg/testsuite/archiver/compact_cmd_test.py
+++ b/src/borg/testsuite/archiver/compact_cmd_test.py
@@ -59,3 +59,23 @@ def test_compact_after_deleting_some_archives(archivers, request, stats):
     else:
         assert "Repository has data stored in 0 objects." not in output
     assert "Finished compaction" in output
+
+
+def test_compact_index_corruption(archivers, request):
+    # see issue #8813 (borg did not write a complete index)
+    archiver = request.getfixturevalue(archivers)
+
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    create_src_archive(archiver, "archive1")
+
+    output = cmd(archiver, "compact", "-v", "--stats", exit_code=0)
+    assert "missing objects" not in output
+
+    output = cmd(archiver, "compact", "-v", exit_code=0)
+    assert "missing objects" not in output
+
+    output = cmd(archiver, "compact", "-v", exit_code=0)
+    assert "missing objects" not in output
+
+    output = cmd(archiver, "compact", "-v", "--stats", exit_code=0)
+    assert "missing objects" not in output