Merge pull request #8821 from ThomasWaldmann/compact-fixes

borg compact fix / test
This commit is contained in:
TW 2025-05-08 11:17:53 +02:00 committed by GitHub
commit b14be7fa92
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 41 additions and 11 deletions

View file

@ -65,7 +65,9 @@ class ArchiveGarbageCollector:
# and also remove all older cached chunk indexes.
# write_chunkindex_to_repo_cache now removes all flags and size information.
# we need this, as we put the wrong size in there to support --stats computations.
write_chunkindex_to_repo_cache(self.repository, self.chunks, clear=True, force_write=True, delete_other=True)
write_chunkindex_to_repo_cache(
self.repository, self.chunks, incremental=False, clear=True, force_write=True, delete_other=True
)
self.chunks = None # nothing there (cleared!)
def analyze_archives(self) -> Tuple[Set, int, int, int]:
@ -113,6 +115,8 @@ class ArchiveGarbageCollector:
def report_and_delete(self):
if self.missing_chunks:
logger.error(f"Repository has {len(self.missing_chunks)} missing objects!")
for id in sorted(self.missing_chunks):
logger.debug(f"Missing object {bin_to_hex(id)}")
set_ec(EXIT_ERROR)
logger.info("Cleaning archives directory from soft-deleted archives...")

View file

@ -705,20 +705,24 @@ CHUNKINDEX_HASH_SEED = 3
def write_chunkindex_to_repo_cache(
repository, chunks, *, clear=False, force_write=False, delete_other=False, delete_these=None
repository, chunks, *, incremental=True, clear=False, force_write=False, delete_other=False, delete_these=None
):
# the borghash code has no means to only serialize the F_NEW table entries,
# thus we copy only the new entries to a temporary table:
new_chunks = ChunkIndex()
# for now, we don't want to serialize the flags or the size, just the keys (chunk IDs):
cleaned_value = ChunkIndexEntry(flags=ChunkIndex.F_NONE, size=0)
for key, _ in chunks.iteritems(only_new=True):
new_chunks[key] = cleaned_value
chunks_to_write = ChunkIndex()
# incremental==True:
# the borghash code has no means to only serialize the F_NEW table entries,
# thus we copy only the new entries to a temporary table.
# incremental==False:
# maybe copying the stuff into a new ChunkIndex is not needed here,
# but for simplicity, we do it anyway.
for key, _ in chunks.iteritems(only_new=incremental):
chunks_to_write[key] = cleaned_value
with io.BytesIO() as f:
new_chunks.write(f)
chunks_to_write.write(f)
data = f.getvalue()
logger.debug(f"caching {len(new_chunks)} new chunks.")
new_chunks.clear() # free memory of the temporary table
logger.debug(f"caching {len(chunks_to_write)} chunks (incremental={incremental}).")
chunks_to_write.clear() # free memory of the temporary table
if clear:
# if we don't need the in-memory chunks index anymore:
chunks.clear() # free memory, immediately

View file

@ -362,7 +362,9 @@ class Repository:
# if we did a full pass in one go, we built a complete, up-to-date ChunkIndex, cache it!
from .cache import write_chunkindex_to_repo_cache
write_chunkindex_to_repo_cache(self, chunks, clear=True, force_write=True, delete_other=True)
write_chunkindex_to_repo_cache(
self, chunks, incremental=False, clear=True, force_write=True, delete_other=True
)
except StoreObjectNotFound:
# it can be that there is no "data/" at all, then it crashes when iterating infos.
pass

View file

@ -59,3 +59,23 @@ def test_compact_after_deleting_some_archives(archivers, request, stats):
else:
assert "Repository has data stored in 0 objects." not in output
assert "Finished compaction" in output
def test_compact_index_corruption(archivers, request):
    """Regression test for issue #8813 (borg did not write a complete index).

    Creates a repository with a single archive, then runs ``compact``
    four times in a row (with ``--stats``, twice without, then with
    ``--stats`` again). None of the runs may report missing objects.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    create_src_archive(archiver, "archive1")
    # Same sequence of invocations as before: the option mix is switched
    # between consecutive runs so each run follows one of the other kind.
    for extra_args in (("--stats",), (), (), ("--stats",)):
        output = cmd(archiver, "compact", "-v", *extra_args, exit_code=0)
        assert "missing objects" not in output