From d955f8ce75d213022cf969058cdcc56c762df37c Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Mon, 22 Sep 2025 19:12:48 +0200
Subject: [PATCH] fix reading borg 1.x repo index, fixes #9022

2 fixes:

- add code to update/verify the HashHeader integrity hash.
  this code was missing and led to FileIntegrityError on
  the borg 1.x repo index.
- when reading a non-compact borg 1.x hash table from disk (like
  the borg repo index), only add the "used" buckets to the
  in-memory hashtable, but not the unused/tombstone buckets.

The corruption described in #9022 was happening like this:

- borg failed to read the repo index, because the integrity check failed
- due to open_index(..., auto_recover=True), it tried to "fix"
  it by writing an empty hash table to disk.

borg 1.x usually then rebuilt the index, but somehow this wasn't
happening for the user in #9022.
---
 src/borg/hashindex.pyx | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx
index 97fc213f1..511a395b1 100644
--- a/src/borg/hashindex.pyx
+++ b/src/borg/hashindex.pyx
@@ -201,6 +201,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
         used = len(self.ht)
         header_bytes = struct.pack(self.HEADER_FMT, self.MAGIC, used, used, self.KEY_SIZE, self.VALUE_SIZE)
         fd.write(header_bytes)
+        # record the header as a separate integrity-hash part if supported
+        hash_part = getattr(fd, "hash_part", None)
+        if hash_part:
+            hash_part("HashHeader")
         count = 0
         for key, _ in self.ht.items():
             value = self.ht._get_raw(key)
@@ -214,6 +218,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
         header_bytes = fd.read(header_size)
         if len(header_bytes) < header_size:
             raise ValueError(f"Invalid file, file is too short (header).")
+        # verify the header as a separate integrity-hash part if supported
+        hash_part = getattr(fd, "hash_part", None)
+        if hash_part:
+            hash_part("HashHeader")
         magic, entries, buckets, ksize, vsize = struct.unpack(self.HEADER_FMT, header_bytes)
         if magic != self.MAGIC:
             raise ValueError(f"Invalid file, magic {self.MAGIC.decode()} not found.")
@@ -228,6 +236,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
         for i in range(buckets):
             key = fd.read(ksize)
             value = fd.read(vsize)
+            if value.startswith(b'\xFF\xFF\xFF\xFF'):  # LE for 0xffffffff (empty/unused bucket)
+                continue
+            if value.startswith(b'\xFE\xFF\xFF\xFF'):  # LE for 0xfffffffe (deleted/tombstone bucket)
+                continue
             self.ht._set_raw(key, value)
         pos = fd.tell()
         assert pos == end_of_file