fix reading borg 1.x repo index, fixes #9022

2 fixes:
- add code to update/verify the HashHeader integrity hash. This code was
  missing, which led to a FileIntegrityError when reading the borg 1.x repo index.
- when reading a non-compact borg 1.x hash table from disk (like the borg
  repo index), only add the "used" buckets to the in-memory hashtable,
  but not the unused/tombstone buckets.

The corruption described in #9022 happened like this:
- borg failed to read the repo index because the integrity check failed.
- due to open_index(..., auto_recover=True), it then tried to "fix" the index
  by writing an empty hash table to disk. borg 1.x usually rebuilds the index
  afterwards, but for some reason this did not happen for the user in #9022.
This commit is contained in:
Thomas Waldmann 2025-09-22 19:12:48 +02:00
parent 9cbd0ca461
commit d955f8ce75
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01

View file

@@ -201,6 +201,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
used = len(self.ht)
header_bytes = struct.pack(self.HEADER_FMT, self.MAGIC, used, used, self.KEY_SIZE, self.VALUE_SIZE)
fd.write(header_bytes)
# record the header as a separate integrity-hash part if supported
hash_part = getattr(fd, "hash_part", None)
if hash_part:
hash_part("HashHeader")
count = 0
for key, _ in self.ht.items():
value = self.ht._get_raw(key)
@@ -214,6 +218,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
header_bytes = fd.read(header_size)
if len(header_bytes) < header_size:
raise ValueError(f"Invalid file, file is too short (header).")
# verify the header as a separate integrity-hash part if supported
hash_part = getattr(fd, "hash_part", None)
if hash_part:
hash_part("HashHeader")
magic, entries, buckets, ksize, vsize = struct.unpack(self.HEADER_FMT, header_bytes)
if magic != self.MAGIC:
raise ValueError(f"Invalid file, magic {self.MAGIC.decode()} not found.")
@@ -228,6 +236,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
for i in range(buckets):
key = fd.read(ksize)
value = fd.read(vsize)
if value.startswith(b'\xFF\xFF\xFF\xFF'): # LE for 0xffffffff (empty/unused bucket)
continue
if value.startswith(b'\xFE\xFF\xFF\xFF'): # LE for 0xfffffffe (deleted/tombstone bucket)
continue
self.ht._set_raw(key, value)
pos = fd.tell()
assert pos == end_of_file