mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-11 01:41:57 -04:00
Merge pull request #9024 from ThomasWaldmann/transfer-corrupts-src-repo
fix borg transfer corrupting the src repo index
This commit is contained in:
commit
3bff0c31cf
4 changed files with 73 additions and 39 deletions
|
|
@ -201,6 +201,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
|
|||
used = len(self.ht)
|
||||
header_bytes = struct.pack(self.HEADER_FMT, self.MAGIC, used, used, self.KEY_SIZE, self.VALUE_SIZE)
|
||||
fd.write(header_bytes)
|
||||
# record the header as a separate integrity-hash part if supported
|
||||
hash_part = getattr(fd, "hash_part", None)
|
||||
if hash_part:
|
||||
hash_part("HashHeader")
|
||||
count = 0
|
||||
for key, _ in self.ht.items():
|
||||
value = self.ht._get_raw(key)
|
||||
|
|
@ -214,6 +218,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
|
|||
header_bytes = fd.read(header_size)
|
||||
if len(header_bytes) < header_size:
|
||||
raise ValueError(f"Invalid file, file is too short (header).")
|
||||
# verify the header as a separate integrity-hash part if supported
|
||||
hash_part = getattr(fd, "hash_part", None)
|
||||
if hash_part:
|
||||
hash_part("HashHeader")
|
||||
magic, entries, buckets, ksize, vsize = struct.unpack(self.HEADER_FMT, header_bytes)
|
||||
if magic != self.MAGIC:
|
||||
raise ValueError(f"Invalid file, magic {self.MAGIC.decode()} not found.")
|
||||
|
|
@ -228,6 +236,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
|
|||
for i in range(buckets):
|
||||
key = fd.read(ksize)
|
||||
value = fd.read(vsize)
|
||||
if value.startswith(b'\xFF\xFF\xFF\xFF'): # LE for 0xffffffff (empty/unused bucket)
|
||||
continue
|
||||
if value.startswith(b'\xFE\xFF\xFF\xFF'): # LE for 0xfffffffe (deleted/tombstone bucket)
|
||||
continue
|
||||
self.ht._set_raw(key, value)
|
||||
pos = fd.tell()
|
||||
assert pos == end_of_file
|
||||
|
|
|
|||
|
|
@ -515,23 +515,13 @@ class LegacyRepository:
|
|||
return
|
||||
return integrity[key]
|
||||
|
||||
def open_index(self, transaction_id):
    """
    Load the repository index belonging to *transaction_id*.

    If transaction_id is None, a new, empty NSIndex1 is returned. Otherwise the file
    "index.<transaction_id>" inside self.path is read through IntegrityCheckedFile, using
    the integrity data recorded for the "index" part of that transaction.

    This method only reads: it never deletes or rebuilds the index file and never commits.
    Errors raised while opening or reading the file are propagated unchanged, so the caller
    decides whether and how to recover. Unlinking the index here modified a borg 1.x source
    repository that was only opened for reading by "borg transfer" (borgbackup/borg#9022).
    """
    if transaction_id is None:
        return NSIndex1()
    index_path = os.path.join(self.path, "index.%d" % transaction_id)
    integrity_data = self._read_integrity(transaction_id, "index")
    with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
        return NSIndex1.read(fd)
|
||||
|
||||
def _unpack_hints(self, transaction_id):
|
||||
hints_path = os.path.join(self.path, "hints.%d" % transaction_id)
|
||||
|
|
@ -560,11 +550,11 @@ class LegacyRepository:
|
|||
raise
|
||||
if not self.index or transaction_id is None:
|
||||
try:
|
||||
self.index = self.open_index(transaction_id, auto_recover=False)
|
||||
self.index = self.open_index(transaction_id)
|
||||
except (ValueError, OSError, FileIntegrityError) as exc:
|
||||
logger.warning("Checking repository transaction due to previous error: %s", exc)
|
||||
self.check_transaction()
|
||||
self.index = self.open_index(transaction_id, auto_recover=False)
|
||||
self.index = self.open_index(transaction_id)
|
||||
if transaction_id is None:
|
||||
self.segments = {} # XXX bad name: usage_count_of_segment_x = self.segments[x]
|
||||
self.compact = FreeSpace() # XXX bad name: freeable_space_of_segment_x = self.compact[x]
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import glob
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
|
|
@ -469,3 +470,58 @@ def test_transfer_rechunk(archivers, request, monkeypatch):
|
|||
# Verify that the file hash is identical to the source
|
||||
assert item.path in source_file_hashes, f"File {item.path} not found in source archive"
|
||||
assert dest_hash == source_file_hashes[item.path], f"Content hash mismatch for {item.path}"
|
||||
|
||||
|
||||
def test_issue_9022(archivers, request, monkeypatch):
    """
    Regression test for borgbackup/borg#9022: After "borg transfer --from-borg1",
    the source Borg 1.x repository index must not be changed.
    """
    archiver = request.getfixturevalue(archivers)
    if archiver.get_kind() in ["remote", "binary"]:
        pytest.skip("only works locally")

    # Prepare source (borg 1.2) repo from tarball next to this test file
    repo12_tar = os.path.join(os.path.dirname(__file__), "repo12.tar.gz")

    original_location = archiver.repository_location
    extract_dir = f"{original_location}1"
    os.makedirs(extract_dir)
    with tarfile.open(repo12_tar) as tf:
        tf.extractall(extract_dir)

    def index_meta(repo_path):
        """Return (mtime_ns, size, inode) of the single index.* file found in repo_path."""
        index_files = sorted(glob.glob(os.path.join(repo_path, "index.*")))
        # This helper runs both before and after the transfer, so keep the message neutral.
        assert len(index_files) == 1, f"Expected exactly 1 index file in {repo_path}, found {len(index_files)}"
        st = os.stat(index_files[0])
        # os.stat_result always provides st_mtime_ns and st_ino, no fallbacks needed.
        return (st.st_mtime_ns, st.st_size, st.st_ino)

    # Record pre-transfer index file metadata
    pre_meta = index_meta(extract_dir)

    other_repo1 = f"--other-repo={extract_dir}"

    # Destination repo where we transfer to (borg 2 repo)
    archiver.repository_location = f"{original_location}2"

    # Set passphrases: repo12 testdata uses "waytooeasyonlyfortests"
    monkeypatch.setenv("BORG_PASSPHRASE", "pw2")
    monkeypatch.setenv("BORG_OTHER_PASSPHRASE", "waytooeasyonlyfortests")
    # For this test, we must not weaken KDF, otherwise borg2 couldn't decrypt the borg1 key.
    # Use monkeypatch (not os.environ directly) so the setting is undone after this test
    # and does not leak into tests that run later in the same process.
    monkeypatch.setenv("BORG_TESTONLY_WEAKEN_KDF", "0")

    # Create destination repo and run transfer from borg1 source
    cmd(archiver, "repo-create", RK_ENCRYPTION, other_repo1, "--from-borg1")
    cmd(archiver, "transfer", other_repo1, "--from-borg1")

    # After transfer, ensure the source borg1 index file looks valid and unchanged.
    post_meta = index_meta(extract_dir)

    assert post_meta == pre_meta, (
        f"Index file metadata changed after transfer!\n"
        f"Before: mtime_ns={pre_meta[0]}, size={pre_meta[1]}, inode={pre_meta[2]}\n"
        f"After: mtime_ns={post_meta[0]}, size={post_meta[1]}, inode={post_meta[2]}"
    )
|
||||
|
|
|
|||
|
|
@ -571,21 +571,6 @@ def test_unreadable_hints(repository):
|
|||
do_commit(repository)
|
||||
|
||||
|
||||
def test_index(repository):
    """Overwrite index.1 with garbage bytes, then run do_commit(); passes if nothing raises."""
    make_auxiliary(repository)
    index_path = os.path.join(repository.path, "index.1")
    with open(index_path, "wb") as index_file:
        index_file.write(b"123456789")
    do_commit(repository)
|
||||
|
||||
|
||||
def test_index_outside_transaction(repository):
    """Overwrite index.1 with garbage bytes, then open the repository and expect one entry."""
    make_auxiliary(repository)
    index_path = os.path.join(repository.path, "index.1")
    with open(index_path, "wb") as index_file:
        index_file.write(b"123456789")
    with repository:
        entry_count = len(repository)
        assert entry_count == 1
|
||||
|
||||
|
||||
def _corrupt_index(repository):
|
||||
# HashIndex is able to detect incorrect headers and file lengths,
|
||||
# but on its own it can't tell if the data is correct.
|
||||
|
|
@ -601,15 +586,6 @@ def _corrupt_index(repository):
|
|||
fd.write(corrupted_index_data)
|
||||
|
||||
|
||||
def test_index_corrupted(repository):
    """After _corrupt_index(), the repository must still report one entry and return b"foo" for H(0)."""
    make_auxiliary(repository)
    _corrupt_index(repository)
    with repository:
        # data corruption is detected due to mismatching checksums, and fixed by rebuilding the index.
        entry_count = len(repository)
        assert entry_count == 1
        stored = repository.get(H(0))
        assert pdchunk(stored) == b"foo"
|
||||
|
||||
|
||||
def test_index_corrupted_without_integrity(repository):
|
||||
make_auxiliary(repository)
|
||||
_corrupt_index(repository)
|
||||
|
|
|
|||
Loading…
Reference in a new issue