mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-09 08:51:54 -04:00
Merge pull request #9677 from mr-raj12/pack-files-step1-remove-xxh64
repoobj: remove xxh64 checksums from blob header
This commit is contained in:
commit
910f223e59
5 changed files with 39 additions and 70 deletions
|
|
@ -1,8 +1,6 @@
|
|||
from collections import namedtuple
|
||||
from struct import Struct
|
||||
|
||||
from xxhash import xxh64
|
||||
|
||||
from .constants import * # NOQA
|
||||
from .helpers import msgpack, workarounds
|
||||
from .helpers.errors import IntegrityError
|
||||
|
|
@ -13,10 +11,9 @@ AUTHENTICATED_NO_KEY = "authenticated_no_key" in workarounds
|
|||
|
||||
|
||||
class RepoObj:
|
||||
# Object header format includes size information for parsing the object into meta and data,
|
||||
# as well as hashes to enable checking consistency without having the borg key.
|
||||
obj_header = Struct("<II8s8s") # meta size (32b), data size (32b), meta hash (64b), data hash (64b)
|
||||
ObjHeader = namedtuple("ObjHeader", "meta_size data_size meta_hash data_hash")
|
||||
# Object header: sizes of the encrypted meta and data sections.
|
||||
obj_header = Struct("<II") # meta size (32b), data size (32b)
|
||||
ObjHeader = namedtuple("ObjHeader", "meta_size data_size")
|
||||
|
||||
@classmethod
|
||||
def extract_crypted_data(cls, data: bytes) -> bytes:
|
||||
|
|
@ -67,9 +64,7 @@ class RepoObj:
|
|||
data_encrypted = self.key.encrypt(id, data_compressed)
|
||||
meta_packed = msgpack.packb(meta)
|
||||
meta_encrypted = self.key.encrypt(id, meta_packed)
|
||||
hdr = self.ObjHeader(
|
||||
len(meta_encrypted), len(data_encrypted), xxh64(meta_encrypted).digest(), xxh64(data_encrypted).digest()
|
||||
)
|
||||
hdr = self.ObjHeader(len(meta_encrypted), len(data_encrypted))
|
||||
hdr_packed = self.obj_header.pack(*hdr)
|
||||
return hdr_packed + meta_encrypted + data_encrypted
|
||||
|
||||
|
|
|
|||
|
|
@ -2,8 +2,6 @@ import os
|
|||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from xxhash import xxh64
|
||||
|
||||
from borgstore.store import Store
|
||||
from borgstore.store import ObjectNotFound as StoreObjectNotFound
|
||||
from borgstore.backends.errors import BackendError as StoreBackendError
|
||||
|
|
@ -307,13 +305,9 @@ class Repository:
|
|||
meta = obj[hdr_size : hdr_size + hdr.meta_size]
|
||||
if hdr.meta_size != len(meta):
|
||||
log_error("metadata size incorrect.")
|
||||
elif hdr.meta_hash != xxh64(meta).digest():
|
||||
log_error("metadata does not match checksum.")
|
||||
data = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size]
|
||||
if hdr.data_size != len(data):
|
||||
log_error("data size incorrect.")
|
||||
elif hdr.data_hash != xxh64(data).digest():
|
||||
log_error("data does not match checksum.")
|
||||
else:
|
||||
log_error("too small.")
|
||||
|
||||
|
|
|
|||
|
|
@ -362,50 +362,34 @@ def test_verify_data(archivers, request, init_args):
|
|||
if archiver.get_kind() != "local":
|
||||
pytest.skip("only works locally, patches objects")
|
||||
|
||||
# it's tricky to test the cryptographic data verification, because usually already the
|
||||
# repository-level xxh64 hash fails to verify. So we use a fake one that doesn't.
|
||||
# note: it only works like tested here for a highly engineered data corruption attack,
|
||||
# because with accidental corruption, usually already the xxh64 low-level check fails.
|
||||
def fake_xxh64(data, seed=0):
|
||||
# xxhash.xxh64.digest() returns -> bytes
|
||||
class FakeDigest:
|
||||
def digest(self):
|
||||
return b"fakefake"
|
||||
check_cmd_setup(archiver)
|
||||
shutil.rmtree(archiver.repository_path)
|
||||
cmd(archiver, "repo-create", *init_args)
|
||||
create_src_archive(archiver, "archive1")
|
||||
archive, repository = open_archive(archiver.repository_path, "archive1")
|
||||
with repository:
|
||||
for item in archive.iter_items():
|
||||
if item.path.endswith(src_file):
|
||||
chunk = item.chunks[-1]
|
||||
data = repository.get(chunk.id)
|
||||
data = data[0:123] + b"x" + data[123:]
|
||||
repository.put(chunk.id, data)
|
||||
break
|
||||
|
||||
return FakeDigest()
|
||||
# the normal archives check does not read file content data.
|
||||
cmd(archiver, "check", "--archives-only", exit_code=0)
|
||||
# but with --verify-data, it does and notices the issue.
|
||||
output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1)
|
||||
assert f"{bin_to_hex(chunk.id)}, integrity error" in output
|
||||
|
||||
import borg.repoobj
|
||||
import borg.repository
|
||||
# repair will find the defect chunk and remove it
|
||||
output = cmd(archiver, "check", "--repair", "--verify-data", exit_code=0)
|
||||
assert f"{bin_to_hex(chunk.id)}, integrity error" in output
|
||||
assert f"{src_file}: Missing file chunk detected" in output
|
||||
|
||||
with patch.object(borg.repoobj, "xxh64", fake_xxh64), patch.object(borg.repository, "xxh64", fake_xxh64):
|
||||
check_cmd_setup(archiver)
|
||||
shutil.rmtree(archiver.repository_path)
|
||||
cmd(archiver, "repo-create", *init_args)
|
||||
create_src_archive(archiver, "archive1")
|
||||
archive, repository = open_archive(archiver.repository_path, "archive1")
|
||||
with repository:
|
||||
for item in archive.iter_items():
|
||||
if item.path.endswith(src_file):
|
||||
chunk = item.chunks[-1]
|
||||
data = repository.get(chunk.id)
|
||||
data = data[0:123] + b"x" + data[123:]
|
||||
repository.put(chunk.id, data)
|
||||
break
|
||||
|
||||
# the normal archives check does not read file content data.
|
||||
cmd(archiver, "check", "--archives-only", exit_code=0)
|
||||
# but with --verify-data, it does and notices the issue.
|
||||
output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1)
|
||||
assert f"{bin_to_hex(chunk.id)}, integrity error" in output
|
||||
|
||||
# repair will find the defect chunk and remove it
|
||||
output = cmd(archiver, "check", "--repair", "--verify-data", exit_code=0)
|
||||
assert f"{bin_to_hex(chunk.id)}, integrity error" in output
|
||||
assert f"{src_file}: Missing file chunk detected" in output
|
||||
|
||||
# run with --verify-data again, it will notice the missing chunk.
|
||||
output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1)
|
||||
assert f"{src_file}: Missing file chunk detected" in output
|
||||
# run with --verify-data again, it will notice the missing chunk.
|
||||
output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1)
|
||||
assert f"{src_file}: Missing file chunk detected" in output
|
||||
|
||||
|
||||
@pytest.mark.parametrize("init_args", [["--encryption=repokey-aes-ocb"], ["--encryption", "none"]])
|
||||
|
|
@ -427,13 +411,13 @@ def test_corrupted_file_chunk(archivers, request, init_args):
|
|||
repository.put(chunk.id, data)
|
||||
break
|
||||
|
||||
# the normal check checks all repository objects and the xxh64 checksum fails.
|
||||
output = cmd(archiver, "check", "--repository-only", exit_code=1)
|
||||
assert f"{bin_to_hex(chunk.id)} is corrupted: data does not match checksum." in output
|
||||
# --verify-data decrypts and catches the corruption.
|
||||
output = cmd(archiver, "check", "--archives-only", "--verify-data", exit_code=1)
|
||||
assert f"{bin_to_hex(chunk.id)}, integrity error" in output
|
||||
|
||||
# repair: the defect chunk will be removed by repair.
|
||||
output = cmd(archiver, "check", "--repair", exit_code=0)
|
||||
assert f"{bin_to_hex(chunk.id)} is corrupted: data does not match checksum." in output
|
||||
# repair: the defect chunk will be removed.
|
||||
output = cmd(archiver, "check", "--repair", "--verify-data", exit_code=0)
|
||||
assert f"{bin_to_hex(chunk.id)}, integrity error" in output
|
||||
assert f"{src_file}: Missing file chunk detected" in output
|
||||
|
||||
# run normal check again
|
||||
|
|
|
|||
|
|
@ -5,8 +5,6 @@ import sys
|
|||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from xxhash import xxh64
|
||||
|
||||
from ..legacy.hashindex import NSIndex1
|
||||
from ..helpers import Location
|
||||
from ..helpers import IntegrityError
|
||||
|
|
@ -75,7 +73,7 @@ def get_path(repository):
|
|||
|
||||
def fchunk(data, meta=b""):
|
||||
# Create a raw chunk that has a valid RepoObj layout but does not use encryption or compression.
|
||||
hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest())
|
||||
hdr = RepoObj.obj_header.pack(len(meta), len(data))
|
||||
assert isinstance(data, bytes)
|
||||
chunk = hdr + meta + data
|
||||
return chunk
|
||||
|
|
@ -150,7 +148,7 @@ def test_multiple_transactions(repo_fixtures, request):
|
|||
def test_read_data(repo_fixtures, request):
|
||||
with get_repository_from_fixture(repo_fixtures, request) as repository:
|
||||
meta, data = b"meta", b"data"
|
||||
hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest())
|
||||
hdr = RepoObj.obj_header.pack(len(meta), len(data))
|
||||
chunk_complete = hdr + meta + data
|
||||
repository.put(H(0), chunk_complete)
|
||||
repository.commit(compact=False)
|
||||
|
|
|
|||
|
|
@ -3,8 +3,6 @@ import os
|
|||
import sys
|
||||
|
||||
import pytest
|
||||
from xxhash import xxh64
|
||||
|
||||
from ..helpers import Location
|
||||
from ..helpers import IntegrityError
|
||||
from ..platformflags import is_win32
|
||||
|
|
@ -57,7 +55,7 @@ def reopen(repository, exclusive: bool | None = True, create=False):
|
|||
|
||||
def fchunk(data, meta=b""):
|
||||
# Format chunk: create a raw chunk that has a valid RepoObj layout, but does not use encryption or compression.
|
||||
hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest())
|
||||
hdr = RepoObj.obj_header.pack(len(meta), len(data))
|
||||
assert isinstance(data, bytes)
|
||||
chunk = hdr + meta + data
|
||||
return chunk
|
||||
|
|
@ -99,7 +97,7 @@ def test_basic_operations(repo_fixtures, request):
|
|||
def test_read_data(repo_fixtures, request):
|
||||
with get_repository_from_fixture(repo_fixtures, request) as repository:
|
||||
meta, data = b"meta", b"data"
|
||||
hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest())
|
||||
hdr = RepoObj.obj_header.pack(len(meta), len(data))
|
||||
chunk_complete = hdr + meta + data
|
||||
chunk_short = hdr + meta
|
||||
repository.put(H(0), chunk_complete)
|
||||
|
|
|
|||
Loading…
Reference in a new issue