repoobj: add chunk_id to blob header, extend to 49 bytes, refs #8572

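Store the chunk_id unencrypted in the per-blob header so that borg check
can rebuild the chunk_id -> pack location index without decrypting
anything. AEAD uses the chunk_id as additional data, so the id must
already be known before a blob can be authenticated or decrypted;
recovering it from the encrypted blob itself would therefore be circular.
An explicit plaintext copy in the header breaks that cycle and makes
key-free recovery possible.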

Header layout: OBJ_MAGIC(8) + version(1) + chunk_id(32) + meta_size(4)
+ data_size(4) = 49 bytes (REPOOBJ_HEADER_SIZE), matching the struct
format "<8sB32sII".
This commit is contained in:
Mrityunjay Raj 2026-05-31 22:51:03 +05:30
parent c37fe1ff92
commit beab9b2711
2 changed files with 11 additions and 8 deletions

View file

@ -13,11 +13,14 @@ AUTHENTICATED_NO_KEY = "authenticated_no_key" in workarounds
OBJ_MAGIC = b"BORG_OBJ"
OBJ_VERSION = 0x01
# Fixed header size per blob: OBJ_MAGIC(8) + version(1) + chunk_id(32) + meta_size(4) + data_size(4)
REPOOBJ_HEADER_SIZE = 49
class RepoObj:
# Object header: magic (8b), format version (1b), meta size (4b), data size (4b).
obj_header = Struct("<8sBII")
ObjHeader = namedtuple("ObjHeader", "magic version meta_size data_size")
# Object header: magic (8b), format version (1b), chunk_id (32b), meta size (4b), data size (4b).
obj_header = Struct("<8sB32sII")
ObjHeader = namedtuple("ObjHeader", "magic version chunk_id meta_size data_size")
@classmethod
def extract_crypted_data(cls, data: bytes) -> bytes:
@ -72,7 +75,7 @@ class RepoObj:
data_encrypted = self.key.encrypt(id, data_compressed)
meta_packed = msgpack.packb(meta)
meta_encrypted = self.key.encrypt(id, meta_packed)
hdr = self.ObjHeader(OBJ_MAGIC, OBJ_VERSION, len(meta_encrypted), len(data_encrypted))
hdr = self.ObjHeader(OBJ_MAGIC, OBJ_VERSION, id, len(meta_encrypted), len(data_encrypted))
hdr_packed = self.obj_header.pack(*hdr)
return hdr_packed + meta_encrypted + data_encrypted

View file

@ -53,9 +53,9 @@ def reopen(repository, exclusive: bool | None = True, create=False):
)
def fchunk(data, meta=b""):
def fchunk(data, meta=b"", chunk_id=b"\x00" * 32):
# Format chunk: create a raw chunk that has a valid RepoObj layout, but does not use encryption or compression.
hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, len(meta), len(data))
hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, chunk_id, len(meta), len(data))
assert isinstance(data, bytes)
chunk = hdr + meta + data
return chunk
@ -65,7 +65,7 @@ def pchunk(chunk):
# Parse chunk: extract data and metadata from a raw chunk made by fchunk.
hdr_size = RepoObj.obj_header.size
hdr = chunk[:hdr_size]
meta_size, data_size = RepoObj.obj_header.unpack(hdr)[2:4]
meta_size, data_size = RepoObj.obj_header.unpack(hdr)[3:5]
meta = chunk[hdr_size : hdr_size + meta_size]
data = chunk[hdr_size + meta_size : hdr_size + meta_size + data_size]
return data, meta
@ -97,7 +97,7 @@ def test_basic_operations(repo_fixtures, request):
def test_read_data(repo_fixtures, request):
with get_repository_from_fixture(repo_fixtures, request) as repository:
meta, data = b"meta", b"data"
hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, len(meta), len(data))
hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, H(0), len(meta), len(data))
chunk_complete = hdr + meta + data
chunk_short = hdr + meta
repository.put(H(0), chunk_complete)