repoobj: add OBJ_MAGIC blob header for pack file format, refs #8572

This commit is contained in:
Mrityunjay Raj 2026-05-30 21:26:51 +05:30
parent 4f90df660a
commit 8db4f4a19a
3 changed files with 37 additions and 16 deletions

View file

@ -10,16 +10,24 @@ from .compress import Compressor, LZ4_COMPRESSOR
AUTHENTICATED_NO_KEY = "authenticated_no_key" in workarounds
OBJ_MAGIC = b"BORG_OBJ"
OBJ_VERSION = 0x01
class RepoObj:
# Object header: sizes of the encrypted meta and data sections.
obj_header = Struct("<II") # meta size (32b), data size (32b)
ObjHeader = namedtuple("ObjHeader", "meta_size data_size")
# Object header: magic (8 bytes), format version (1 byte), meta size (4 bytes), data size (4 bytes), little-endian.
obj_header = Struct("<8sBII")
ObjHeader = namedtuple("ObjHeader", "magic version meta_size data_size")
@classmethod
def extract_crypted_data(cls, data: bytes) -> bytes:
    """Return the encrypted data section of a serialized repository object.

    Used for crypto type detection. The fixed-size object header is unpacked and
    validated; everything following the header and the encrypted metadata section
    is returned.

    :param data: serialized object, starting with the object header.
    :return: the bytes after the header and the meta section.
    :raises IntegrityError: if *data* is shorter than the object header, or the
        header carries an unexpected magic or an unsupported format version.
    """
    hdr_size = cls.obj_header.size
    if len(data) < hdr_size:
        # Struct.unpack() raises struct.error on a short buffer; report truncation
        # the same way as the other header defects instead.
        raise IntegrityError(f"object too small: expected at least {hdr_size} bytes, got {len(data)}")
    hdr = cls.ObjHeader(*cls.obj_header.unpack(data[:hdr_size]))
    if hdr.magic != OBJ_MAGIC:
        raise IntegrityError("invalid object magic")
    if hdr.version != OBJ_VERSION:
        raise IntegrityError(f"unsupported object version: {hdr.version}")
    # Skip the header and the encrypted metadata; the remainder is the encrypted data.
    return data[hdr_size + hdr.meta_size :]
def __init__(self, key):
@ -64,7 +72,7 @@ class RepoObj:
data_encrypted = self.key.encrypt(id, data_compressed)
meta_packed = msgpack.packb(meta)
meta_encrypted = self.key.encrypt(id, meta_packed)
hdr = self.ObjHeader(len(meta_encrypted), len(data_encrypted))
hdr = self.ObjHeader(OBJ_MAGIC, OBJ_VERSION, len(meta_encrypted), len(data_encrypted))
hdr_packed = self.obj_header.pack(*hdr)
return hdr_packed + meta_encrypted + data_encrypted
@ -77,6 +85,10 @@ class RepoObj:
obj = memoryview(cdata)
hdr_size = self.obj_header.size
hdr = self.ObjHeader(*self.obj_header.unpack(obj[:hdr_size]))
if hdr.magic != OBJ_MAGIC:
raise IntegrityError("invalid object magic")
if hdr.version != OBJ_VERSION:
raise IntegrityError(f"unsupported object version: {hdr.version}")
assert hdr_size + hdr.meta_size <= len(obj)
meta_encrypted = obj[hdr_size : hdr_size + hdr.meta_size]
meta_packed = self.key.decrypt(id, meta_encrypted)
@ -105,6 +117,10 @@ class RepoObj:
obj = memoryview(cdata)
hdr_size = self.obj_header.size
hdr = self.ObjHeader(*self.obj_header.unpack(obj[:hdr_size]))
if hdr.magic != OBJ_MAGIC:
raise IntegrityError("invalid object magic")
if hdr.version != OBJ_VERSION:
raise IntegrityError(f"unsupported object version: {hdr.version}")
assert hdr_size + hdr.meta_size <= len(obj)
meta_encrypted = obj[hdr_size : hdr_size + hdr.meta_size]
meta_packed = self.key.decrypt(id, meta_encrypted)

View file

@ -16,7 +16,7 @@ from .helpers import bin_to_hex, hex_to_bin
from .storelocking import Lock
from .logger import create_logger
from .manifest import NoManifestError
from .repoobj import RepoObj
from .repoobj import RepoObj, OBJ_MAGIC, OBJ_VERSION
logger = create_logger(__name__)
@ -302,12 +302,17 @@ class Repository:
obj_size = len(obj)
if obj_size >= hdr_size:
hdr = RepoObj.ObjHeader(*RepoObj.obj_header.unpack(obj[:hdr_size]))
meta = obj[hdr_size : hdr_size + hdr.meta_size]
if hdr.meta_size != len(meta):
log_error("metadata size incorrect.")
data = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size]
if hdr.data_size != len(data):
log_error("data size incorrect.")
if hdr.magic != OBJ_MAGIC:
log_error("invalid object magic.")
elif hdr.version != OBJ_VERSION:
log_error(f"unsupported object version: {hdr.version}.")
else:
meta = obj[hdr_size : hdr_size + hdr.meta_size]
if hdr.meta_size != len(meta):
log_error("metadata size incorrect.")
data = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size]
if hdr.data_size != len(data):
log_error("data size incorrect.")
else:
log_error("too small.")
@ -460,7 +465,7 @@ class Repository:
hdr = obj[0:hdr_size]
if len(hdr) != hdr_size:
raise IntegrityError(f"Object too small [id {id_hex}]: expected {hdr_size}, got {len(hdr)} bytes")
meta_size = RepoObj.obj_header.unpack(hdr)[0]
meta_size = RepoObj.ObjHeader(*RepoObj.obj_header.unpack(hdr)).meta_size
if meta_size > extra_size:
# we did not get enough, need to load more, but not all.
# this should be rare, as chunk metadata is rather small usually.

View file

@ -8,7 +8,7 @@ from ..helpers import IntegrityError
from ..platformflags import is_win32
from ..remote import RemoteRepository, InvalidRPCMethod, PathNotAllowed
from ..repository import Repository, StoreObjectNotFound, MAX_DATA_SIZE
from ..repoobj import RepoObj
from ..repoobj import RepoObj, OBJ_MAGIC, OBJ_VERSION
from .hashindex_test import H
@ -55,7 +55,7 @@ def reopen(repository, exclusive: bool | None = True, create=False):
def fchunk(data, meta=b""):
# Format chunk: create a raw chunk that has a valid RepoObj layout, but does not use encryption or compression.
# The returned chunk is the packed object header, followed by meta, followed by data.
hdr = RepoObj.obj_header.pack(len(meta), len(data))
hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, len(meta), len(data))
assert isinstance(data, bytes)
chunk = hdr + meta + data
return chunk
@ -65,7 +65,7 @@ def pchunk(chunk):
# Parse chunk: extract data and metadata from a raw chunk made by fchunk.
hdr_size = RepoObj.obj_header.size
hdr = chunk[:hdr_size]
meta_size, data_size = RepoObj.obj_header.unpack(hdr)[0:2]
meta_size, data_size = RepoObj.obj_header.unpack(hdr)[2:4]
meta = chunk[hdr_size : hdr_size + meta_size]
data = chunk[hdr_size + meta_size : hdr_size + meta_size + data_size]
return data, meta
@ -97,7 +97,7 @@ def test_basic_operations(repo_fixtures, request):
def test_read_data(repo_fixtures, request):
with get_repository_from_fixture(repo_fixtures, request) as repository:
meta, data = b"meta", b"data"
hdr = RepoObj.obj_header.pack(len(meta), len(data))
hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, len(meta), len(data))
chunk_complete = hdr + meta + data
chunk_short = hdr + meta
repository.put(H(0), chunk_complete)