mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-09 08:51:54 -04:00
repository: add BORGPACK pack header, bump repo version to 4, refs #8572
Wrap each pack file in a 13-byte header (magic + version + blob_len) so packs are self-identifying and the [len][blob] unit extends to N>1 without a format revision. Bump version 3->4: packs/ and 49-byte ObjHeader are incompatible with version-3 readers. Fix test_extra_chunks chunk_id mismatch.
This commit is contained in:
parent
1cb8d99425
commit
05ce0a1897
2 changed files with 59 additions and 28 deletions
|
|
@ -17,9 +17,16 @@ from .helpers import bin_to_hex, hex_to_bin
|
|||
from .storelocking import Lock
|
||||
from .logger import create_logger
|
||||
from .manifest import NoManifestError
|
||||
from struct import Struct
|
||||
|
||||
from .repoobj import RepoObj, OBJ_MAGIC, OBJ_VERSION
|
||||
from .crypto.key import is_keyfile
|
||||
|
||||
PACK_MAGIC = b"BORGPACK"
|
||||
PACK_VERSION = 0x01
|
||||
_pack_header = Struct("<8sBI") # magic(8) + version(1) + blob_len(4)
|
||||
PACK_HEADER_SIZE = _pack_header.size # 13 bytes
|
||||
|
||||
logger = create_logger(__name__)
|
||||
|
||||
|
||||
|
|
@ -174,7 +181,7 @@ class Repository:
|
|||
self._send_log = send_log_cb or (lambda: None)
|
||||
self.do_create = create
|
||||
self.created = False
|
||||
self.acceptable_repo_versions = (3,)
|
||||
self.acceptable_repo_versions = (4,)
|
||||
self.opened = False
|
||||
self.lock = None
|
||||
self.do_lock = lock
|
||||
|
|
@ -212,10 +219,10 @@ class Repository:
|
|||
self.store.open()
|
||||
try:
|
||||
self.store.store("config/readme", REPOSITORY_README.encode())
|
||||
self.version = 3
|
||||
self.version = 4
|
||||
self.store.store("config/version", str(self.version).encode())
|
||||
self.store.store("config/id", bin_to_hex(os.urandom(32)).encode())
|
||||
# we know repo/data/ still does not have any chunks stored in it,
|
||||
# we know repo/packs/ still does not have any chunks stored in it,
|
||||
# but for some stores, there might be a lot of empty directories and
|
||||
# listing them all might be rather slow, so we better cache an empty
|
||||
# ChunkIndex from here so that the first repo operation does not have
|
||||
|
|
@ -329,25 +336,38 @@ class Repository:
|
|||
|
||||
def check_object(obj):
|
||||
"""Check if obj looks valid."""
|
||||
hdr_size = RepoObj.obj_header.size
|
||||
obj_size = len(obj)
|
||||
if obj_size >= hdr_size:
|
||||
hdr = RepoObj.ObjHeader(*RepoObj.obj_header.unpack(obj[:hdr_size]))
|
||||
if hdr.magic != OBJ_MAGIC:
|
||||
log_error("invalid object magic.")
|
||||
elif hdr.version != OBJ_VERSION:
|
||||
log_error(f"unsupported object version: {hdr.version}.")
|
||||
elif hdr.chunk_id != hex_to_bin(info.name):
|
||||
log_error("chunk_id mismatch in header.")
|
||||
else:
|
||||
meta = obj[hdr_size : hdr_size + hdr.meta_size]
|
||||
if hdr.meta_size != len(meta):
|
||||
log_error("metadata size mismatch.")
|
||||
data = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size]
|
||||
if hdr.data_size != len(data):
|
||||
log_error("data size mismatch.")
|
||||
else:
|
||||
if len(obj) < PACK_HEADER_SIZE:
|
||||
log_error("too small.")
|
||||
return
|
||||
magic, version, blob_len = _pack_header.unpack(obj[:PACK_HEADER_SIZE])
|
||||
if magic != PACK_MAGIC:
|
||||
log_error("invalid pack magic.")
|
||||
return
|
||||
if version != PACK_VERSION:
|
||||
log_error(f"unsupported pack version: {version}.")
|
||||
return
|
||||
blob = obj[PACK_HEADER_SIZE:]
|
||||
if len(blob) != blob_len:
|
||||
log_error(f"pack blob_len mismatch: header says {blob_len}, actual {len(blob)}.")
|
||||
return
|
||||
hdr_size = RepoObj.obj_header.size
|
||||
if len(blob) < hdr_size:
|
||||
log_error("too small.")
|
||||
return
|
||||
hdr = RepoObj.ObjHeader(*RepoObj.obj_header.unpack(blob[:hdr_size]))
|
||||
if hdr.magic != OBJ_MAGIC:
|
||||
log_error("invalid object magic.")
|
||||
elif hdr.version != OBJ_VERSION:
|
||||
log_error(f"unsupported object version: {hdr.version}.")
|
||||
elif hdr.chunk_id != hex_to_bin(info.name):
|
||||
log_error("chunk_id mismatch in header.")
|
||||
else:
|
||||
meta = blob[hdr_size : hdr_size + hdr.meta_size]
|
||||
if hdr.meta_size != len(meta):
|
||||
log_error("metadata size mismatch.")
|
||||
data = blob[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size]
|
||||
if hdr.data_size != len(data):
|
||||
log_error("data size mismatch.")
|
||||
|
||||
# TODO: progress indicator, ...
|
||||
partial = bool(max_duration)
|
||||
|
|
@ -488,14 +508,15 @@ class Repository:
|
|||
key = "packs/" + bin_to_hex(pack_id)
|
||||
try:
|
||||
if read_data:
|
||||
# read everything
|
||||
return self.store.load(key)
|
||||
raw = self.store.load(key)
|
||||
return raw[PACK_HEADER_SIZE:]
|
||||
else:
|
||||
# RepoObj layout supports separately encrypted metadata and data.
|
||||
# We return enough bytes so the client can decrypt the metadata.
|
||||
hdr_size = RepoObj.obj_header.size
|
||||
extra_size = 1024 - hdr_size # load a bit more (object header + extra = 1024 bytes) to reduce round trips
|
||||
obj = self.store.load(key, size=hdr_size + extra_size)
|
||||
raw = self.store.load(key, size=PACK_HEADER_SIZE + hdr_size + extra_size)
|
||||
obj = raw[PACK_HEADER_SIZE:]
|
||||
hdr = obj[0:hdr_size]
|
||||
if len(hdr) != hdr_size:
|
||||
raise IntegrityError(f"Object too small [id {id_hex}]: expected {hdr_size}, got {len(hdr)} bytes")
|
||||
|
|
@ -503,7 +524,8 @@ class Repository:
|
|||
if meta_size > extra_size:
|
||||
# we did not get enough, need to load more, but not all.
|
||||
# this should be rare, as chunk metadata is rather small usually.
|
||||
obj = self.store.load(key, size=hdr_size + meta_size)
|
||||
raw = self.store.load(key, size=PACK_HEADER_SIZE + hdr_size + meta_size)
|
||||
obj = raw[PACK_HEADER_SIZE:]
|
||||
meta = obj[hdr_size : hdr_size + meta_size]
|
||||
if len(meta) != meta_size:
|
||||
raise IntegrityError(f"Object too small [id {id_hex}]: expected {meta_size}, got {len(meta)} bytes")
|
||||
|
|
@ -531,13 +553,21 @@ class Repository:
|
|||
|
||||
pack_id = id # N=1: pack_id == chunk_id
|
||||
key = "packs/" + bin_to_hex(pack_id)
|
||||
self.store.store(key, data)
|
||||
pack_hdr = _pack_header.pack(PACK_MAGIC, PACK_VERSION, data_size)
|
||||
self.store.store(key, pack_hdr + data)
|
||||
|
||||
def delete(self, id, wait=True):
|
||||
"""delete a repo object
|
||||
|
||||
Note: when called with wait=False, the call is asynchronous and the caller must
|
||||
deal with async results / exceptions later.
|
||||
|
||||
N=1: pack_id == chunk_id, so deleting the pack file is equivalent to
|
||||
deleting the chunk. Hard delete is safe here.
|
||||
N>1: a pack contains multiple chunks. Individual chunks cannot be deleted
|
||||
from a pack without rewriting it. This method must become a soft-delete
|
||||
(no-op) before N>1 is implemented; compact() will then be the sole
|
||||
mechanism for reclaiming space based on live-ratio thresholds.
|
||||
"""
|
||||
self._lock_refresh()
|
||||
pack_id = id # N=1: pack_id == chunk_id
|
||||
|
|
|
|||
|
|
@ -351,8 +351,9 @@ def test_extra_chunks(archivers, request):
|
|||
check_cmd_setup(archiver)
|
||||
cmd(archiver, "check", exit_code=0)
|
||||
with Repository(archiver.repository_location, exclusive=True) as repository:
|
||||
chunk = fchunk(b"xxxx")
|
||||
repository.put(b"01234567890123456789012345678901", chunk)
|
||||
key = b"01234567890123456789012345678901"
|
||||
chunk = fchunk(b"xxxx", chunk_id=key)
|
||||
repository.put(key, chunk)
|
||||
cmd(archiver, "check", "-v", exit_code=0) # check does not deal with orphans anymore
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue