mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-11 01:41:57 -04:00
Merge pull request #9724 from ThomasWaldmann/repoobj-integrity-checks
better repoobj integrity checks
This commit is contained in:
commit
e36d58d715
5 changed files with 86 additions and 28 deletions
|
|
@ -2163,7 +2163,7 @@ class ArchiveChecker:
|
|||
cdata = self.repository.get(archive_id)
|
||||
try:
|
||||
_, data = self.repo_objs.parse(archive_id, cdata, ro_type=ROBJ_ARCHIVE_META)
|
||||
except IntegrityError as integrity_error:
|
||||
except IntegrityErrorBase as integrity_error:
|
||||
logger.error(f"Archive metadata block {archive_id_hex} is corrupted: {integrity_error}")
|
||||
self.error_found = True
|
||||
if self.repair:
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from .helpers.datastruct import StableDict
|
|||
from .helpers.parseformat import bin_to_hex, hex_to_bin
|
||||
from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now
|
||||
from .helpers.errors import Error, CommandError
|
||||
from .crypto.low_level import IntegrityError as IntegrityErrorBase
|
||||
from .item import ArchiveItem
|
||||
from .patterns import get_regex_from_pattern
|
||||
from .repoobj import RepoObj
|
||||
|
|
@ -161,25 +162,37 @@ class Archives:
|
|||
tags=(),
|
||||
)
|
||||
else:
|
||||
_, data = self.manifest.repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META)
|
||||
archive_dict = self.manifest.key.unpack_archive(data)
|
||||
archive_item = ArchiveItem(internal_dict=archive_dict)
|
||||
if archive_item.version not in (1, 2): # legacy: still need to read v1 archives
|
||||
raise Exception("Unknown archive metadata version")
|
||||
# callers expect a dict with dict["key"] access, not ArchiveItem.key access.
|
||||
# also, we need to put the id in there.
|
||||
metadata = dict(
|
||||
id=id,
|
||||
name=archive_item.name,
|
||||
time=archive_item.time,
|
||||
exists=True, # repo has a valid archive item
|
||||
username=archive_item.username,
|
||||
hostname=archive_item.hostname,
|
||||
size=archive_item.get("size", 0),
|
||||
nfiles=archive_item.get("nfiles", 0),
|
||||
comment=archive_item.get("comment", ""),
|
||||
tags=tuple(sorted(getattr(archive_item, "tags", []))), # must be hashable
|
||||
)
|
||||
try:
|
||||
_, data = self.manifest.repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META)
|
||||
except IntegrityErrorBase:
|
||||
metadata = dict(
|
||||
id=id,
|
||||
name="archive-metadata-has-integrity-error",
|
||||
time="1970-01-01T00:00:00.000000",
|
||||
exists=False, # we have the pointer, but the repo does not have an archive item
|
||||
username="",
|
||||
hostname="",
|
||||
tags=(),
|
||||
)
|
||||
else:
|
||||
archive_dict = self.manifest.key.unpack_archive(data)
|
||||
archive_item = ArchiveItem(internal_dict=archive_dict)
|
||||
if archive_item.version not in (1, 2): # legacy: still need to read v1 archives
|
||||
raise Exception("Unknown archive metadata version")
|
||||
# callers expect a dict with dict["key"] access, not ArchiveItem.key access.
|
||||
# also, we need to put the id in there.
|
||||
metadata = dict(
|
||||
id=id,
|
||||
name=archive_item.name,
|
||||
time=archive_item.time,
|
||||
exists=True, # repo has a valid archive item
|
||||
username=archive_item.username,
|
||||
hostname=archive_item.hostname,
|
||||
size=archive_item.get("size", 0),
|
||||
nfiles=archive_item.get("nfiles", 0),
|
||||
comment=archive_item.get("comment", ""),
|
||||
tags=tuple(sorted(getattr(archive_item, "tags", []))), # must be hashable
|
||||
)
|
||||
return metadata
|
||||
|
||||
def _infos(self, *, deleted=False):
|
||||
|
|
|
|||
|
|
@ -26,12 +26,17 @@ class RepoObj:
|
|||
def extract_crypted_data(cls, data: bytes) -> bytes:
|
||||
# used for crypto type detection
|
||||
hdr_size = cls.obj_header.size
|
||||
if len(data) < hdr_size:
|
||||
raise IntegrityError(f"object too small: expected at least {hdr_size} header bytes, got {len(data)}")
|
||||
hdr = cls.ObjHeader(*cls.obj_header.unpack(data[:hdr_size]))
|
||||
if hdr.magic != OBJ_MAGIC:
|
||||
raise IntegrityError("invalid object magic")
|
||||
if hdr.version != OBJ_VERSION:
|
||||
raise IntegrityError(f"unsupported object version: {hdr.version}")
|
||||
return data[hdr_size + hdr.meta_size :]
|
||||
overall_expected_size = hdr_size + hdr.meta_size + hdr.data_size
|
||||
if overall_expected_size != len(data):
|
||||
raise IntegrityError(f"object size inconsistent: expected {overall_expected_size} bytes, got {len(data)}")
|
||||
return data[hdr_size + hdr.meta_size :] # crypted data
|
||||
|
||||
def __init__(self, key):
|
||||
self.key = key
|
||||
|
|
@ -87,12 +92,17 @@ class RepoObj:
|
|||
assert isinstance(ro_type, str)
|
||||
obj = memoryview(cdata)
|
||||
hdr_size = self.obj_header.size
|
||||
if len(obj) < hdr_size:
|
||||
raise IntegrityError(f"object too small: expected at least {hdr_size} header bytes, got {len(obj)}")
|
||||
hdr = self.ObjHeader(*self.obj_header.unpack(obj[:hdr_size]))
|
||||
if hdr.magic != OBJ_MAGIC:
|
||||
raise IntegrityError("invalid object magic")
|
||||
if hdr.version != OBJ_VERSION:
|
||||
raise IntegrityError(f"unsupported object version: {hdr.version}")
|
||||
assert hdr_size + hdr.meta_size <= len(obj)
|
||||
if hdr_size + hdr.meta_size > len(obj):
|
||||
raise IntegrityError(
|
||||
f"object too small: expected at least {hdr_size + hdr.meta_size} bytes, got {len(obj)}"
|
||||
)
|
||||
meta_encrypted = obj[hdr_size : hdr_size + hdr.meta_size]
|
||||
meta_packed = self.key.decrypt(id, meta_encrypted)
|
||||
meta = msgpack.unpackb(meta_packed)
|
||||
|
|
@ -119,18 +129,21 @@ class RepoObj:
|
|||
assert isinstance(cdata, bytes)
|
||||
obj = memoryview(cdata)
|
||||
hdr_size = self.obj_header.size
|
||||
if len(obj) < hdr_size:
|
||||
raise IntegrityError(f"object too small: expected at least {hdr_size} header bytes, got {len(obj)}")
|
||||
hdr = self.ObjHeader(*self.obj_header.unpack(obj[:hdr_size]))
|
||||
if hdr.magic != OBJ_MAGIC:
|
||||
raise IntegrityError("invalid object magic")
|
||||
if hdr.version != OBJ_VERSION:
|
||||
raise IntegrityError(f"unsupported object version: {hdr.version}")
|
||||
assert hdr_size + hdr.meta_size <= len(obj)
|
||||
overall_expected_size = hdr_size + hdr.meta_size + hdr.data_size
|
||||
if overall_expected_size != len(obj):
|
||||
raise IntegrityError(f"object size inconsistent: expected {overall_expected_size} bytes, got {len(obj)}")
|
||||
meta_encrypted = obj[hdr_size : hdr_size + hdr.meta_size]
|
||||
meta_packed = self.key.decrypt(id, meta_encrypted)
|
||||
meta_compressed = msgpack.unpackb(meta_packed) # means: before adding more metadata in decompress block
|
||||
if ro_type != ROBJ_DONTCARE and meta_compressed["type"] != ro_type:
|
||||
raise IntegrityError(f"ro_type expected: {ro_type} got: {meta_compressed['type']}")
|
||||
assert hdr_size + hdr.meta_size + hdr.data_size <= len(obj)
|
||||
data_encrypted = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size]
|
||||
data_compressed = self.key.decrypt(id, data_encrypted) # does not include the type/level bytes
|
||||
if decompress:
|
||||
|
|
|
|||
|
|
@ -225,7 +225,7 @@ def test_corrupted_manifest(archivers, request):
|
|||
archive, repository = open_archive(archiver.repository_path, "archive1")
|
||||
with repository:
|
||||
manifest = repository.get_manifest()
|
||||
corrupted_manifest = manifest[:250] + b"corrupted!" + manifest[250:]
|
||||
corrupted_manifest = manifest[:250] + b"x" + manifest[251:]
|
||||
repository.put_manifest(corrupted_manifest)
|
||||
cmd(archiver, "check", exit_code=1)
|
||||
output = cmd(archiver, "check", "-v", "--repair", exit_code=0)
|
||||
|
|
@ -273,10 +273,10 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request):
|
|||
archive, repository = open_archive(archiver.repository_path, "archive1")
|
||||
with repository:
|
||||
manifest = repository.get_manifest()
|
||||
corrupted_manifest = manifest[:250] + b"corrupted!" + manifest[250:]
|
||||
corrupted_manifest = manifest[:250] + b"x" + manifest[251:]
|
||||
repository.put_manifest(corrupted_manifest)
|
||||
chunk = repository.get(archive.id)
|
||||
corrupted_chunk = chunk + b"corrupted!"
|
||||
corrupted_chunk = chunk[:-1] + b"x"
|
||||
repository.put(archive.id, corrupted_chunk)
|
||||
cmd(archiver, "check", exit_code=1)
|
||||
output = cmd(archiver, "check", "-v", "--repair", exit_code=0)
|
||||
|
|
@ -312,7 +312,7 @@ def test_spoofed_archive(archivers, request):
|
|||
with repository:
|
||||
# attacker would corrupt or delete the manifest to trigger a rebuild of it:
|
||||
manifest = repository.get_manifest()
|
||||
corrupted_manifest = manifest[:250] + b"corrupted!" + manifest[250:]
|
||||
corrupted_manifest = manifest[:250] + b"x" + manifest[251:]
|
||||
repository.put_manifest(corrupted_manifest)
|
||||
archive_dict = {
|
||||
"command_line": "",
|
||||
|
|
|
|||
|
|
@ -107,6 +107,38 @@ def test_borg1_borg2_transition(key):
|
|||
assert meta2["size"] == len_data
|
||||
|
||||
|
||||
def test_malformed_object_too_short(key):
|
||||
# a malformed / truncated object (e.g. from a corrupted or malicious repo) must be
|
||||
# rejected with a clean IntegrityError, not an uncaught struct.error / IndexError.
|
||||
repo_objs = RepoObj(key)
|
||||
id = repo_objs.id_hash(b"x")
|
||||
hdr_size = RepoObj.obj_header.size
|
||||
for blob in [b"", b"BORG_OBJ", b"\x00" * (hdr_size - 1)]:
|
||||
with pytest.raises(IntegrityError):
|
||||
RepoObj.extract_crypted_data(blob)
|
||||
with pytest.raises(IntegrityError):
|
||||
repo_objs.parse_meta(id, blob, ro_type=ROBJ_FILE_STREAM)
|
||||
with pytest.raises(IntegrityError):
|
||||
repo_objs.parse(id, blob, ro_type=ROBJ_FILE_STREAM)
|
||||
|
||||
|
||||
def test_malformed_object_inconsistent_sizes(key):
|
||||
# a valid-looking header that claims more meta/data than the object actually contains
|
||||
# must be rejected cleanly with IntegrityError.
|
||||
from ..repoobj import OBJ_MAGIC, OBJ_VERSION
|
||||
|
||||
repo_objs = RepoObj(key)
|
||||
id = repo_objs.id_hash(b"x")
|
||||
# huge meta_size, but no actual meta/data bytes follow the header
|
||||
hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, id, 0xFFFFFFFF, 0)
|
||||
with pytest.raises(IntegrityError):
|
||||
RepoObj.extract_crypted_data(hdr)
|
||||
with pytest.raises(IntegrityError):
|
||||
repo_objs.parse_meta(id, hdr, ro_type=ROBJ_FILE_STREAM)
|
||||
with pytest.raises(IntegrityError):
|
||||
repo_objs.parse(id, hdr, ro_type=ROBJ_FILE_STREAM)
|
||||
|
||||
|
||||
def test_spoof_manifest(key):
|
||||
repo_objs = RepoObj(key)
|
||||
data = b"fake or malicious manifest data" # File content could be provided by an attacker.
|
||||
|
|
|
|||
Loading…
Reference in a new issue