diff --git a/src/borg/archive.py b/src/borg/archive.py index 81a5fe196..43f8434d3 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -2163,7 +2163,7 @@ class ArchiveChecker: cdata = self.repository.get(archive_id) try: _, data = self.repo_objs.parse(archive_id, cdata, ro_type=ROBJ_ARCHIVE_META) - except IntegrityError as integrity_error: + except IntegrityErrorBase as integrity_error: logger.error(f"Archive metadata block {archive_id_hex} is corrupted: {integrity_error}") self.error_found = True if self.repair: diff --git a/src/borg/manifest.py b/src/borg/manifest.py index 696fac71a..13b4a458d 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -17,6 +17,7 @@ from .helpers.datastruct import StableDict from .helpers.parseformat import bin_to_hex, hex_to_bin from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now from .helpers.errors import Error, CommandError +from .crypto.low_level import IntegrityError as IntegrityErrorBase from .item import ArchiveItem from .patterns import get_regex_from_pattern from .repoobj import RepoObj @@ -161,25 +162,37 @@ class Archives: tags=(), ) else: - _, data = self.manifest.repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META) - archive_dict = self.manifest.key.unpack_archive(data) - archive_item = ArchiveItem(internal_dict=archive_dict) - if archive_item.version not in (1, 2): # legacy: still need to read v1 archives - raise Exception("Unknown archive metadata version") - # callers expect a dict with dict["key"] access, not ArchiveItem.key access. - # also, we need to put the id in there. - metadata = dict( - id=id, - name=archive_item.name, - time=archive_item.time, - exists=True, # repo has a valid archive item - username=archive_item.username, - hostname=archive_item.hostname, - size=archive_item.get("size", 0), - nfiles=archive_item.get("nfiles", 0), - comment=archive_item.get("comment", ""), - tags=tuple(sorted(getattr(archive_item, "tags", []))), # must be hashable - ) + try: + _, data = self.manifest.repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META) + except IntegrityErrorBase: + metadata = dict( + id=id, + name="archive-metadata-has-integrity-error", + time="1970-01-01T00:00:00.000000", + exists=False, # we have the pointer, but the repo does not have an archive item + username="", + hostname="", + tags=(), + ) + else: + archive_dict = self.manifest.key.unpack_archive(data) + archive_item = ArchiveItem(internal_dict=archive_dict) + if archive_item.version not in (1, 2): # legacy: still need to read v1 archives + raise Exception("Unknown archive metadata version") + # callers expect a dict with dict["key"] access, not ArchiveItem.key access. + # also, we need to put the id in there. + metadata = dict( + id=id, + name=archive_item.name, + time=archive_item.time, + exists=True, # repo has a valid archive item + username=archive_item.username, + hostname=archive_item.hostname, + size=archive_item.get("size", 0), + nfiles=archive_item.get("nfiles", 0), + comment=archive_item.get("comment", ""), + tags=tuple(sorted(getattr(archive_item, "tags", []))), # must be hashable + ) return metadata def _infos(self, *, deleted=False): diff --git a/src/borg/repoobj.py b/src/borg/repoobj.py index f2a57f95a..65d530ca6 100644 --- a/src/borg/repoobj.py +++ b/src/borg/repoobj.py @@ -26,12 +26,17 @@ class RepoObj: def extract_crypted_data(cls, data: bytes) -> bytes: # used for crypto type detection hdr_size = cls.obj_header.size + if len(data) < hdr_size: + raise IntegrityError(f"object too small: expected at least {hdr_size} header bytes, got {len(data)}") hdr = cls.ObjHeader(*cls.obj_header.unpack(data[:hdr_size])) if hdr.magic != OBJ_MAGIC: raise IntegrityError("invalid object magic") if hdr.version != OBJ_VERSION: raise IntegrityError(f"unsupported object version: {hdr.version}") - return data[hdr_size + hdr.meta_size :] + overall_expected_size = hdr_size + hdr.meta_size + hdr.data_size + if overall_expected_size != len(data): + raise IntegrityError(f"object size inconsistent: expected {overall_expected_size} bytes, got {len(data)}") + return data[hdr_size + hdr.meta_size :] # crypted data def __init__(self, key): self.key = key @@ -87,12 +92,17 @@ class RepoObj: assert isinstance(ro_type, str) obj = memoryview(cdata) hdr_size = self.obj_header.size + if len(obj) < hdr_size: + raise IntegrityError(f"object too small: expected at least {hdr_size} header bytes, got {len(obj)}") hdr = self.ObjHeader(*self.obj_header.unpack(obj[:hdr_size])) if hdr.magic != OBJ_MAGIC: raise IntegrityError("invalid object magic") if hdr.version != OBJ_VERSION: raise IntegrityError(f"unsupported object version: {hdr.version}") - assert hdr_size + hdr.meta_size <= len(obj) + if hdr_size + hdr.meta_size > len(obj): + raise IntegrityError( + f"object too small: expected at least {hdr_size + hdr.meta_size} bytes, got {len(obj)}" + ) meta_encrypted = obj[hdr_size : hdr_size + hdr.meta_size] meta_packed = self.key.decrypt(id, meta_encrypted) meta = msgpack.unpackb(meta_packed) @@ -119,18 +129,21 @@ class RepoObj: assert isinstance(cdata, bytes) obj = memoryview(cdata) hdr_size = self.obj_header.size + if len(obj) < hdr_size: + raise IntegrityError(f"object too small: expected at least {hdr_size} header bytes, got {len(obj)}") hdr = self.ObjHeader(*self.obj_header.unpack(obj[:hdr_size])) if hdr.magic != OBJ_MAGIC: raise IntegrityError("invalid object magic") if hdr.version != OBJ_VERSION: raise IntegrityError(f"unsupported object version: {hdr.version}") - assert hdr_size + hdr.meta_size <= len(obj) + overall_expected_size = hdr_size + hdr.meta_size + hdr.data_size + if overall_expected_size != len(obj): + raise IntegrityError(f"object size inconsistent: expected {overall_expected_size} bytes, got {len(obj)}") meta_encrypted = obj[hdr_size : hdr_size + hdr.meta_size] meta_packed = self.key.decrypt(id, meta_encrypted) meta_compressed = msgpack.unpackb(meta_packed) # means: before adding more metadata in decompress block if ro_type != ROBJ_DONTCARE and meta_compressed["type"] != ro_type: raise IntegrityError(f"ro_type expected: {ro_type} got: {meta_compressed['type']}") - assert hdr_size + hdr.meta_size + hdr.data_size <= len(obj) data_encrypted = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size] data_compressed = self.key.decrypt(id, data_encrypted) # does not include the type/level bytes if decompress: diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py index 162b4c198..0c23b6ef7 100644 --- a/src/borg/testsuite/archiver/check_cmd_test.py +++ b/src/borg/testsuite/archiver/check_cmd_test.py @@ -225,7 +225,7 @@ def test_corrupted_manifest(archivers, request): archive, repository = open_archive(archiver.repository_path, "archive1") with repository: manifest = repository.get_manifest() - corrupted_manifest = manifest[:250] + b"corrupted!" + manifest[250:] + corrupted_manifest = manifest[:250] + b"x" + manifest[251:] repository.put_manifest(corrupted_manifest) cmd(archiver, "check", exit_code=1) output = cmd(archiver, "check", "-v", "--repair", exit_code=0) @@ -273,10 +273,10 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request): archive, repository = open_archive(archiver.repository_path, "archive1") with repository: manifest = repository.get_manifest() - corrupted_manifest = manifest[:250] + b"corrupted!" + manifest[250:] + corrupted_manifest = manifest[:250] + b"x" + manifest[251:] repository.put_manifest(corrupted_manifest) chunk = repository.get(archive.id) - corrupted_chunk = chunk + b"corrupted!" + corrupted_chunk = chunk[:-1] + b"x" repository.put(archive.id, corrupted_chunk) cmd(archiver, "check", exit_code=1) output = cmd(archiver, "check", "-v", "--repair", exit_code=0) @@ -312,7 +312,7 @@ def test_spoofed_archive(archivers, request): with repository: # attacker would corrupt or delete the manifest to trigger a rebuild of it: manifest = repository.get_manifest() - corrupted_manifest = manifest[:250] + b"corrupted!" + manifest[250:] + corrupted_manifest = manifest[:250] + b"x" + manifest[251:] repository.put_manifest(corrupted_manifest) archive_dict = { "command_line": "", diff --git a/src/borg/testsuite/repoobj_test.py b/src/borg/testsuite/repoobj_test.py index 748749ef2..1523acd41 100644 --- a/src/borg/testsuite/repoobj_test.py +++ b/src/borg/testsuite/repoobj_test.py @@ -107,6 +107,38 @@ def test_borg1_borg2_transition(key): assert meta2["size"] == len_data +def test_malformed_object_too_short(key): + # a malformed / truncated object (e.g. from a corrupted or malicious repo) must be + # rejected with a clean IntegrityError, not an uncaught struct.error / IndexError. + repo_objs = RepoObj(key) + id = repo_objs.id_hash(b"x") + hdr_size = RepoObj.obj_header.size + for blob in [b"", b"BORG_OBJ", b"\x00" * (hdr_size - 1)]: + with pytest.raises(IntegrityError): + RepoObj.extract_crypted_data(blob) + with pytest.raises(IntegrityError): + repo_objs.parse_meta(id, blob, ro_type=ROBJ_FILE_STREAM) + with pytest.raises(IntegrityError): + repo_objs.parse(id, blob, ro_type=ROBJ_FILE_STREAM) + + +def test_malformed_object_inconsistent_sizes(key): + # a valid-looking header that claims more meta/data than the object actually contains + # must be rejected cleanly with IntegrityError. + from ..repoobj import OBJ_MAGIC, OBJ_VERSION + + repo_objs = RepoObj(key) + id = repo_objs.id_hash(b"x") + # huge meta_size, but no actual meta/data bytes follow the header + hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, id, 0xFFFFFFFF, 0) + with pytest.raises(IntegrityError): + RepoObj.extract_crypted_data(hdr) + with pytest.raises(IntegrityError): + repo_objs.parse_meta(id, hdr, ro_type=ROBJ_FILE_STREAM) + with pytest.raises(IntegrityError): + repo_objs.parse(id, hdr, ro_type=ROBJ_FILE_STREAM) + + def test_spoof_manifest(key): repo_objs = RepoObj(key) data = b"fake or malicious manifest data" # File content could be provided by an attacker.