diff --git a/docs/internals/data-structures.rst b/docs/internals/data-structures.rst index 761478763..8d1562ff2 100644 --- a/docs/internals/data-structures.rst +++ b/docs/internals/data-structures.rst @@ -365,7 +365,6 @@ or modified. It looks like this: 'time': '2017-05-05T12:42:22.942864', }, }, - 'tam': ..., } The *version* field can be either 1 or 2. The versions differ in the @@ -379,10 +378,6 @@ the repository. It is used by *borg check*, which verifies that all keys in all items are a subset of these keys. Thus, an older version of *borg check* supporting this mechanism can correctly detect keys introduced in later versions. -The *tam* key is part of the :ref:`tertiary authentication mechanism ` -(formerly known as "tertiary authentication for metadata") and authenticates -the manifest, since an ID check is not possible. - *config* is a general-purpose location for additional metadata. All versions of Borg preserve its contents. diff --git a/docs/internals/security.rst b/docs/internals/security.rst index 38d693d0a..fef56d610 100644 --- a/docs/internals/security.rst +++ b/docs/internals/security.rst @@ -67,43 +67,26 @@ in a particular part of its own data structure assigns this meaning. This results in a directed acyclic graph of authentication from the manifest to the data chunks of individual files. -.. _tam_description: +Above used to be all for borg 1.x and was the reason why it needed the +tertiary authentication mechanism (TAM) for manifest and archives. -.. rubric:: Authenticating the manifest +borg 2 now stores the ro_type ("meaning") of a repo object's data into that +object's metadata (like e.g.: manifest vs. archive vs. user file content data). +When loading data from the repo, borg verifies that the type of object it got +matches the type it wanted. borg 2 does not use TAMs any more. -Since the manifest has a fixed ID (000...000) the aforementioned authentication -does not apply to it, indeed, cannot apply to it; it is impossible to authenticate -the root node of a DAG through its edges, since the root node has no incoming edges. +As both the object's metadata and data are AEAD encrypted and also bound to +the object ID (via giving the ID as AAD), there is no way an attacker (without +access to the borg key) could change the type of the object or move content +to a different object ID. -With the scheme as described so far an attacker could easily replace the manifest, -therefore Borg includes a tertiary authentication mechanism (TAM) that is applied -to the manifest (see :ref:`tam_vuln`). +This effectively 'anchors' the manifest (and also other metadata, like archives) +to the key, which is controlled by the client, thereby anchoring the entire DAG, +making it impossible for an attacker to add, remove or modify any part of the +DAG without Borg being able to detect the tampering. -TAM works by deriving a separate key through HKDF_ from the other encryption and -authentication keys and calculating the HMAC of the metadata to authenticate [#]_:: - - # RANDOM(n) returns n random bytes - salt = RANDOM(64) - - ikm = id_key || crypt_key - # *context* depends on the operation, for manifest authentication it is - # the ASCII string "borg-metadata-authentication-manifest". - tam_key = HKDF-SHA-512(ikm, salt, context) - - # *data* is a dict-like structure - data[hmac] = zeroes - packed = pack(data) - data[hmac] = HMAC(tam_key, packed) - packed_authenticated = pack(data) - -Since an attacker cannot gain access to this key and also cannot make the -client authenticate arbitrary data using this mechanism, the attacker is unable -to forge the authentication. - -This effectively 'anchors' the manifest to the key, which is controlled by the -client, thereby anchoring the entire DAG, making it impossible for an attacker -to add, remove or modify any part of the DAG without Borg being able to detect -the tampering. +Passphrase notes +---------------- Note that when using BORG_PASSPHRASE the attacker cannot swap the *entire* repository against a new repository with e.g. repokey mode and no passphrase, @@ -113,11 +96,6 @@ However, interactively a user might not notice this kind of attack immediately, if she assumes that the reason for the absent passphrase prompt is a set BORG_PASSPHRASE. See issue :issue:`2169` for details. -.. [#] The reason why the authentication tag is stored in the packed - data itself is that older Borg versions can still read the - manifest this way, while a changed layout would have broken - compatibility. - .. _security_encryption: Encryption diff --git a/src/borg/archive.py b/src/borg/archive.py index 8d85724c8..b048d5c16 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -296,7 +296,7 @@ class DownloadPipeline: """ hlids_preloaded = set() unpacker = msgpack.Unpacker(use_list=False) - for data in self.fetch_many(ids): + for data in self.fetch_many(ids, ro_type=ROBJ_ARCHIVE_STREAM): unpacker.feed(data) for _item in unpacker: item = Item(internal_dict=_item) @@ -318,9 +318,10 @@ class DownloadPipeline: self.repository.preload([c.id for c in item.chunks]) yield item - def fetch_many(self, ids, is_preloaded=False): + def fetch_many(self, ids, is_preloaded=False, ro_type=None): + assert ro_type is not None for id_, cdata in zip(ids, self.repository.get_many(ids, is_preloaded=is_preloaded)): - _, data = self.repo_objs.parse(id_, cdata) + _, data = self.repo_objs.parse(id_, cdata, ro_type=ro_type) yield data @@ -393,7 +394,9 @@ class CacheChunkBuffer(ChunkBuffer): self.stats = stats def write_chunk(self, chunk): - id_, _ = self.cache.add_chunk(self.key.id_hash(chunk), {}, chunk, stats=self.stats, wait=False) + id_, _ = self.cache.add_chunk( + self.key.id_hash(chunk), {}, chunk, stats=self.stats, wait=False, ro_type=ROBJ_ARCHIVE_STREAM + ) logger.debug(f"writing item metadata stream chunk {bin_to_hex(id_)}") self.cache.repository.async_response(wait=False) return id_ @@ -422,7 +425,7 @@ def archive_get_items(metadata, *, repo_objs, repository): assert "items" not in metadata items = [] for id, cdata in zip(metadata.item_ptrs, repository.get_many(metadata.item_ptrs)): - _, data = repo_objs.parse(id, cdata) + _, data = repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_CHUNKIDS) ids = msgpack.unpackb(data) items.extend(ids) return items @@ -440,9 +443,9 @@ def archive_put_items(chunk_ids, *, repo_objs, cache=None, stats=None, add_refer id = repo_objs.id_hash(data) logger.debug(f"writing item_ptrs chunk {bin_to_hex(id)}") if cache is not None and stats is not None: - cache.add_chunk(id, {}, data, stats=stats) + cache.add_chunk(id, {}, data, stats=stats, ro_type=ROBJ_ARCHIVE_CHUNKIDS) elif add_reference is not None: - cdata = repo_objs.format(id, {}, data) + cdata = repo_objs.format(id, {}, data, ro_type=ROBJ_ARCHIVE_CHUNKIDS) add_reference(id, len(data), cdata) else: raise NotImplementedError @@ -531,8 +534,8 @@ class Archive: def _load_meta(self, id): cdata = self.repository.get(id) - _, data = self.repo_objs.parse(id, cdata) - archive, _ = self.key.unpack_and_verify_archive(data) + _, data = self.repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META) + archive = self.key.unpack_archive(data) metadata = ArchiveItem(internal_dict=archive) if metadata.version not in (1, 2): # legacy: still need to read v1 archives raise Exception("Unknown archive metadata version") @@ -699,10 +702,10 @@ Duration: {0.duration} metadata.update({"size": stats.osize, "nfiles": stats.nfiles}) metadata.update(additional_metadata or {}) metadata = ArchiveItem(metadata) - data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b"archive") + data = self.key.pack_metadata(metadata.as_dict()) self.id = self.repo_objs.id_hash(data) try: - self.cache.add_chunk(self.id, {}, data, stats=self.stats) + self.cache.add_chunk(self.id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META) except IntegrityError as err: err_msg = str(err) # hack to avoid changing the RPC protocol by introducing new (more specific) exception class @@ -740,7 +743,7 @@ Duration: {0.duration} for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)): pi.show(increase=1) add(id) - _, data = self.repo_objs.parse(id, chunk) + _, data = self.repo_objs.parse(id, chunk, ro_type=ROBJ_ARCHIVE_STREAM) sync.feed(data) unique_size = archive_index.stats_against(cache.chunks)[1] pi.finish() @@ -826,7 +829,9 @@ Duration: {0.duration} # it would get stuck. if "chunks" in item: item_chunks_size = 0 - for data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True): + for data in self.pipeline.fetch_many( + [c.id for c in item.chunks], is_preloaded=True, ro_type=ROBJ_FILE_STREAM + ): if pi: pi.show(increase=len(data), info=[remove_surrogates(item.path)]) if stdout: @@ -878,7 +883,7 @@ Duration: {0.duration} fd = open(path, "wb") with fd: ids = [c.id for c in item.chunks] - for data in self.pipeline.fetch_many(ids, is_preloaded=True): + for data in self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM): if pi: pi.show(increase=len(data), info=[remove_surrogates(item.path)]) with backup_io("write"): @@ -1025,9 +1030,9 @@ Duration: {0.duration} setattr(metadata, key, value) if "items" in metadata: del metadata.items - data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b"archive") + data = self.key.pack_metadata(metadata.as_dict()) new_id = self.key.id_hash(data) - self.cache.add_chunk(new_id, {}, data, stats=self.stats) + self.cache.add_chunk(new_id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META) self.manifest.archives[self.name] = (new_id, metadata.time) self.cache.chunk_decref(self.id, self.stats) self.id = new_id @@ -1076,7 +1081,7 @@ Duration: {0.duration} for i, (items_id, data) in enumerate(zip(items_ids, self.repository.get_many(items_ids))): if progress: pi.show(i) - _, data = self.repo_objs.parse(items_id, data) + _, data = self.repo_objs.parse(items_id, data, ro_type=ROBJ_ARCHIVE_STREAM) unpacker.feed(data) chunk_decref(items_id, stats) try: @@ -1132,8 +1137,8 @@ Duration: {0.duration} path, item1, item2, - archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])]), - archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])]), + archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])], ro_type=ROBJ_FILE_STREAM), + archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])], ro_type=ROBJ_FILE_STREAM), can_compare_chunk_ids=can_compare_chunk_ids, ) @@ -1319,7 +1324,7 @@ class ChunksProcessor: started_hashing = time.monotonic() chunk_id, data = cached_hash(chunk, self.key.id_hash) stats.hashing_time += time.monotonic() - started_hashing - chunk_entry = cache.add_chunk(chunk_id, {}, data, stats=stats, wait=False) + chunk_entry = cache.add_chunk(chunk_id, {}, data, stats=stats, wait=False, ro_type=ROBJ_FILE_STREAM) self.cache.repository.async_response(wait=False) return chunk_entry @@ -1898,7 +1903,7 @@ class ArchiveChecker: else: try: # we must decompress, so it'll call assert_id() in there: - self.repo_objs.parse(chunk_id, encrypted_data, decompress=True) + self.repo_objs.parse(chunk_id, encrypted_data, decompress=True, ro_type=ROBJ_DONTCARE) except IntegrityErrorBase as integrity_error: self.error_found = True errors += 1 @@ -1930,7 +1935,7 @@ class ArchiveChecker: try: encrypted_data = self.repository.get(defect_chunk) # we must decompress, so it'll call assert_id() in there: - self.repo_objs.parse(defect_chunk, encrypted_data, decompress=True) + self.repo_objs.parse(defect_chunk, encrypted_data, decompress=True, ro_type=ROBJ_DONTCARE) except IntegrityErrorBase: # failed twice -> get rid of this chunk del self.chunks[defect_chunk] @@ -1970,7 +1975,6 @@ class ArchiveChecker: # lost manifest on a older borg version than the most recent one that was ever used # within this repository (assuming that newer borg versions support more item keys). manifest = Manifest(self.key, self.repository) - archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS] pi = ProgressIndicatorPercent( total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01, msgid="check.rebuild_manifest" ) @@ -1978,14 +1982,12 @@ class ArchiveChecker: pi.show() cdata = self.repository.get(chunk_id) try: - _, data = self.repo_objs.parse(chunk_id, cdata) + meta, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_DONTCARE) except IntegrityErrorBase as exc: logger.error("Skipping corrupted chunk: %s", exc) self.error_found = True continue - if not valid_msgpacked_dict(data, archive_keys_serialized): - continue - if b"command_line" not in data or b"\xa7version\x02" not in data: + if meta["type"] != ROBJ_ARCHIVE_META: continue try: archive = msgpack.unpackb(data) @@ -1993,23 +1995,7 @@ class ArchiveChecker: except msgpack.UnpackException: continue if valid_archive(archive): - # **after** doing the low-level checks and having a strong indication that we - # are likely looking at an archive item here, also check the TAM authentication: - try: - archive, _ = self.key.unpack_and_verify_archive(data) - except IntegrityError as integrity_error: - # TAM issues - do not accept this archive! - # either somebody is trying to attack us with a fake archive data or - # we have an ancient archive made before TAM was a thing (borg < 1.0.9) **and** this repo - # was not correctly upgraded to borg 1.2.5 (see advisory at top of the changelog). - # borg can't tell the difference, so it has to assume this archive might be an attack - # and drops this archive. - name = archive.get(b"name", b"").decode("ascii", "replace") - logger.error("Archive TAM authentication issue for archive %s: %s", name, integrity_error) - logger.error("This archive will *not* be added to the rebuilt manifest! It will be deleted.") - self.error_found = True - continue - # note: if we get here and verified is False, a TAM is not required. + archive = self.key.unpack_archive(data) archive = ArchiveItem(internal_dict=archive) name = archive.name logger.info("Found archive %s", name) @@ -2043,7 +2029,7 @@ class ArchiveChecker: def add_callback(chunk): id_ = self.key.id_hash(chunk) - cdata = self.repo_objs.format(id_, {}, chunk) + cdata = self.repo_objs.format(id_, {}, chunk, ro_type=ROBJ_ARCHIVE_STREAM) add_reference(id_, len(chunk), cdata) return id_ @@ -2066,7 +2052,7 @@ class ArchiveChecker: def replacement_chunk(size): chunk = Chunk(None, allocation=CH_ALLOC, size=size) chunk_id, data = cached_hash(chunk, self.key.id_hash) - cdata = self.repo_objs.format(chunk_id, {}, data) + cdata = self.repo_objs.format(chunk_id, {}, data, ro_type=ROBJ_FILE_STREAM) return chunk_id, size, cdata offset = 0 @@ -2197,7 +2183,7 @@ class ArchiveChecker: unpacker.resync() for chunk_id, cdata in zip(items, repository.get_many(items)): try: - _, data = self.repo_objs.parse(chunk_id, cdata) + _, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_ARCHIVE_STREAM) unpacker.feed(data) for item in unpacker: valid, reason = valid_item(item) @@ -2260,23 +2246,13 @@ class ArchiveChecker: mark_as_possibly_superseded(archive_id) cdata = self.repository.get(archive_id) try: - _, data = self.repo_objs.parse(archive_id, cdata) + _, data = self.repo_objs.parse(archive_id, cdata, ro_type=ROBJ_ARCHIVE_META) except IntegrityError as integrity_error: logger.error("Archive metadata block %s is corrupted: %s", bin_to_hex(archive_id), integrity_error) self.error_found = True del self.manifest.archives[info.name] continue - try: - archive, salt = self.key.unpack_and_verify_archive(data) - except IntegrityError as integrity_error: - # looks like there is a TAM issue with this archive, this might be an attack! - # when upgrading to borg 1.2.5, users are expected to TAM-authenticate all archives they - # trust, so there shouldn't be any without TAM. - logger.error("Archive TAM authentication issue for archive %s: %s", info.name, integrity_error) - logger.error("This archive will be *removed* from the manifest! It will be deleted.") - self.error_found = True - del self.manifest.archives[info.name] - continue + archive = self.key.unpack_archive(data) archive = ArchiveItem(internal_dict=archive) if archive.version != 2: raise Exception("Unknown archive metadata version") @@ -2296,9 +2272,9 @@ class ArchiveChecker: archive.item_ptrs = archive_put_items( items_buffer.chunks, repo_objs=self.repo_objs, add_reference=add_reference ) - data = self.key.pack_and_authenticate_metadata(archive.as_dict(), context=b"archive", salt=salt) + data = self.key.pack_metadata(archive.as_dict()) new_archive_id = self.key.id_hash(data) - cdata = self.repo_objs.format(new_archive_id, {}, data) + cdata = self.repo_objs.format(new_archive_id, {}, data, ro_type=ROBJ_ARCHIVE_META) add_reference(new_archive_id, len(data), cdata) self.manifest.archives[info.name] = (new_archive_id, info.ts) pi.finish() @@ -2434,13 +2410,13 @@ class ArchiveRecreater: chunk_id, data = cached_hash(chunk, self.key.id_hash) if chunk_id in self.seen_chunks: return self.cache.chunk_incref(chunk_id, target.stats) - chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False) + chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM) self.cache.repository.async_response(wait=False) self.seen_chunks.add(chunk_entry.id) return chunk_entry def iter_chunks(self, archive, target, chunks): - chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _ in chunks]) + chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _ in chunks], ro_type=ROBJ_FILE_STREAM) if target.recreate_rechunkify: # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk # (does not load the entire file into memory) diff --git a/src/borg/archiver/debug_cmd.py b/src/borg/archiver/debug_cmd.py index 6c0d1892f..cab8075c8 100644 --- a/src/borg/archiver/debug_cmd.py +++ b/src/borg/archiver/debug_cmd.py @@ -35,7 +35,7 @@ class DebugMixIn: repo_objs = manifest.repo_objs archive = Archive(manifest, args.name) for i, item_id in enumerate(archive.metadata.items): - _, data = repo_objs.parse(item_id, repository.get(item_id)) + _, data = repo_objs.parse(item_id, repository.get(item_id), ro_type=ROBJ_ARCHIVE_STREAM) filename = "%06d_%s.items" % (i, bin_to_hex(item_id)) print("Dumping", filename) with open(filename, "wb") as fd: @@ -65,7 +65,8 @@ class DebugMixIn: fd.write(do_indent(prepare_dump_dict(archive_meta_orig))) fd.write(",\n") - _, data = repo_objs.parse(archive_meta_orig["id"], repository.get(archive_meta_orig["id"])) + archive_id = archive_meta_orig["id"] + _, data = repo_objs.parse(archive_id, repository.get(archive_id), ro_type=ROBJ_ARCHIVE_META) archive_org_dict = msgpack.unpackb(data, object_hook=StableDict) fd.write(' "_meta":\n') @@ -77,10 +78,10 @@ class DebugMixIn: first = True items = [] for chunk_id in archive_org_dict["item_ptrs"]: - _, data = repo_objs.parse(chunk_id, repository.get(chunk_id)) + _, data = repo_objs.parse(chunk_id, repository.get(chunk_id), ro_type=ROBJ_ARCHIVE_CHUNKIDS) items.extend(msgpack.unpackb(data)) for item_id in items: - _, data = repo_objs.parse(item_id, repository.get(item_id)) + _, data = repo_objs.parse(item_id, repository.get(item_id), ro_type=ROBJ_ARCHIVE_STREAM) unpacker.feed(data) for item in unpacker: item = prepare_dump_dict(item) @@ -101,7 +102,7 @@ class DebugMixIn: def do_debug_dump_manifest(self, args, repository, manifest): """dump decoded repository manifest""" repo_objs = manifest.repo_objs - _, data = repo_objs.parse(manifest.MANIFEST_ID, repository.get(manifest.MANIFEST_ID)) + _, data = repo_objs.parse(manifest.MANIFEST_ID, repository.get(manifest.MANIFEST_ID), ro_type=ROBJ_MANIFEST) meta = prepare_dump_dict(msgpack.unpackb(data, object_hook=StableDict)) @@ -116,7 +117,7 @@ class DebugMixIn: def decrypt_dump(i, id, cdata, tag=None, segment=None, offset=None): if cdata is not None: - _, data = repo_objs.parse(id, cdata) + _, data = repo_objs.parse(id, cdata, ro_type=ROBJ_DONTCARE) else: _, data = {}, b"" tag_str = "" if tag is None else "_" + tag @@ -211,7 +212,7 @@ class DebugMixIn: break for id in ids: cdata = repository.get(id) - _, data = repo_objs.parse(id, cdata) + _, data = repo_objs.parse(id, cdata, ro_type=ROBJ_DONTCARE) # try to locate wanted sequence crossing the border of last_data and data boundary_data = last_data[-(len(wanted) - 1) :] + data[: len(wanted) - 1] @@ -284,7 +285,7 @@ class DebugMixIn: cdata = f.read() repo_objs = manifest.repo_objs - meta, data = repo_objs.parse(id=id, cdata=cdata) + meta, data = repo_objs.parse(id=id, cdata=cdata, ro_type=ROBJ_DONTCARE) with open(args.json_path, "w") as f: json.dump(meta, f) @@ -315,7 +316,8 @@ class DebugMixIn: meta = json.load(f) repo_objs = manifest.repo_objs - data_encrypted = repo_objs.format(id=id, meta=meta, data=data) + # TODO: support misc repo object types other than ROBJ_FILE_STREAM + data_encrypted = repo_objs.format(id=id, meta=meta, data=data, ro_type=ROBJ_FILE_STREAM) with open(args.object_path, "wb") as f: f.write(data_encrypted) diff --git a/src/borg/archiver/rcompress_cmd.py b/src/borg/archiver/rcompress_cmd.py index 038fd70be..40d7571d0 100644 --- a/src/borg/archiver/rcompress_cmd.py +++ b/src/borg/archiver/rcompress_cmd.py @@ -37,7 +37,7 @@ def find_chunks(repository, repo_objs, stats, ctype, clevel, olevel): if not chunk_ids: break for id, chunk_no_data in zip(chunk_ids, repository.get_many(chunk_ids, read_data=False)): - meta = repo_objs.parse_meta(id, chunk_no_data) + meta = repo_objs.parse_meta(id, chunk_no_data, ro_type=ROBJ_DONTCARE) compr_found = meta["ctype"], meta["clevel"], meta.get("olevel", -1) if compr_found != compr_wanted: recompress_ids.append(id) @@ -57,13 +57,14 @@ def process_chunks(repository, repo_objs, stats, recompress_ids, olevel): for id, chunk in zip(recompress_ids, repository.get_many(recompress_ids, read_data=True)): old_size = len(chunk) stats["old_size"] += old_size - meta, data = repo_objs.parse(id, chunk) + meta, data = repo_objs.parse(id, chunk, ro_type=ROBJ_DONTCARE) + ro_type = meta.pop("type", None) compr_old = meta["ctype"], meta["clevel"], meta.get("olevel", -1) if olevel == -1: # if the chunk was obfuscated, but should not be in future, remove related metadata meta.pop("olevel", None) meta.pop("psize", None) - chunk = repo_objs.format(id, meta, data) + chunk = repo_objs.format(id, meta, data, ro_type=ro_type) compr_done = meta["ctype"], meta["clevel"], meta.get("olevel", -1) if compr_done != compr_old: # we actually changed something diff --git a/src/borg/archiver/tar_cmds.py b/src/borg/archiver/tar_cmds.py index b18cb24d0..0504f97e2 100644 --- a/src/borg/archiver/tar_cmds.py +++ b/src/borg/archiver/tar_cmds.py @@ -115,7 +115,9 @@ class TarMixIn: """ Return a file-like object that reads from the chunks of *item*. """ - chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _ in item.chunks], is_preloaded=True) + chunk_iterator = archive.pipeline.fetch_many( + [chunk_id for chunk_id, _ in item.chunks], is_preloaded=True, ro_type=ROBJ_FILE_STREAM + ) if pi: info = [remove_surrogates(item.path)] return ChunkIteratorFileWrapper( diff --git a/src/borg/archiver/transfer_cmd.py b/src/borg/archiver/transfer_cmd.py index 49529a6cf..f820ef63f 100644 --- a/src/borg/archiver/transfer_cmd.py +++ b/src/borg/archiver/transfer_cmd.py @@ -111,7 +111,11 @@ class TransferMixIn: # keep compressed payload same, verify via assert_id (that will # decompress, but avoid needing to compress it again): meta, data = other_manifest.repo_objs.parse( - chunk_id, cdata, decompress=True, want_compressed=True + chunk_id, + cdata, + decompress=True, + want_compressed=True, + ro_type=ROBJ_FILE_STREAM, ) meta, data = upgrader.upgrade_compressed_chunk(meta, data) chunk_entry = cache.add_chunk( @@ -124,12 +128,20 @@ class TransferMixIn: size=size, ctype=meta["ctype"], clevel=meta["clevel"], + ro_type=ROBJ_FILE_STREAM, ) elif args.recompress == "always": # always decompress and re-compress file data chunks - meta, data = other_manifest.repo_objs.parse(chunk_id, cdata) + meta, data = other_manifest.repo_objs.parse( + chunk_id, cdata, ro_type=ROBJ_FILE_STREAM + ) chunk_entry = cache.add_chunk( - chunk_id, meta, data, stats=archive.stats, wait=False + chunk_id, + meta, + data, + stats=archive.stats, + wait=False, + ro_type=ROBJ_FILE_STREAM, ) else: raise ValueError(f"unsupported recompress mode: {args.recompress}") diff --git a/src/borg/cache.py b/src/borg/cache.py index 4f0032e16..c18f315a2 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -12,7 +12,7 @@ logger = create_logger() files_cache_logger = create_logger("borg.debug.files_cache") -from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED +from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer from .helpers import Location from .helpers import Error @@ -755,7 +755,7 @@ class LocalCache(CacheStatsMixin): nonlocal processed_item_metadata_chunks csize, data = decrypted_repository.get(archive_id) chunk_idx.add(archive_id, 1, len(data)) - archive, _ = self.key.unpack_and_verify_archive(data) + archive = self.key.unpack_archive(data) archive = ArchiveItem(internal_dict=archive) if archive.version not in (1, 2): # legacy raise Exception("Unknown archive metadata version") @@ -939,7 +939,21 @@ class LocalCache(CacheStatsMixin): self.cache_config.ignored_features.update(repo_features - my_features) self.cache_config.mandatory_features.update(repo_features & my_features) - def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ctype=None, clevel=None): + def add_chunk( + self, + id, + meta, + data, + *, + stats, + wait=True, + compress=True, + size=None, + ctype=None, + clevel=None, + ro_type=ROBJ_FILE_STREAM, + ): + assert ro_type is not None if not self.txn_active: self.begin_txn() if size is None and compress: @@ -949,7 +963,9 @@ class LocalCache(CacheStatsMixin): return self.chunk_incref(id, stats) if size is None: raise ValueError("when giving compressed data for a new chunk, the uncompressed size must be given also") - cdata = self.repo_objs.format(id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel) + cdata = self.repo_objs.format( + id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel, ro_type=ro_type + ) self.repository.put(id, cdata, wait=wait) self.chunks.add(id, 1, size) stats.update(size, not refcount) @@ -1113,7 +1129,8 @@ Chunk index: {0.total_unique_chunks:20d} unknown""" def memorize_file(self, hashed_path, path_hash, st, ids): pass - def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None): + def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ro_type=ROBJ_FILE_STREAM): + assert ro_type is not None if not self._txn_active: self.begin_txn() if size is None and compress: @@ -1123,7 +1140,7 @@ Chunk index: {0.total_unique_chunks:20d} unknown""" refcount = self.seen_chunk(id, size) if refcount: return self.chunk_incref(id, stats, size=size) - cdata = self.repo_objs.format(id, meta, data, compress=compress) + cdata = self.repo_objs.format(id, meta, data, compress=compress, ro_type=ro_type) self.repository.put(id, cdata, wait=wait) self.chunks.add(id, 1, size) stats.update(size, not refcount) diff --git a/src/borg/constants.py b/src/borg/constants.py index 54528b61c..7f4cbc31d 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -33,6 +33,14 @@ UMASK_DEFAULT = 0o077 # forcing to 0o100XXX later STDIN_MODE_DEFAULT = 0o660 +# RepoObj types +ROBJ_MANIFEST = "M" # Manifest (directory of archives, other metadata) object +ROBJ_ARCHIVE_META = "A" # main archive metadata object +ROBJ_ARCHIVE_CHUNKIDS = "C" # objects with a list of archive metadata stream chunkids +ROBJ_ARCHIVE_STREAM = "S" # archive metadata stream chunk (containing items) +ROBJ_FILE_STREAM = "F" # file content stream chunk (containing user data) +ROBJ_DONTCARE = "*" # used to parse without type assertion (= accept any type) + # in borg < 1.3, this has been defined like this: # 20 MiB minus 41 bytes for a PUT header (because the "size" field in the Repository includes # the header, and the total size was set to precisely 20 MiB for borg < 1.3). diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index ef9f86efa..978007d1d 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -21,7 +21,7 @@ from ..helpers import bin_to_hex from ..helpers.passphrase import Passphrase, PasswordRetriesExceeded, PassphraseWrong from ..helpers import msgpack from ..helpers import workarounds -from ..item import Key, EncryptedKey, want_bytes +from ..item import Key, EncryptedKey from ..manifest import Manifest from ..platform import SaveFile from ..repoobj import RepoObj @@ -63,48 +63,6 @@ class UnsupportedKeyFormatError(Error): """Your borg key is stored in an unsupported format. Try using a newer version of borg.""" -class TAMRequiredError(IntegrityError): - __doc__ = textwrap.dedent( - """ - Manifest is unauthenticated, but it is required for this repository. Is somebody attacking you? - """ - ).strip() - traceback = False - - -class ArchiveTAMRequiredError(TAMRequiredError): - __doc__ = textwrap.dedent( - """ - Archive '{}' is unauthenticated, but it is required for this repository. - """ - ).strip() - traceback = False - - -class TAMInvalid(IntegrityError): - __doc__ = IntegrityError.__doc__ - traceback = False - - def __init__(self): - # Error message becomes: "Data integrity error: Manifest authentication did not verify" - super().__init__("Manifest authentication did not verify") - - -class ArchiveTAMInvalid(IntegrityError): - __doc__ = IntegrityError.__doc__ - traceback = False - - def __init__(self): - # Error message becomes: "Data integrity error: Archive authentication did not verify" - super().__init__("Archive authentication did not verify") - - -class TAMUnsupportedSuiteError(IntegrityError): - """Could not verify manifest: Unsupported suite {!r}; a newer version is needed.""" - - traceback = False - - def key_creator(repository, args, *, other_key=None): for key in AVAILABLE_KEY_TYPES: if key.ARG_NAME == args.encryption: @@ -224,25 +182,11 @@ class KeyBase: id_str = bin_to_hex(id) if id is not None else "(unknown)" raise IntegrityError(f"Chunk {id_str}: Invalid encryption envelope") - def _tam_key(self, salt, context): - return hkdf_hmac_sha512( - ikm=self.id_key + self.crypt_key, - salt=salt, - info=b"borg-metadata-authentication-" + context, - output_length=64, - ) - - def pack_and_authenticate_metadata(self, metadata_dict, context=b"manifest", salt=None): - if salt is None: - salt = os.urandom(64) + def pack_metadata(self, metadata_dict): metadata_dict = StableDict(metadata_dict) - tam = metadata_dict["tam"] = StableDict({"type": "HKDF_HMAC_SHA512", "hmac": bytes(64), "salt": salt}) - packed = msgpack.packb(metadata_dict) - tam_key = self._tam_key(salt, context) - tam["hmac"] = hmac.digest(tam_key, packed, "sha512") return msgpack.packb(metadata_dict) - def unpack_and_verify_manifest(self, data): + def unpack_manifest(self, data): """Unpack msgpacked *data* and return manifest.""" if data.startswith(b"\xc1" * 4): # This is a manifest from the future, we can't read it. @@ -251,60 +195,17 @@ class KeyBase: unpacker = get_limited_unpacker("manifest") unpacker.feed(data) unpacked = unpacker.unpack() - if AUTHENTICATED_NO_KEY: - return unpacked - if "tam" not in unpacked: - raise TAMRequiredError(self.repository._location.canonical_path()) - tam = unpacked.pop("tam", None) - if not isinstance(tam, dict): - raise TAMInvalid() - tam_type = tam.get("type", "") - if tam_type != "HKDF_HMAC_SHA512": - raise TAMUnsupportedSuiteError(repr(tam_type)) - tam_hmac = tam.get("hmac") - tam_salt = tam.get("salt") - if not isinstance(tam_salt, (bytes, str)) or not isinstance(tam_hmac, (bytes, str)): - raise TAMInvalid() - tam_hmac = want_bytes(tam_hmac) # legacy - tam_salt = want_bytes(tam_salt) # legacy - offset = data.index(tam_hmac) - data[offset : offset + 64] = bytes(64) - tam_key = self._tam_key(tam_salt, context=b"manifest") - calculated_hmac = hmac.digest(tam_key, data, "sha512") - if not hmac.compare_digest(calculated_hmac, tam_hmac): - raise TAMInvalid() - logger.debug("TAM-verified manifest") + unpacked.pop("tam", None) # legacy return unpacked - def unpack_and_verify_archive(self, data): - """Unpack msgpacked *data* and return (object, salt).""" + def unpack_archive(self, data): + """Unpack msgpacked *data* and return archive metadata dict.""" data = bytearray(data) unpacker = get_limited_unpacker("archive") unpacker.feed(data) unpacked = unpacker.unpack() - if "tam" not in unpacked: - archive_name = unpacked.get("name", "") - raise ArchiveTAMRequiredError(archive_name) - tam = unpacked.pop("tam", None) - if not isinstance(tam, dict): - raise ArchiveTAMInvalid() - tam_type = tam.get("type", "") - if tam_type != "HKDF_HMAC_SHA512": - raise TAMUnsupportedSuiteError(repr(tam_type)) - tam_hmac = tam.get("hmac") - tam_salt = tam.get("salt") - if not isinstance(tam_salt, (bytes, str)) or not isinstance(tam_hmac, (bytes, str)): - raise ArchiveTAMInvalid() - tam_hmac = want_bytes(tam_hmac) # legacy - tam_salt = want_bytes(tam_salt) # legacy - offset = data.index(tam_hmac) - data[offset : offset + 64] = bytes(64) - tam_key = self._tam_key(tam_salt, context=b"archive") - calculated_hmac = hmac.digest(tam_key, data, "sha512") - if not hmac.compare_digest(calculated_hmac, tam_hmac): - raise ArchiveTAMInvalid() - logger.debug("TAM-verified archive") - return unpacked, tam_salt + unpacked.pop("tam", None) # legacy + return unpacked class PlaintextKey(KeyBase): @@ -335,9 +236,6 @@ class PlaintextKey(KeyBase): self.assert_type(data[0], id) return memoryview(data)[1:] - def _tam_key(self, salt, context): - return salt + context - def random_blake2b_256_key(): # This might look a bit curious, but is the same construction used in the keyed mode of BLAKE2b. @@ -836,7 +734,6 @@ class AuthenticatedKeyBase(AESKeyBase, FlexiKey): self.enc_hmac_key = NOPE self.id_key = NOPE self.chunk_seed = 0 - self.tam_required = False return True return super()._load(key_data, passphrase) diff --git a/src/borg/fuse.py b/src/borg/fuse.py index 376020d90..92f145874 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -10,6 +10,7 @@ import time from collections import defaultdict from signal import SIGINT +from .constants import ROBJ_FILE_STREAM from .fuse_impl import llfuse, has_pyfuse3 @@ -688,7 +689,7 @@ class FuseOperations(llfuse.Operations, FuseBackend): # evict fully read chunk from cache del self.data_cache[id] else: - _, data = self.repo_objs.parse(id, self.repository_uncached.get(id)) + _, data = self.repo_objs.parse(id, self.repository_uncached.get(id), ro_type=ROBJ_FILE_STREAM) if offset + n < len(data): # chunk was only partially read, cache it self.data_cache[id] = data diff --git a/src/borg/helpers/misc.py b/src/borg/helpers/misc.py index d844b7634..1f687a0bb 100644 --- a/src/borg/helpers/misc.py +++ b/src/borg/helpers/misc.py @@ -13,6 +13,7 @@ logger = create_logger() from . import msgpack from .. import __version__ as borg_version +from ..constants import ROBJ_FILE_STREAM def sysinfo(): @@ -123,7 +124,7 @@ class ChunkIteratorFileWrapper: def open_item(archive, item): """Return file-like object for archived item (with chunks).""" - chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks]) + chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks], ro_type=ROBJ_FILE_STREAM) return ChunkIteratorFileWrapper(chunk_iterator) diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index b4a455a04..4b0caf6cd 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -723,12 +723,11 @@ class ArchiveFormatter(BaseFormatter): "id": "internal ID of the archive", "hostname": "hostname of host on which this archive was created", "username": "username of user who created this archive", - "tam": "TAM authentication state of this archive", "size": "size of this archive (data plus metadata, not considering compression and deduplication)", "nfiles": "count of files in this archive", } KEY_GROUPS = ( - ("archive", "name", "comment", "id", "tam"), + ("archive", "name", "comment", "id"), ("start", "time", "end", "command_line"), ("hostname", "username"), ("size", "nfiles"), @@ -933,7 +932,7 @@ class ItemFormatter(BaseFormatter): hash = self.xxh64() elif hash_function in self.hash_algorithms: hash = hashlib.new(hash_function) - for data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]): + for data in self.archive.pipeline.fetch_many([c.id for c in item.chunks], ro_type=ROBJ_FILE_STREAM): hash.update(data) return hash.hexdigest() diff --git a/src/borg/manifest.py b/src/borg/manifest.py index a9609884e..aa7e6143e 100644 --- a/src/borg/manifest.py +++ b/src/borg/manifest.py @@ -250,8 +250,8 @@ class Manifest: if not key: key = key_factory(repository, cdata, ro_cls=ro_cls) manifest = cls(key, repository, ro_cls=ro_cls) - _, data = manifest.repo_objs.parse(cls.MANIFEST_ID, cdata) - manifest_dict = key.unpack_and_verify_manifest(data) + _, data = manifest.repo_objs.parse(cls.MANIFEST_ID, cdata, ro_type=ROBJ_MANIFEST) + manifest_dict = key.unpack_manifest(data) m = ManifestItem(internal_dict=manifest_dict) manifest.id = manifest.repo_objs.id_hash(data) if m.get("version") not in (1, 2): @@ -313,6 +313,6 @@ class Manifest: timestamp=self.timestamp, config=StableDict(self.config), ) - data = self.key.pack_and_authenticate_metadata(manifest.as_dict()) + data = self.key.pack_metadata(manifest.as_dict()) self.id = self.repo_objs.id_hash(data) - self.repository.put(self.MANIFEST_ID, self.repo_objs.format(self.MANIFEST_ID, {}, data)) + self.repository.put(self.MANIFEST_ID, self.repo_objs.format(self.MANIFEST_ID, {}, data, ro_type=ROBJ_MANIFEST)) diff --git a/src/borg/remote.py b/src/borg/remote.py index 40b8b62ff..4a1551f05 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -1204,7 +1204,7 @@ def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None return csize, decrypted def transform(id_, data): - meta, decrypted = repo_objs.parse(id_, data) + meta, decrypted = repo_objs.parse(id_, data, ro_type=ROBJ_DONTCARE) csize = meta.get("csize", len(data)) return csize, decrypted diff --git a/src/borg/repoobj.py b/src/borg/repoobj.py index 557d4a604..3fb2534ad 100644 --- a/src/borg/repoobj.py +++ b/src/borg/repoobj.py @@ -1,6 +1,8 @@ from struct import Struct +from .constants import * # NOQA from .helpers import msgpack, workarounds +from .helpers.errors import IntegrityError from .compress import Compressor, LZ4_COMPRESSOR, get_compressor # workaround for lost passphrase or key in "authenticated" or "authenticated-blake2" mode @@ -35,7 +37,11 @@ class RepoObj: size: int = None, ctype: int = None, clevel: int = None, + ro_type: str = None, ) -> bytes: + assert isinstance(ro_type, str) + assert ro_type != ROBJ_DONTCARE + meta["type"] = ro_type assert isinstance(id, bytes) assert isinstance(meta, dict) assert isinstance(data, (bytes, memoryview)) @@ -58,11 +64,12 @@ class RepoObj: hdr = self.meta_len_hdr.pack(len(meta_encrypted)) return hdr + meta_encrypted + data_encrypted - def parse_meta(self, id: bytes, cdata: bytes) -> dict: + def parse_meta(self, id: bytes, cdata: bytes, ro_type: str) -> dict: # when calling parse_meta, enough cdata needs to be supplied to contain completely the # meta_len_hdr and the encrypted, packed metadata. it is allowed to provide more cdata. assert isinstance(id, bytes) assert isinstance(cdata, bytes) + assert isinstance(ro_type, str) obj = memoryview(cdata) offs = self.meta_len_hdr.size hdr = obj[:offs] @@ -71,10 +78,12 @@ class RepoObj: meta_encrypted = obj[offs : offs + len_meta_encrypted] meta_packed = self.key.decrypt(id, meta_encrypted) meta = msgpack.unpackb(meta_packed) + if ro_type != ROBJ_DONTCARE and meta["type"] != ro_type: + raise IntegrityError(f"ro_type expected: {ro_type} got: {meta['type']}") return meta def parse( - self, id: bytes, cdata: bytes, decompress: bool = True, want_compressed: bool = False + self, id: bytes, cdata: bytes, decompress: bool = True, want_compressed: bool = False, ro_type: str = None ) -> tuple[dict, bytes]: """ Parse a repo object into metadata and data (decrypt it, maybe decompress, maybe verify if the chunk plaintext @@ -86,6 +95,7 @@ class RepoObj: - decompress=False, want_compressed=True: quick, not verifying. returns compressed data (caller wants to reuse). - decompress=False, want_compressed=False: invalid """ + assert isinstance(ro_type, str) assert not (not decompress and not want_compressed), "invalid parameter combination!" assert isinstance(id, bytes) assert isinstance(cdata, bytes) @@ -98,6 +108,8 @@ class RepoObj: offs += len_meta_encrypted meta_packed = self.key.decrypt(id, meta_encrypted) meta_compressed = msgpack.unpackb(meta_packed) # means: before adding more metadata in decompress block + if ro_type != ROBJ_DONTCARE and meta_compressed["type"] != ro_type: + raise IntegrityError(f"ro_type expected: {ro_type} got: {meta_compressed['type']}") data_encrypted = obj[offs:] data_compressed = self.key.decrypt(id, data_encrypted) # does not include the type/level bytes if decompress: @@ -142,10 +154,12 @@ class RepoObj1: # legacy size: int = None, ctype: int = None, clevel: int = None, + ro_type: str = None, ) -> bytes: assert isinstance(id, bytes) assert meta == {} assert isinstance(data, (bytes, memoryview)) + assert ro_type is not None assert compress or size is not None and ctype is not None and clevel is not None if compress: assert size is None or size == len(data) @@ -160,11 +174,12 @@ class RepoObj1: # legacy raise NotImplementedError("parse_meta is not available for RepoObj1") def parse( - self, id: bytes, cdata: bytes, decompress: bool = True, want_compressed: bool = False + self, id: bytes, cdata: bytes, decompress: bool = True, want_compressed: bool = False, ro_type: str = None ) -> tuple[dict, bytes]: assert not (not decompress and not want_compressed), "invalid parameter combination!" assert isinstance(id, bytes) assert isinstance(cdata, bytes) + assert ro_type is not None data_compressed = self.key.decrypt(id, cdata) compressor_cls, compression_level = Compressor.detect(data_compressed[:2]) compressor = compressor_cls(level=compression_level, legacy_mode=True) diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index cefd6293b..361a82892 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -127,7 +127,8 @@ class MockCache: self.objects = {} self.repository = self.MockRepo() - def add_chunk(self, id, meta, data, stats=None, wait=True): + def add_chunk(self, id, meta, data, stats=None, wait=True, ro_type=None): + assert ro_type is not None self.objects[id] = data return id, len(data) diff --git a/src/borg/testsuite/archiver/check_cmd.py b/src/borg/testsuite/archiver/check_cmd.py index 5ddce3437..cf4c6baf2 100644 --- a/src/borg/testsuite/archiver/check_cmd.py +++ b/src/borg/testsuite/archiver/check_cmd.py @@ -1,3 +1,4 @@ +from datetime import datetime, timezone, timedelta import shutil from unittest.mock import patch @@ -5,7 +6,7 @@ import pytest from ...archive import ChunkBuffer from ...constants import * # NOQA -from ...helpers import bin_to_hex +from ...helpers import bin_to_hex, msgpack from ...manifest import Manifest from ...repository import Repository from . import cmd, src_file, create_src_archive, open_archive, generate_archiver_tests, RK_ENCRYPTION @@ -205,6 +206,40 @@ def test_corrupted_manifest(archivers, request): cmd(archiver, "check", exit_code=0) +def test_spoofed_manifest(archivers, request): + archiver = request.getfixturevalue(archivers) + check_cmd_setup(archiver) + archive, repository = open_archive(archiver.repository_path, "archive1") + with repository: + manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) + cdata = manifest.repo_objs.format( + Manifest.MANIFEST_ID, + {}, + msgpack.packb( + { + "version": 1, + "archives": {}, + "config": {}, + "timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(timespec="microseconds"), + } + ), + # we assume that an attacker can put a file into backup src files that contains a fake manifest. + # but, the attacker can not influence the ro_type borg will use to store user file data: + ro_type=ROBJ_FILE_STREAM, # a real manifest is stored with ROBJ_MANIFEST + ) + # maybe a repo-side attacker could manage to move the fake manifest file chunk over to the manifest ID. + # we simulate this here by directly writing the fake manifest data to the manifest ID. + repository.put(Manifest.MANIFEST_ID, cdata) + repository.commit(compact=False) + # borg should notice that the manifest has the wrong ro_type. + cmd(archiver, "check", exit_code=1) + # borg check --repair should remove the corrupted manifest and rebuild a new one. + output = cmd(archiver, "check", "-v", "--repair", exit_code=0) + assert "archive1" in output + assert "archive2" in output + cmd(archiver, "check", exit_code=0) + + def test_manifest_rebuild_corrupted_chunk(archivers, request): archiver = request.getfixturevalue(archivers) check_cmd_setup(archiver) @@ -241,9 +276,9 @@ def test_manifest_rebuild_duplicate_archive(archivers, request): "time": "2016-12-15T18:49:51.849711", "version": 2, } - archive = repo_objs.key.pack_and_authenticate_metadata(archive_dict, context=b"archive") + archive = repo_objs.key.pack_metadata(archive_dict) archive_id = repo_objs.id_hash(archive) - repository.put(archive_id, repo_objs.format(archive_id, {}, archive)) + repository.put(archive_id, repo_objs.format(archive_id, {}, archive, ro_type=ROBJ_ARCHIVE_META)) repository.commit(compact=False) cmd(archiver, "check", exit_code=1) cmd(archiver, "check", "--repair", exit_code=0) @@ -253,6 +288,47 @@ def test_manifest_rebuild_duplicate_archive(archivers, request): assert "archive2" in output +def test_spoofed_archive(archivers, request): + archiver = request.getfixturevalue(archivers) + check_cmd_setup(archiver) + archive, repository = open_archive(archiver.repository_path, "archive1") + repo_objs = archive.repo_objs + with repository: + # attacker would corrupt or delete the manifest to trigger a rebuild of it: + manifest = repository.get(Manifest.MANIFEST_ID) + corrupted_manifest = manifest + b"corrupted!" + repository.put(Manifest.MANIFEST_ID, corrupted_manifest) + archive_dict = { + "command_line": "", + "item_ptrs": [], + "hostname": "foo", + "username": "bar", + "name": "archive_spoofed", + "time": "2016-12-15T18:49:51.849711", + "version": 2, + } + archive = repo_objs.key.pack_metadata(archive_dict) + archive_id = repo_objs.id_hash(archive) + repository.put( + archive_id, + repo_objs.format( + archive_id, + {}, + archive, + # we assume that an attacker can put a file into backup src files that contains a fake archive. + # but, the attacker can not influence the ro_type borg will use to store user file data: + ro_type=ROBJ_FILE_STREAM, # a real archive is stored with ROBJ_ARCHIVE_META + ), + ) + repository.commit(compact=False) + cmd(archiver, "check", exit_code=1) + cmd(archiver, "check", "--repair", "--debug", exit_code=0) + output = cmd(archiver, "rlist") + assert "archive1" in output + assert "archive2" in output + assert "archive_spoofed" not in output + + def test_extra_chunks(archivers, request): archiver = request.getfixturevalue(archivers) if archiver.get_kind() == "remote": diff --git a/src/borg/testsuite/archiver/checks.py b/src/borg/testsuite/archiver/checks.py index f6311c8e3..0fbf73439 100644 --- a/src/borg/testsuite/archiver/checks.py +++ b/src/borg/testsuite/archiver/checks.py @@ -1,22 +1,19 @@ import os import shutil -from datetime import datetime, timezone, timedelta from unittest.mock import patch import pytest from ...cache import Cache, LocalCache from ...constants import * # NOQA -from ...crypto.key import TAMRequiredError -from ...helpers import Location, get_security_dir, bin_to_hex, archive_ts_now +from ...helpers import Location, get_security_dir, bin_to_hex from ...helpers import EXIT_ERROR -from ...helpers import msgpack from ...manifest import Manifest, MandatoryFeatureUnsupported from ...remote import RemoteRepository, PathNotAllowed from ...repository import Repository from .. import llfuse from .. import changedir -from . import cmd, _extract_repository_id, open_repository, check_cache, create_test_files, create_src_archive +from . import cmd, _extract_repository_id, open_repository, check_cache, create_test_files from . import _set_repository_id, create_regular_file, assert_creates_file, generate_archiver_tests, RK_ENCRYPTION pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote") # NOQA @@ -322,117 +319,6 @@ def test_check_cache(archivers, request): check_cache(archiver) -# Begin manifest TAM tests -def spoof_manifest(repository): - with repository: - manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) - cdata = manifest.repo_objs.format( - Manifest.MANIFEST_ID, - {}, - msgpack.packb( - { - "version": 1, - "archives": {}, - "config": {}, - "timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(timespec="microseconds"), - } - ), - ) - repository.put(Manifest.MANIFEST_ID, cdata) - repository.commit(compact=False) - - -def test_fresh_init_tam_required(archiver): - cmd(archiver, "rcreate", RK_ENCRYPTION) - repository = Repository(archiver.repository_path, exclusive=True) - with repository: - manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) - cdata = manifest.repo_objs.format( - Manifest.MANIFEST_ID, - {}, - msgpack.packb( - { - "version": 1, - "archives": {}, - "timestamp": (datetime.now(tz=timezone.utc) + timedelta(days=1)).isoformat(timespec="microseconds"), - } - ), - ) - repository.put(Manifest.MANIFEST_ID, cdata) - repository.commit(compact=False) - - with pytest.raises(TAMRequiredError): - cmd(archiver, "rlist") - - -def test_not_required(archiver): - cmd(archiver, "rcreate", RK_ENCRYPTION) - create_src_archive(archiver, "archive1234") - repository = Repository(archiver.repository_path, exclusive=True) - # Manifest must be authenticated now - output = cmd(archiver, "rlist", "--debug") - assert "archive1234" in output - assert "TAM-verified manifest" in output - # Try to spoof / modify pre-1.0.9 - spoof_manifest(repository) - # Fails - with pytest.raises(TAMRequiredError): - cmd(archiver, "rlist") - - -# Begin archive TAM tests -def write_archive_without_tam(repository, archive_name): - manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) - archive_data = msgpack.packb( - { - "version": 2, - "name": archive_name, - "item_ptrs": [], - "command_line": "", - "hostname": "", - "username": "", - "time": archive_ts_now().isoformat(timespec="microseconds"), - "size": 0, - "nfiles": 0, - } - ) - archive_id = manifest.repo_objs.id_hash(archive_data) - cdata = manifest.repo_objs.format(archive_id, {}, archive_data) - repository.put(archive_id, cdata) - manifest.archives[archive_name] = (archive_id, datetime.now()) - manifest.write() - repository.commit(compact=False) - - -def test_check_rebuild_manifest(archiver): - cmd(archiver, "rcreate", RK_ENCRYPTION) - create_src_archive(archiver, "archive_tam") - repository = Repository(archiver.repository_path, exclusive=True) - with repository: - write_archive_without_tam(repository, "archive_no_tam") - repository.delete(Manifest.MANIFEST_ID) # kill manifest, so check has to rebuild it - repository.commit(compact=False) - cmd(archiver, "check", "--repair") - output = cmd(archiver, "rlist", "--format='{name}{NL}'") - assert "archive_tam" in output # TAM-verified archive is in rebuilt manifest - assert "archive_no_tam" not in output # check got rid of untrusted not TAM-verified archive - - -def test_check_rebuild_refcounts(archiver): - cmd(archiver, "rcreate", RK_ENCRYPTION) - create_src_archive(archiver, "archive_tam") - archive_id_pre_check = cmd(archiver, "rlist", "--format='{name} {id}{NL}'") - repository = Repository(archiver.repository_path, exclusive=True) - with repository: - write_archive_without_tam(repository, "archive_no_tam") - cmd(archiver, "check", "--repair") - output = cmd(archiver, "rlist", "--format='{name}{NL}'") - assert "archive_tam" in output # TAM-verified archive still there - assert "archive_no_tam" not in output # check got rid of untrusted not TAM-verified archive - archive_id_post_check = cmd(archiver, "rlist", "--format='{name} {id}{NL}'") - assert archive_id_post_check == archive_id_pre_check # rebuild_refcounts didn't change archive_tam archive id - - # Begin Remote Tests def test_remote_repo_restrict_to_path(remote_archiver): original_location, repo_path = remote_archiver.repository_location, remote_archiver.repository_path diff --git a/src/borg/testsuite/archiver/rcompress_cmd.py b/src/borg/testsuite/archiver/rcompress_cmd.py index 3fa8a34bf..dbae44837 100644 --- a/src/borg/testsuite/archiver/rcompress_cmd.py +++ b/src/borg/testsuite/archiver/rcompress_cmd.py @@ -22,7 +22,9 @@ def test_rcompress(archiver): break for id in ids: chunk = repository.get(id, read_data=True) - meta, data = manifest.repo_objs.parse(id, chunk) # will also decompress according to metadata + meta, data = manifest.repo_objs.parse( + id, chunk, ro_type=ROBJ_DONTCARE + ) # will also decompress according to metadata m_olevel = meta.get("olevel", -1) m_psize = meta.get("psize", -1) print( diff --git a/src/borg/testsuite/key.py b/src/borg/testsuite/key.py index 749c792e4..7f4f8dce6 100644 --- a/src/borg/testsuite/key.py +++ b/src/borg/testsuite/key.py @@ -11,13 +11,11 @@ from ..crypto.key import AEADKeyBase from ..crypto.key import AESOCBRepoKey, AESOCBKeyfileKey, CHPORepoKey, CHPOKeyfileKey from ..crypto.key import Blake2AESOCBRepoKey, Blake2AESOCBKeyfileKey, Blake2CHPORepoKey, Blake2CHPOKeyfileKey from ..crypto.key import ID_HMAC_SHA_256, ID_BLAKE2b_256 -from ..crypto.key import TAMRequiredError, TAMInvalid, TAMUnsupportedSuiteError, ArchiveTAMInvalid from ..crypto.key import UnsupportedManifestError, UnsupportedKeyFormatError from ..crypto.key import identify_key from ..crypto.low_level import IntegrityError as IntegrityErrorBase from ..helpers import IntegrityError from ..helpers import Location -from ..helpers import StableDict from ..helpers import msgpack from ..constants import KEY_ALGORITHMS @@ -266,125 +264,25 @@ class TestTAM: def test_unpack_future(self, key): blob = b"\xc1\xc1\xc1\xc1foobar" with pytest.raises(UnsupportedManifestError): - key.unpack_and_verify_manifest(blob) + key.unpack_manifest(blob) blob = b"\xc1\xc1\xc1" with pytest.raises(msgpack.UnpackException): - key.unpack_and_verify_manifest(blob) - - def test_missing(self, key): - blob = msgpack.packb({}) - with pytest.raises(TAMRequiredError): - key.unpack_and_verify_manifest(blob) - with pytest.raises(TAMRequiredError): - key.unpack_and_verify_archive(blob) - - def test_unknown_type(self, key): - blob = msgpack.packb({"tam": {"type": "HMAC_VOLLBIT"}}) - with pytest.raises(TAMUnsupportedSuiteError): - key.unpack_and_verify_manifest(blob) - with pytest.raises(TAMUnsupportedSuiteError): - key.unpack_and_verify_archive(blob) - - @pytest.mark.parametrize( - "tam, exc", - ( - ({}, TAMUnsupportedSuiteError), - ({"type": b"\xff"}, TAMUnsupportedSuiteError), - (None, TAMInvalid), - (1234, TAMInvalid), - ), - ) - def test_invalid_manifest(self, key, tam, exc): - blob = msgpack.packb({"tam": tam}) - with pytest.raises(exc): - key.unpack_and_verify_manifest(blob) - - @pytest.mark.parametrize( - "tam, exc", - ( - ({}, TAMUnsupportedSuiteError), - ({"type": b"\xff"}, TAMUnsupportedSuiteError), - (None, ArchiveTAMInvalid), - (1234, ArchiveTAMInvalid), - ), - ) - def test_invalid_archive(self, key, tam, exc): - blob = msgpack.packb({"tam": tam}) - with pytest.raises(exc): - key.unpack_and_verify_archive(blob) - - @pytest.mark.parametrize( - "hmac, salt", - (({}, bytes(64)), (bytes(64), {}), (None, bytes(64)), (bytes(64), None)), - ids=["ed-b64", "b64-ed", "n-b64", "b64-n"], - ) - def test_wrong_types(self, key, hmac, salt): - data = {"tam": {"type": "HKDF_HMAC_SHA512", "hmac": hmac, "salt": salt}} - tam = data["tam"] - if hmac is None: - del tam["hmac"] - if salt is None: - del tam["salt"] - blob = msgpack.packb(data) - with pytest.raises(TAMInvalid): - key.unpack_and_verify_manifest(blob) - with pytest.raises(ArchiveTAMInvalid): - key.unpack_and_verify_archive(blob) + key.unpack_manifest(blob) def test_round_trip_manifest(self, key): data = {"foo": "bar"} - blob = key.pack_and_authenticate_metadata(data, context=b"manifest") - assert blob.startswith(b"\x82") - - unpacked = msgpack.unpackb(blob) - assert unpacked["tam"]["type"] == "HKDF_HMAC_SHA512" - - unpacked = key.unpack_and_verify_manifest(blob) + blob = key.pack_metadata(data) + unpacked = key.unpack_manifest(blob) assert unpacked["foo"] == "bar" - assert "tam" not in unpacked + assert "tam" not in unpacked # legacy def test_round_trip_archive(self, key): data = {"foo": "bar"} - blob = key.pack_and_authenticate_metadata(data, context=b"archive") - assert blob.startswith(b"\x82") - - unpacked = msgpack.unpackb(blob) - assert unpacked["tam"]["type"] == "HKDF_HMAC_SHA512" - - unpacked, _ = key.unpack_and_verify_archive(blob) + blob = key.pack_metadata(data) + unpacked = key.unpack_archive(blob) assert unpacked["foo"] == "bar" - assert "tam" not in unpacked - - @pytest.mark.parametrize("which", ("hmac", "salt")) - def test_tampered_manifest(self, key, which): - data = {"foo": "bar"} - blob = key.pack_and_authenticate_metadata(data, context=b"manifest") - assert blob.startswith(b"\x82") - - unpacked = msgpack.unpackb(blob, object_hook=StableDict) - assert len(unpacked["tam"][which]) == 64 - unpacked["tam"][which] = unpacked["tam"][which][0:32] + bytes(32) - assert len(unpacked["tam"][which]) == 64 - blob = msgpack.packb(unpacked) - - with pytest.raises(TAMInvalid): - key.unpack_and_verify_manifest(blob) - - @pytest.mark.parametrize("which", ("hmac", "salt")) - def test_tampered_archive(self, key, which): - data = {"foo": "bar"} - blob = key.pack_and_authenticate_metadata(data, context=b"archive") - assert blob.startswith(b"\x82") - - unpacked = msgpack.unpackb(blob, object_hook=StableDict) - assert len(unpacked["tam"][which]) == 64 - unpacked["tam"][which] = unpacked["tam"][which][0:32] + bytes(32) - assert len(unpacked["tam"][which]) == 64 - blob = msgpack.packb(unpacked) - - with pytest.raises(ArchiveTAMInvalid): - key.unpack_and_verify_archive(blob) + assert "tam" not in unpacked # legacy def test_decrypt_key_file_unsupported_algorithm(): diff --git a/src/borg/testsuite/remote.py b/src/borg/testsuite/remote.py index b5cb2e642..a2d84bcee 100644 --- a/src/borg/testsuite/remote.py +++ b/src/borg/testsuite/remote.py @@ -6,6 +6,7 @@ from unittest.mock import patch import pytest +from ..constants import ROBJ_FILE_STREAM from ..remote import SleepingBandwidthLimiter, RepositoryCache, cache_if_remote from ..repository import Repository from ..crypto.key import PlaintextKey @@ -205,7 +206,7 @@ class TestRepositoryCache: def _put_encrypted_object(self, repo_objs, repository, data): id_ = repo_objs.id_hash(data) - repository.put(id_, repo_objs.format(id_, {}, data)) + repository.put(id_, repo_objs.format(id_, {}, data, ro_type=ROBJ_FILE_STREAM)) return id_ @pytest.fixture diff --git a/src/borg/testsuite/repoobj.py b/src/borg/testsuite/repoobj.py index 5d11ad931..44c364d81 100644 --- a/src/borg/testsuite/repoobj.py +++ b/src/borg/testsuite/repoobj.py @@ -1,6 +1,8 @@ import pytest +from ..constants import ROBJ_FILE_STREAM, ROBJ_MANIFEST, ROBJ_ARCHIVE_META from ..crypto.key import PlaintextKey +from ..helpers.errors import IntegrityError from ..repository import Repository from ..repoobj import RepoObj, RepoObj1 from ..compress import LZ4 @@ -21,14 +23,14 @@ def test_format_parse_roundtrip(key): data = b"foobar" * 10 id = repo_objs.id_hash(data) meta = {"custom": "something"} # size and csize are computed automatically - cdata = repo_objs.format(id, meta, data) + cdata = repo_objs.format(id, meta, data, ro_type=ROBJ_FILE_STREAM) - got_meta = repo_objs.parse_meta(id, cdata) + got_meta = repo_objs.parse_meta(id, cdata, ro_type=ROBJ_FILE_STREAM) assert got_meta["size"] == len(data) assert got_meta["csize"] < len(data) assert got_meta["custom"] == "something" - got_meta, got_data = repo_objs.parse(id, cdata) + got_meta, got_data = repo_objs.parse(id, cdata, ro_type=ROBJ_FILE_STREAM) assert got_meta["size"] == len(data) assert got_meta["csize"] < len(data) assert got_meta["custom"] == "something" @@ -44,11 +46,11 @@ def test_format_parse_roundtrip_borg1(key): # legacy data = b"foobar" * 10 id = repo_objs.id_hash(data) meta = {} # borg1 does not support this kind of metadata - cdata = repo_objs.format(id, meta, data) + cdata = repo_objs.format(id, meta, data, ro_type=ROBJ_FILE_STREAM) # borg1 does not support separate metadata and borg2 does not invoke parse_meta for borg1 repos - got_meta, got_data = repo_objs.parse(id, cdata) + got_meta, got_data = repo_objs.parse(id, cdata, ro_type=ROBJ_FILE_STREAM) assert got_meta["size"] == len(data) assert got_meta["csize"] < len(data) assert data == got_data @@ -67,9 +69,9 @@ def test_borg1_borg2_transition(key): len_data = len(data) repo_objs1 = RepoObj1(key) id = repo_objs1.id_hash(data) - borg1_cdata = repo_objs1.format(id, meta, data) + borg1_cdata = repo_objs1.format(id, meta, data, ro_type=ROBJ_FILE_STREAM) meta1, compr_data1 = repo_objs1.parse( - id, borg1_cdata, decompress=True, want_compressed=True + id, borg1_cdata, decompress=True, want_compressed=True, ro_type=ROBJ_FILE_STREAM ) # avoid re-compression # in borg 1, we can only get this metadata after decrypting the whole chunk (and we do not have "size" here): assert meta1["ctype"] == LZ4.ID # default compression @@ -80,18 +82,49 @@ def test_borg1_borg2_transition(key): # note: as we did not decompress, we do not have "size" and we need to get it from somewhere else. # here, we just use len_data. for borg transfer, we also know the size from another metadata source. borg2_cdata = repo_objs2.format( - id, dict(meta1), compr_data1[2:], compress=False, size=len_data, ctype=meta1["ctype"], clevel=meta1["clevel"] + id, + dict(meta1), + compr_data1[2:], + compress=False, + size=len_data, + ctype=meta1["ctype"], + clevel=meta1["clevel"], + ro_type=ROBJ_FILE_STREAM, ) - meta2, data2 = repo_objs2.parse(id, borg2_cdata) + meta2, data2 = repo_objs2.parse(id, borg2_cdata, ro_type=ROBJ_FILE_STREAM) assert data2 == data assert meta2["ctype"] == LZ4.ID assert meta2["clevel"] == 0xFF assert meta2["csize"] == meta1["csize"] - 2 # borg2 does not store the type/level bytes there assert meta2["size"] == len_data - meta2 = repo_objs2.parse_meta(id, borg2_cdata) + meta2 = repo_objs2.parse_meta(id, borg2_cdata, ro_type=ROBJ_FILE_STREAM) # now, in borg 2, we have nice and separately decrypted metadata (no need to decrypt the whole chunk): assert meta2["ctype"] == LZ4.ID assert meta2["clevel"] == 0xFF assert meta2["csize"] == meta1["csize"] - 2 # borg2 does not store the type/level bytes there assert meta2["size"] == len_data + + +def test_spoof_manifest(key): + repo_objs = RepoObj(key) + data = b"fake or malicious manifest data" # file content could be provided by attacker. + id = repo_objs.id_hash(data) + # create a repo object containing user data (file content data). + cdata = repo_objs.format(id, {}, data, ro_type=ROBJ_FILE_STREAM) + # let's assume an attacker somehow managed to replace the manifest with that repo object. + # as borg always give the ro_type it wants to read, this should fail: + with pytest.raises(IntegrityError): + repo_objs.parse(id, cdata, ro_type=ROBJ_MANIFEST) + + +def test_spoof_archive(key): + repo_objs = RepoObj(key) + data = b"fake or malicious archive data" # file content could be provided by attacker. + id = repo_objs.id_hash(data) + # create a repo object containing user data (file content data). + cdata = repo_objs.format(id, {}, data, ro_type=ROBJ_FILE_STREAM) + # let's assume an attacker somehow managed to replace an archive with that repo object. + # as borg always give the ro_type it wants to read, this should fail: + with pytest.raises(IntegrityError): + repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META)