From 866417853d26a5b04b2b800cb66bb4541205cedd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jun 2016 15:07:49 +0200 Subject: [PATCH 1/3] rename valid_msgpacked_item to valid_msgpacked_dict the code is generic, it can also be used for other msgpacked dictionaries. --- borg/archive.py | 10 +++++----- borg/testsuite/archive.py | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 370134671..b677f5f23 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -595,8 +595,8 @@ ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks', REQUIRED_ITEM_KEYS = frozenset([b'path', b'mtime', ]) -def valid_msgpacked_item(d, item_keys_serialized): - """check if the data looks like a msgpacked item metadata dict""" +def valid_msgpacked_dict(d, keys_serialized): + """check if the data looks like a msgpacked dict""" d_len = len(d) if d_len == 0: return False @@ -606,7 +606,7 @@ def valid_msgpacked_item(d, item_keys_serialized): offs = 3 else: # object is not a map (dict) - # note: we must not have item dicts with > 2^16-1 elements + # note: we must not have dicts with > 2^16-1 elements return False if d_len <= offs: return False @@ -620,7 +620,7 @@ def valid_msgpacked_item(d, item_keys_serialized): return False # is the bytestring any of the expected key names? key_serialized = d[offs:] - return any(key_serialized.startswith(pattern) for pattern in item_keys_serialized) + return any(key_serialized.startswith(pattern) for pattern in keys_serialized) class RobustUnpacker: @@ -654,7 +654,7 @@ class RobustUnpacker: if not data: raise StopIteration # Abort early if the data does not look like a serialized item dict - if not valid_msgpacked_item(data, self.item_keys): + if not valid_msgpacked_dict(data, self.item_keys): data = data[1:] continue self._unpacker = msgpack.Unpacker(object_hook=StableDict) diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 66676f334..919d57a0c 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -4,7 +4,7 @@ from unittest.mock import Mock import msgpack import pytest -from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_item, ITEM_KEYS +from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS from ..key import PlaintextKey from ..helpers import Manifest from . import BaseTestCase @@ -127,7 +127,7 @@ def item_keys_serialized(): [42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')] )]) def test_invalid_msgpacked_item(packed, item_keys_serialized): - assert not valid_msgpacked_item(packed, item_keys_serialized) + assert not valid_msgpacked_dict(packed, item_keys_serialized) @pytest.mark.parametrize('packed', @@ -137,11 +137,11 @@ def test_invalid_msgpacked_item(packed, item_keys_serialized): dict((k, b'x' * 1000) for k in ITEM_KEYS), # as big (key count and volume) as it gets ]]) def test_valid_msgpacked_items(packed, item_keys_serialized): - assert valid_msgpacked_item(packed, item_keys_serialized) + assert valid_msgpacked_dict(packed, item_keys_serialized) def test_key_length_msgpacked_items(): key = b'x' * 32 # 31 bytes is the limit for fixstr msgpack type data = {key: b''} item_keys_serialized = [msgpack.packb(key), ] - assert valid_msgpacked_item(msgpack.packb(data), item_keys_serialized) + assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized) From 03f6282eabbd796f2692ecd5b87812b53c39249c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jun 2016 15:29:59 +0200 Subject: [PATCH 2/3] make rebuild_manifest more future-proof --- borg/archive.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index b677f5f23..3dedbe8ce 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -594,6 +594,9 @@ ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks', # this is the set of keys that are always present in items: REQUIRED_ITEM_KEYS = frozenset([b'path', b'mtime', ]) +# this set must be kept complete, otherwise rebuild_manifest might malfunction: +ARCHIVE_KEYS = set([b'version', b'name', b'items', b'cmdline', b'hostname', b'username', b'time', b'time_end']) + def valid_msgpacked_dict(d, keys_serialized): """check if the data looks like a msgpacked dict""" @@ -738,11 +741,11 @@ class ArchiveChecker: # lost manifest on a older borg version than the most recent one that was ever used # within this repository (assuming that newer borg versions support more item keys). manifest = Manifest(self.key, self.repository) + archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS] for chunk_id, _ in self.chunks.iteritems(): cdata = self.repository.get(chunk_id) data = self.key.decrypt(chunk_id, cdata) - # Some basic sanity checks of the payload before feeding it into msgpack - if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0): + if not valid_msgpacked_dict(data, archive_keys_serialized): continue if b'cmdline' not in data or b'\xa7version\x01' not in data: continue From 69c3b5e196a2d7afec36fde4fea8166a4ff9b21d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jun 2016 16:06:16 +0200 Subject: [PATCH 3/3] rebuild_manifest: refactor archive metadata dict validation this was already done in a similar way for item metadata dict validation. also: check for some more required keys - the old code would crash if 'name' or 'time' key were missing. --- borg/archive.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 3dedbe8ce..1894ad7a7 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -595,7 +595,10 @@ ITEM_KEYS = frozenset([b'path', b'source', b'rdev', b'chunks', REQUIRED_ITEM_KEYS = frozenset([b'path', b'mtime', ]) # this set must be kept complete, otherwise rebuild_manifest might malfunction: -ARCHIVE_KEYS = set([b'version', b'name', b'items', b'cmdline', b'hostname', b'username', b'time', b'time_end']) +ARCHIVE_KEYS = frozenset([b'version', b'name', b'items', b'cmdline', b'hostname', b'username', b'time', b'time_end', ]) + +# this is the set of keys that are always present in archives: +REQUIRED_ARCHIVE_KEYS = frozenset([b'version', b'name', b'items', b'cmdline', b'time', ]) def valid_msgpacked_dict(d, keys_serialized): @@ -734,6 +737,12 @@ class ArchiveChecker: Iterates through all objects in the repository looking for archive metadata blocks. """ + def valid_archive(obj): + if not isinstance(obj, dict): + return False + keys = set(obj) + return REQUIRED_ARCHIVE_KEYS.issubset(keys) + logger.info('Rebuilding missing manifest, this might take some time...') # as we have lost the manifest, we do not know any more what valid item keys we had. # collecting any key we encounter in a damaged repo seems unwise, thus we just use @@ -755,7 +764,7 @@ class ArchiveChecker: # msgpack with invalid data except (TypeError, ValueError, StopIteration): continue - if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive: + if valid_archive(archive): logger.info('Found archive %s', archive[b'name'].decode('utf-8')) manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']} logger.info('Manifest rebuild complete.')