diff --git a/borg/archive.py b/borg/archive.py index e21c1aab5..5a5d6125c 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -592,6 +592,34 @@ ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ]) +def valid_msgpacked_item(d, item_keys_serialized): + """check if the data looks like a msgpacked item metadata dict""" + d_len = len(d) + if d_len == 0: + return False + if d[0] & 0xf0 == 0x80: # object is a fixmap (up to 15 elements) + offs = 1 + elif d[0] == 0xde: # object is a map16 (up to 2^16-1 elements) + offs = 3 + else: + # object is not a map (dict) + # note: we must not have item dicts with > 2^16-1 elements + return False + if d_len <= offs: + return False + # is the first dict key a bytestring? + if d[offs] & 0xe0 == 0xa0: # key is a small bytestring (up to 31 chars) + pass + elif d[offs] in (0xd9, 0xda, 0xdb): # key is a str8, str16 or str32 + pass + else: + # key is not a bytestring + return False + # is the bytestring any of the expected key names? + key_serialized = d[offs:] + return any(key_serialized.startswith(pattern) for pattern in item_keys_serialized) + + class RobustUnpacker: """A restartable/robust version of the streaming msgpack unpacker """ @@ -622,18 +650,10 @@ class RobustUnpacker: while self._resync: if not data: raise StopIteration - # Abort early if the data does not look like a serialized dict - if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0): + # Abort early if the data does not look like a serialized item dict + if not valid_msgpacked_item(data, self.item_keys): data = data[1:] continue - # Make sure it looks like an item dict - for pattern in self.item_keys: - if data[1:].startswith(pattern): - break - else: - data = data[1:] - continue - self._unpacker = msgpack.Unpacker(object_hook=StableDict) self._unpacker.feed(data) try: diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 662d776c9..78ea25465 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -2,8 +2,9 @@ from datetime import datetime, timezone from unittest.mock import Mock import msgpack +import pytest -from ..archive import Archive, CacheChunkBuffer, RobustUnpacker +from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_item, ITEM_KEYS from ..key import PlaintextKey from ..helpers import Manifest from . import BaseTestCase @@ -112,3 +113,35 @@ class RobustUnpackerTestCase(BaseTestCase): input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])] result = self.process(input) self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}]) + + +@pytest.fixture +def item_keys_serialized(): + return [msgpack.packb(name) for name in ITEM_KEYS] + + +@pytest.mark.parametrize('packed', + [b'', b'x', b'foobar', ] + + [msgpack.packb(o) for o in ( + [None, 0, 0.0, False, '', {}, [], ()] + + [42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')] + )]) +def test_invalid_msgpacked_item(packed, item_keys_serialized): + assert not valid_msgpacked_item(packed, item_keys_serialized) + + +@pytest.mark.parametrize('packed', + [msgpack.packb(o) for o in [ + {b'path': b'/a/b/c'}, # small (different msgpack mapping type!) + dict((k, b'') for k in ITEM_KEYS), # as big (key count) as it gets + dict((k, b'x' * 1000) for k in ITEM_KEYS), # as big (key count and volume) as it gets + ]]) +def test_valid_msgpacked_items(packed, item_keys_serialized): + assert valid_msgpacked_item(packed, item_keys_serialized) + + +def test_key_length_msgpacked_items(): + key = b'x' * 32 # 31 bytes is the limit for fixstr msgpack type + data = {key: b''} + item_keys_serialized = [msgpack.packb(key), ] + assert valid_msgpacked_item(msgpack.packb(data), item_keys_serialized)