diff --git a/src/borg/constants.py b/src/borg/constants.py index 9813a8afe..bcdaa07da 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -31,6 +31,22 @@ DEFAULT_MAX_SEGMENT_SIZE = 500 * 1024 * 1024 # the header, and the total size was set to 20 MiB). MAX_DATA_SIZE = 20971479 +# MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of +# a 1 byte tag ID, 4 byte CRC, 4 byte size and 32 bytes for the ID. +MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41 # see LoggedIO.put_header_fmt.size assertion in repository module +assert MAX_OBJECT_SIZE == 20971520 == 20 * 1024 * 1024 + +# borg.remote read() buffer size +BUFSIZE = 10 * 1024 * 1024 + +# to use a safe, limited unpacker, we need to set a upper limit to the archive count in the manifest. +# this does not mean that you can always really reach that number, because it also needs to be less than +# MAX_DATA_SIZE or it will trigger the check for that. +MAX_ARCHIVES = 400000 + +# repo.list() / .scan() result count limit the borg client uses +LIST_SCAN_LIMIT = 100000 + DEFAULT_SEGMENTS_PER_DIR = 1000 CHUNK_MIN_EXP = 19 # 2**19 == 512kiB diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index aba1f35be..256a3c405 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -21,6 +21,7 @@ from ..helpers import StableDict from ..helpers import Error, IntegrityError from ..helpers import yes from ..helpers import get_keys_dir, get_security_dir +from ..helpers import get_limited_unpacker from ..helpers import bin_to_hex from ..item import Key, EncryptedKey from ..platform import SaveFile @@ -216,9 +217,9 @@ class KeyBase: logger.warning('Manifest authentication DISABLED.') tam_required = False data = bytearray(data) - # Since we don't trust these bytes we use the slower Python unpacker, - # which is assumed to have a lower probability of security issues. - unpacked = msgpack.fallback.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape') + unpacker = get_limited_unpacker('manifest') + unpacker.feed(data) + unpacked = unpacker.unpack() if b'tam' not in unpacked: if tam_required: raise TAMRequiredError(self.repository._location.canonical_path()) diff --git a/src/borg/helpers.py b/src/borg/helpers.py index 92c573b2d..1554eefee 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -145,6 +145,35 @@ def check_extension_modules(): raise ExtensionModuleError +def get_limited_unpacker(kind): + """return a limited Unpacker because we should not trust msgpack data received from remote""" + args = dict(use_list=False, # return tuples, not lists + max_bin_len=0, # not used + max_ext_len=0, # not used + max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE), + max_str_len=MAX_OBJECT_SIZE, # a chunk or other repo object + ) + if kind == 'server': + args.update(dict(max_array_len=100, # misc. cmd tuples + max_map_len=100, # misc. cmd dicts + )) + elif kind == 'client': + args.update(dict(max_array_len=LIST_SCAN_LIMIT, # result list from repo.list() / .scan() + max_map_len=100, # misc. result dicts + )) + elif kind == 'manifest': + args.update(dict(use_list=True, # default value + max_array_len=100, # ITEM_KEYS ~= 22 + max_map_len=MAX_ARCHIVES, # list of archives + max_str_len=255, # archive name + object_hook=StableDict, + unicode_errors='surrogateescape', + )) + else: + raise ValueError('kind must be "server", "client" or "manifest"') + return msgpack.Unpacker(**args) + + ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') @@ -351,6 +380,10 @@ class Manifest: prev_ts = self.last_timestamp incremented = (prev_ts + timedelta(microseconds=1)).isoformat() self.timestamp = max(incremented, datetime.utcnow().isoformat()) + # include checks for limits as enforced by limited unpacker (used by load()) + assert len(self.archives) <= MAX_ARCHIVES + assert all(len(name) <= 255 for name in self.archives) + assert len(self.item_keys) <= 100 manifest = ManifestItem( version=1, archives=StableDict(self.archives.get_raw_dict()), diff --git a/src/borg/remote.py b/src/borg/remote.py index 670179a44..d131a8266 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -20,16 +20,18 @@ import msgpack from . import __version__ from .compress import LZ4 +from .constants import * # NOQA from .helpers import Error, IntegrityError from .helpers import bin_to_hex from .helpers import get_home_dir +from .helpers import get_limited_unpacker from .helpers import hostname_is_unique from .helpers import replace_placeholders from .helpers import sysinfo from .helpers import format_file_size from .helpers import truncate_and_unlink from .logger import create_logger, setup_logging -from .repository import Repository, MAX_OBJECT_SIZE, LIST_SCAN_LIMIT +from .repository import Repository from .version import parse_version, format_version from .algorithms.checksums import xxh64 @@ -39,8 +41,6 @@ RPC_PROTOCOL_VERSION = 2 BORG_VERSION = parse_version(__version__) MSGID, MSG, ARGS, RESULT = b'i', b'm', b'a', b'r' -BUFSIZE = 10 * 1024 * 1024 - MAX_INFLIGHT = 100 RATELIMIT_PERIOD = 0.1 @@ -65,27 +65,6 @@ def os_write(fd, data): return amount -def get_limited_unpacker(kind): - """return a limited Unpacker because we should not trust msgpack data received from remote""" - args = dict(use_list=False, # return tuples, not lists - max_bin_len=0, # not used - max_ext_len=0, # not used - max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE), - max_str_len=MAX_OBJECT_SIZE, # a chunk or other repo object - ) - if kind == 'server': - args.update(dict(max_array_len=100, # misc. cmd tuples - max_map_len=100, # misc. cmd dicts - )) - elif kind == 'client': - args.update(dict(max_array_len=LIST_SCAN_LIMIT, # result list from repo.list() / .scan() - max_map_len=100, # misc. result dicts - )) - else: - raise ValueError('kind must be "server" or "client"') - return msgpack.Unpacker(**args) - - class ConnectionClosed(Error): """Connection closed by remote host""" diff --git a/src/borg/repository.py b/src/borg/repository.py index f73e9cf54..5a3aa735c 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -34,8 +34,6 @@ TAG_PUT = 0 TAG_DELETE = 1 TAG_COMMIT = 2 -LIST_SCAN_LIMIT = 100000 # repo.list() / .scan() result count limit the borg client uses - FreeSpace = partial(defaultdict, int) @@ -1411,7 +1409,4 @@ class LoggedIO: return self.segment - 1 # close_segment() increments it -# MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of -# a 1 byte tag ID, 4 byte CRC, 4 byte size and 32 bytes for the ID. -MAX_OBJECT_SIZE = MAX_DATA_SIZE + LoggedIO.put_header_fmt.size -assert MAX_OBJECT_SIZE == 20971520 == 20 * 1024 * 1024 +assert LoggedIO.put_header_fmt.size == 41 # see constants.MAX_OBJECT_SIZE diff --git a/src/borg/testsuite/key.py b/src/borg/testsuite/key.py index df7f81e5e..6a7a6c8d7 100644 --- a/src/borg/testsuite/key.py +++ b/src/borg/testsuite/key.py @@ -331,7 +331,7 @@ class TestTAM: key.unpack_and_verify_manifest(blob) blob = b'\xc1\xc1\xc1' - with pytest.raises(msgpack.UnpackException): + with pytest.raises((ValueError, msgpack.UnpackException)): key.unpack_and_verify_manifest(blob) def test_missing_when_required(self, key):