From f7b16112dc3a1722f264410d76cd0f8262ab4906 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 24 Jun 2017 18:31:34 +0200 Subject: [PATCH] move get_limited_unpacker to helpers also: move some constants to borg.helpers (cherry picked from commit 89f3cab6cd44903e096fe0237e9b0cc1eeac1e87) --- borg/helpers.py | 43 +++++++++++++++++++++++++++++++++++++++++++ borg/key.py | 4 ++-- borg/remote.py | 38 +++----------------------------------- borg/repository.py | 6 ++---- 4 files changed, 50 insertions(+), 41 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index 8bee0bf05..0ad99133f 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -39,12 +39,26 @@ import msgpack.fallback import socket +# 20 MiB minus 41 bytes for a Repository header (because the "size" field in the Repository includes +# the header, and the total size was set to 20 MiB). +MAX_DATA_SIZE = 20971479 + +# MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of +# a 1 byte tag ID, 4 byte CRC, 4 byte size and 32 bytes for the ID. +MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41 # see LoggedIO.put_header_fmt.size assertion in repository module +assert MAX_OBJECT_SIZE == 20971520 == 20 * 1024 * 1024 + +# borg.remote read() buffer size +BUFSIZE = 10 * 1024 * 1024 # to use a safe, limited unpacker, we need to set a upper limit to the archive count in the manifest. # this does not mean that you can always really reach that number, because it also needs to be less than # MAX_DATA_SIZE or it will trigger the check for that. 
MAX_ARCHIVES = 400000 +# repo.list() / .scan() result count limit the borg client uses +LIST_SCAN_LIMIT = 10000 + # return codes returned by borg command # when borg is killed by signal N, rc = 128 + N EXIT_SUCCESS = 0 # everything done, no problems @@ -145,6 +159,35 @@ def check_extension_modules(): raise ExtensionModuleError +def get_limited_unpacker(kind): + """return a limited Unpacker because we should not trust msgpack data received from remote""" + args = dict(use_list=False, # return tuples, not lists + max_bin_len=0, # not used + max_ext_len=0, # not used + max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE), + max_str_len=MAX_OBJECT_SIZE, # a chunk or other repo object + ) + if kind == 'server': + args.update(dict(max_array_len=100, # misc. cmd tuples + max_map_len=100, # misc. cmd dicts + )) + elif kind == 'client': + args.update(dict(max_array_len=LIST_SCAN_LIMIT, # result list from repo.list() / .scan() + max_map_len=100, # misc. result dicts + )) + elif kind == 'manifest': + args.update(dict(use_list=True, # default value + max_array_len=100, # ITEM_KEYS ~= 22 + max_map_len=MAX_ARCHIVES, # list of archives + max_str_len=255, # archive name + object_hook=StableDict, + unicode_errors='surrogateescape', + )) + else: + raise ValueError('kind must be "server", "client" or "manifest"') + return msgpack.Unpacker(**args) + + class Manifest: @enum.unique diff --git a/borg/key.py b/borg/key.py index 628056947..4db269c46 100644 --- a/borg/key.py +++ b/borg/key.py @@ -16,7 +16,7 @@ logger = create_logger() from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks from .crypto import hkdf_hmac_sha512 from .compress import Compressor, CNONE -from . 
import remote +from .helpers import get_limited_unpacker PREFIX = b'\0' * 8 @@ -157,7 +157,7 @@ class KeyBase: logger.warning('Manifest authentication DISABLED.') tam_required = False data = bytearray(data) - unpacker = remote.get_limited_unpacker('manifest') + unpacker = get_limited_unpacker('manifest') unpacker.feed(data) unpacked = unpacker.unpack() if b'tam' not in unpacked: diff --git a/borg/remote.py b/borg/remote.py index b4db293ad..45bdb4fd1 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -14,10 +14,9 @@ from . import __version__ from .helpers import Error, IntegrityError, sysinfo from .helpers import replace_placeholders -from .helpers import bin_to_hex -from .helpers import StableDict -from .helpers import MAX_ARCHIVES -from .repository import Repository, LIST_SCAN_LIMIT, MAX_OBJECT_SIZE +from .helpers import BUFSIZE +from .helpers import get_limited_unpacker +from .repository import Repository from .logger import create_logger import msgpack @@ -26,8 +25,6 @@ logger = create_logger(__name__) RPC_PROTOCOL_VERSION = 2 -BUFSIZE = 10 * 1024 * 1024 - MAX_INFLIGHT = 100 @@ -50,35 +47,6 @@ def os_write(fd, data): return amount -def get_limited_unpacker(kind): - """return a limited Unpacker because we should not trust msgpack data received from remote""" - args = dict(use_list=False, # return tuples, not lists - max_bin_len=0, # not used - max_ext_len=0, # not used - max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE), - max_str_len=MAX_OBJECT_SIZE, # a chunk or other repo object - ) - if kind == 'server': - args.update(dict(max_array_len=100, # misc. cmd tuples - max_map_len=100, # misc. cmd dicts - )) - elif kind == 'client': - args.update(dict(max_array_len=LIST_SCAN_LIMIT, # result list from repo.list() / .scan() - max_map_len=100, # misc. 
result dicts - )) - elif kind == 'manifest': - args.update(dict(use_list=True, # default value - max_array_len=100, # ITEM_KEYS ~= 22 - max_map_len=MAX_ARCHIVES, # list of archives - max_str_len=255, # archive name - object_hook=StableDict, - unicode_errors='surrogateescape', - )) - else: - raise ValueError('kind must be "server", "client" or "manifest"') - return msgpack.Unpacker(**args) - - class ConnectionClosed(Error): """Connection closed by remote host""" diff --git a/borg/repository.py b/borg/repository.py index 933b77de8..a31e5058f 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -14,20 +14,18 @@ from .logger import create_logger logger = create_logger() from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex +from .helpers import LIST_SCAN_LIMIT, MAX_OBJECT_SIZE, MAX_DATA_SIZE from .hashindex import NSIndex from .locking import Lock, LockError, LockErrorT from .lrucache import LRUCache from .platform import sync_dir -MAX_OBJECT_SIZE = 20 * 1024 * 1024 MAGIC = b'BORG_SEG' MAGIC_LEN = len(MAGIC) TAG_PUT = 0 TAG_DELETE = 1 TAG_COMMIT = 2 -LIST_SCAN_LIMIT = 10000 # repo.list() / .scan() result count limit the borg client uses - class Repository: """Filesystem based transactional key value store @@ -860,4 +858,4 @@ class LoggedIO: sync_dir(dirname) -MAX_DATA_SIZE = MAX_OBJECT_SIZE - LoggedIO.put_header_fmt.size +assert LoggedIO.put_header_fmt.size == 41 # see helpers.MAX_OBJECT_SIZE