move get_limited_unpacker to helpers

also: move some constants to borg.constants
(cherry picked from commit 89f3cab6cd)
This commit is contained in:
Thomas Waldmann 2017-06-24 18:31:34 +02:00
parent 35b0f1f4f9
commit f7b16112dc
4 changed files with 50 additions and 41 deletions

View file

@ -39,12 +39,26 @@ import msgpack.fallback
import socket
# Maximum payload size: 20 MiB minus 41 bytes for a Repository header (because the "size" field in the
# Repository includes the header, and the total size was set to 20 MiB).
MAX_DATA_SIZE = 20971479
# MAX_OBJECT_SIZE = MAX_DATA_SIZE (slightly less than 20 MiB) + 41 bytes for a Repository PUT header,
# which consists of a 1 byte tag ID, a 4 byte CRC, a 4 byte size and 32 bytes for the ID.
MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41 # see LoggedIO.put_header_fmt.size assertion in repository module
# sanity check: payload plus header must add up to exactly 20 MiB
assert MAX_OBJECT_SIZE == 20971520 == 20 * 1024 * 1024
# borg.remote read() buffer size (10 MiB)
BUFSIZE = 10 * 1024 * 1024
# To use a safe, limited unpacker, we need to set an upper limit on the archive count in the manifest.
# This does not mean that you can always really reach that number, because the manifest also needs to
# be smaller than MAX_DATA_SIZE or it will trigger the check for that.
MAX_ARCHIVES = 400000
# repo.list() / .scan() result count limit the borg client uses
LIST_SCAN_LIMIT = 10000
# return codes returned by borg command
# when borg is killed by signal N, rc = 128 + N
EXIT_SUCCESS = 0 # everything done, no problems
@ -145,6 +159,35 @@ def check_extension_modules():
raise ExtensionModuleError
def get_limited_unpacker(kind):
    """Create a msgpack Unpacker whose size limits fit the given kind of data.

    msgpack data received from remote should not be trusted, so every length
    limit is set explicitly.

    kind must be 'server', 'client' or 'manifest'; any other value raises
    ValueError.
    """
    # limits shared by all kinds; the kind-specific branches below add to or override them
    limits = {
        'use_list': False,  # return tuples, not lists
        'max_bin_len': 0,  # not used
        'max_ext_len': 0,  # not used
        'max_buffer_size': 3 * max(BUFSIZE, MAX_OBJECT_SIZE),
        'max_str_len': MAX_OBJECT_SIZE,  # a chunk or other repo object
    }
    if kind == 'server':
        limits['max_array_len'] = 100  # misc. cmd tuples
        limits['max_map_len'] = 100  # misc. cmd dicts
    elif kind == 'client':
        limits['max_array_len'] = LIST_SCAN_LIMIT  # result list from repo.list() / .scan()
        limits['max_map_len'] = 100  # misc. result dicts
    elif kind == 'manifest':
        limits['use_list'] = True  # default value
        limits['max_array_len'] = 100  # ITEM_KEYS ~= 22
        limits['max_map_len'] = MAX_ARCHIVES  # list of archives
        limits['max_str_len'] = 255  # archive name
        limits['object_hook'] = StableDict
        limits['unicode_errors'] = 'surrogateescape'
    else:
        raise ValueError('kind must be "server", "client" or "manifest"')
    return msgpack.Unpacker(**limits)
class Manifest:
@enum.unique

View file

@ -16,7 +16,7 @@ logger = create_logger()
from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
from .crypto import hkdf_hmac_sha512
from .compress import Compressor, CNONE
from . import remote
from .helpers import get_limited_unpacker
PREFIX = b'\0' * 8
@ -157,7 +157,7 @@ class KeyBase:
logger.warning('Manifest authentication DISABLED.')
tam_required = False
data = bytearray(data)
unpacker = remote.get_limited_unpacker('manifest')
unpacker = get_limited_unpacker('manifest')
unpacker.feed(data)
unpacked = unpacker.unpack()
if b'tam' not in unpacked:

View file

@ -14,10 +14,9 @@ from . import __version__
from .helpers import Error, IntegrityError, sysinfo
from .helpers import replace_placeholders
from .helpers import bin_to_hex
from .helpers import StableDict
from .helpers import MAX_ARCHIVES
from .repository import Repository, LIST_SCAN_LIMIT, MAX_OBJECT_SIZE
from .helpers import BUFSIZE
from .helpers import get_limited_unpacker
from .repository import Repository
from .logger import create_logger
import msgpack
@ -26,8 +25,6 @@ logger = create_logger(__name__)
RPC_PROTOCOL_VERSION = 2
BUFSIZE = 10 * 1024 * 1024
MAX_INFLIGHT = 100
@ -50,35 +47,6 @@ def os_write(fd, data):
return amount
def get_limited_unpacker(kind):
    """Create a msgpack Unpacker whose size limits fit the given kind of data.

    msgpack data received from remote should not be trusted, so every length
    limit is set explicitly.

    kind must be 'server', 'client' or 'manifest'; any other value raises
    ValueError.
    """
    # limits shared by all kinds; the kind-specific branches below add to or override them
    limits = {
        'use_list': False,  # return tuples, not lists
        'max_bin_len': 0,  # not used
        'max_ext_len': 0,  # not used
        'max_buffer_size': 3 * max(BUFSIZE, MAX_OBJECT_SIZE),
        'max_str_len': MAX_OBJECT_SIZE,  # a chunk or other repo object
    }
    if kind == 'server':
        limits['max_array_len'] = 100  # misc. cmd tuples
        limits['max_map_len'] = 100  # misc. cmd dicts
    elif kind == 'client':
        limits['max_array_len'] = LIST_SCAN_LIMIT  # result list from repo.list() / .scan()
        limits['max_map_len'] = 100  # misc. result dicts
    elif kind == 'manifest':
        limits['use_list'] = True  # default value
        limits['max_array_len'] = 100  # ITEM_KEYS ~= 22
        limits['max_map_len'] = MAX_ARCHIVES  # list of archives
        limits['max_str_len'] = 255  # archive name
        limits['object_hook'] = StableDict
        limits['unicode_errors'] = 'surrogateescape'
    else:
        raise ValueError('kind must be "server", "client" or "manifest"')
    return msgpack.Unpacker(**limits)
class ConnectionClosed(Error):
"""Connection closed by remote host"""

View file

@ -14,20 +14,18 @@ from .logger import create_logger
logger = create_logger()
from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex
from .helpers import LIST_SCAN_LIMIT, MAX_OBJECT_SIZE, MAX_DATA_SIZE
from .hashindex import NSIndex
from .locking import Lock, LockError, LockErrorT
from .lrucache import LRUCache
from .platform import sync_dir
MAX_OBJECT_SIZE = 20 * 1024 * 1024
MAGIC = b'BORG_SEG'
MAGIC_LEN = len(MAGIC)
TAG_PUT = 0
TAG_DELETE = 1
TAG_COMMIT = 2
LIST_SCAN_LIMIT = 10000 # repo.list() / .scan() result count limit the borg client uses
class Repository:
"""Filesystem based transactional key value store
@ -860,4 +858,4 @@ class LoggedIO:
sync_dir(dirname)
MAX_DATA_SIZE = MAX_OBJECT_SIZE - LoggedIO.put_header_fmt.size
assert LoggedIO.put_header_fmt.size == 41 # see helpers.MAX_OBJECT_SIZE