mirror of
https://github.com/borgbackup/borg.git
synced 2026-03-17 16:18:10 -04:00
Merge pull request #2731 from ThomasWaldmann/limit-unpacker-manifest
Manifest: use limited unpacker
This commit is contained in:
commit
1932fe8ea9
6 changed files with 58 additions and 34 deletions
|
|
@ -31,6 +31,22 @@ DEFAULT_MAX_SEGMENT_SIZE = 500 * 1024 * 1024
|
|||
# the header, and the total size was set to 20 MiB).
|
||||
MAX_DATA_SIZE = 20971479
|
||||
|
||||
# MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of
|
||||
# a 1 byte tag ID, 4 byte CRC, 4 byte size and 32 bytes for the ID.
|
||||
MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41 # see LoggedIO.put_header_fmt.size assertion in repository module
|
||||
assert MAX_OBJECT_SIZE == 20971520 == 20 * 1024 * 1024
|
||||
|
||||
# borg.remote read() buffer size
|
||||
BUFSIZE = 10 * 1024 * 1024
|
||||
|
||||
# to use a safe, limited unpacker, we need to set a upper limit to the archive count in the manifest.
|
||||
# this does not mean that you can always really reach that number, because it also needs to be less than
|
||||
# MAX_DATA_SIZE or it will trigger the check for that.
|
||||
MAX_ARCHIVES = 400000
|
||||
|
||||
# repo.list() / .scan() result count limit the borg client uses
|
||||
LIST_SCAN_LIMIT = 100000
|
||||
|
||||
DEFAULT_SEGMENTS_PER_DIR = 1000
|
||||
|
||||
CHUNK_MIN_EXP = 19 # 2**19 == 512kiB
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ from ..helpers import StableDict
|
|||
from ..helpers import Error, IntegrityError
|
||||
from ..helpers import yes
|
||||
from ..helpers import get_keys_dir, get_security_dir
|
||||
from ..helpers import get_limited_unpacker
|
||||
from ..helpers import bin_to_hex
|
||||
from ..item import Key, EncryptedKey
|
||||
from ..platform import SaveFile
|
||||
|
|
@ -216,9 +217,9 @@ class KeyBase:
|
|||
logger.warning('Manifest authentication DISABLED.')
|
||||
tam_required = False
|
||||
data = bytearray(data)
|
||||
# Since we don't trust these bytes we use the slower Python unpacker,
|
||||
# which is assumed to have a lower probability of security issues.
|
||||
unpacked = msgpack.fallback.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape')
|
||||
unpacker = get_limited_unpacker('manifest')
|
||||
unpacker.feed(data)
|
||||
unpacked = unpacker.unpack()
|
||||
if b'tam' not in unpacked:
|
||||
if tam_required:
|
||||
raise TAMRequiredError(self.repository._location.canonical_path())
|
||||
|
|
|
|||
|
|
@ -145,6 +145,35 @@ def check_extension_modules():
|
|||
raise ExtensionModuleError
|
||||
|
||||
|
||||
def get_limited_unpacker(kind):
|
||||
"""return a limited Unpacker because we should not trust msgpack data received from remote"""
|
||||
args = dict(use_list=False, # return tuples, not lists
|
||||
max_bin_len=0, # not used
|
||||
max_ext_len=0, # not used
|
||||
max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE),
|
||||
max_str_len=MAX_OBJECT_SIZE, # a chunk or other repo object
|
||||
)
|
||||
if kind == 'server':
|
||||
args.update(dict(max_array_len=100, # misc. cmd tuples
|
||||
max_map_len=100, # misc. cmd dicts
|
||||
))
|
||||
elif kind == 'client':
|
||||
args.update(dict(max_array_len=LIST_SCAN_LIMIT, # result list from repo.list() / .scan()
|
||||
max_map_len=100, # misc. result dicts
|
||||
))
|
||||
elif kind == 'manifest':
|
||||
args.update(dict(use_list=True, # default value
|
||||
max_array_len=100, # ITEM_KEYS ~= 22
|
||||
max_map_len=MAX_ARCHIVES, # list of archives
|
||||
max_str_len=255, # archive name
|
||||
object_hook=StableDict,
|
||||
unicode_errors='surrogateescape',
|
||||
))
|
||||
else:
|
||||
raise ValueError('kind must be "server", "client" or "manifest"')
|
||||
return msgpack.Unpacker(**args)
|
||||
|
||||
|
||||
ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
|
||||
|
||||
|
||||
|
|
@ -351,6 +380,10 @@ class Manifest:
|
|||
prev_ts = self.last_timestamp
|
||||
incremented = (prev_ts + timedelta(microseconds=1)).isoformat()
|
||||
self.timestamp = max(incremented, datetime.utcnow().isoformat())
|
||||
# include checks for limits as enforced by limited unpacker (used by load())
|
||||
assert len(self.archives) <= MAX_ARCHIVES
|
||||
assert all(len(name) <= 255 for name in self.archives)
|
||||
assert len(self.item_keys) <= 100
|
||||
manifest = ManifestItem(
|
||||
version=1,
|
||||
archives=StableDict(self.archives.get_raw_dict()),
|
||||
|
|
|
|||
|
|
@ -20,16 +20,18 @@ import msgpack
|
|||
|
||||
from . import __version__
|
||||
from .compress import LZ4
|
||||
from .constants import * # NOQA
|
||||
from .helpers import Error, IntegrityError
|
||||
from .helpers import bin_to_hex
|
||||
from .helpers import get_home_dir
|
||||
from .helpers import get_limited_unpacker
|
||||
from .helpers import hostname_is_unique
|
||||
from .helpers import replace_placeholders
|
||||
from .helpers import sysinfo
|
||||
from .helpers import format_file_size
|
||||
from .helpers import truncate_and_unlink
|
||||
from .logger import create_logger, setup_logging
|
||||
from .repository import Repository, MAX_OBJECT_SIZE, LIST_SCAN_LIMIT
|
||||
from .repository import Repository
|
||||
from .version import parse_version, format_version
|
||||
from .algorithms.checksums import xxh64
|
||||
|
||||
|
|
@ -39,8 +41,6 @@ RPC_PROTOCOL_VERSION = 2
|
|||
BORG_VERSION = parse_version(__version__)
|
||||
MSGID, MSG, ARGS, RESULT = b'i', b'm', b'a', b'r'
|
||||
|
||||
BUFSIZE = 10 * 1024 * 1024
|
||||
|
||||
MAX_INFLIGHT = 100
|
||||
|
||||
RATELIMIT_PERIOD = 0.1
|
||||
|
|
@ -65,27 +65,6 @@ def os_write(fd, data):
|
|||
return amount
|
||||
|
||||
|
||||
def get_limited_unpacker(kind):
|
||||
"""return a limited Unpacker because we should not trust msgpack data received from remote"""
|
||||
args = dict(use_list=False, # return tuples, not lists
|
||||
max_bin_len=0, # not used
|
||||
max_ext_len=0, # not used
|
||||
max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE),
|
||||
max_str_len=MAX_OBJECT_SIZE, # a chunk or other repo object
|
||||
)
|
||||
if kind == 'server':
|
||||
args.update(dict(max_array_len=100, # misc. cmd tuples
|
||||
max_map_len=100, # misc. cmd dicts
|
||||
))
|
||||
elif kind == 'client':
|
||||
args.update(dict(max_array_len=LIST_SCAN_LIMIT, # result list from repo.list() / .scan()
|
||||
max_map_len=100, # misc. result dicts
|
||||
))
|
||||
else:
|
||||
raise ValueError('kind must be "server" or "client"')
|
||||
return msgpack.Unpacker(**args)
|
||||
|
||||
|
||||
class ConnectionClosed(Error):
|
||||
"""Connection closed by remote host"""
|
||||
|
||||
|
|
|
|||
|
|
@ -34,8 +34,6 @@ TAG_PUT = 0
|
|||
TAG_DELETE = 1
|
||||
TAG_COMMIT = 2
|
||||
|
||||
LIST_SCAN_LIMIT = 100000 # repo.list() / .scan() result count limit the borg client uses
|
||||
|
||||
FreeSpace = partial(defaultdict, int)
|
||||
|
||||
|
||||
|
|
@ -1411,7 +1409,4 @@ class LoggedIO:
|
|||
return self.segment - 1 # close_segment() increments it
|
||||
|
||||
|
||||
# MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of
|
||||
# a 1 byte tag ID, 4 byte CRC, 4 byte size and 32 bytes for the ID.
|
||||
MAX_OBJECT_SIZE = MAX_DATA_SIZE + LoggedIO.put_header_fmt.size
|
||||
assert MAX_OBJECT_SIZE == 20971520 == 20 * 1024 * 1024
|
||||
assert LoggedIO.put_header_fmt.size == 41 # see constants.MAX_OBJECT_SIZE
|
||||
|
|
|
|||
|
|
@ -331,7 +331,7 @@ class TestTAM:
|
|||
key.unpack_and_verify_manifest(blob)
|
||||
|
||||
blob = b'\xc1\xc1\xc1'
|
||||
with pytest.raises(msgpack.UnpackException):
|
||||
with pytest.raises((ValueError, msgpack.UnpackException)):
|
||||
key.unpack_and_verify_manifest(blob)
|
||||
|
||||
def test_missing_when_required(self, key):
|
||||
|
|
|
|||
Loading…
Reference in a new issue