mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-11 01:41:57 -04:00
Merge pull request #7589 from ThomasWaldmann/chunker-params-1.2
relax chunker params validation, tests (1.2-maint)
This commit is contained in:
commit
53bedfb63b
3 changed files with 47 additions and 19 deletions
|
|
@ -95,10 +95,16 @@ def interval(s):
|
|||
|
||||
|
||||
def ChunkerParams(s):
|
||||
def reject_or_warn(msg, reject):
|
||||
if reject:
|
||||
raise argparse.ArgumentTypeError(msg)
|
||||
else:
|
||||
logger.warning(msg)
|
||||
|
||||
params = s.strip().split(',')
|
||||
count = len(params)
|
||||
if count == 0:
|
||||
raise argparse.ArgumentTypeError('no chunker params given')
|
||||
reject_or_warn('no chunker params given', True)
|
||||
algo = params[0].lower()
|
||||
if algo == CH_FIXED and 2 <= count <= 3: # fixed, block_size[, header_size]
|
||||
block_size = int(params[1])
|
||||
|
|
@ -109,11 +115,9 @@ def ChunkerParams(s):
|
|||
# or in-memory chunk management.
|
||||
# choose the block (chunk) size wisely: if you have a lot of data and you cut
|
||||
# it into very small chunks, you are asking for trouble!
|
||||
raise argparse.ArgumentTypeError('block_size must not be less than 64 Bytes')
|
||||
reject_or_warn('block_size must not be less than 64 Bytes', False)
|
||||
if block_size > MAX_DATA_SIZE or header_size > MAX_DATA_SIZE:
|
||||
raise argparse.ArgumentTypeError(
|
||||
'block_size and header_size must not exceed MAX_DATA_SIZE [%d]' % MAX_DATA_SIZE
|
||||
)
|
||||
reject_or_warn('block_size and header_size must not exceed MAX_DATA_SIZE [%d]' % MAX_DATA_SIZE, True)
|
||||
return algo, block_size, header_size
|
||||
if algo == 'default' and count == 1: # default
|
||||
return CHUNKER_PARAMS
|
||||
|
|
@ -121,16 +125,12 @@ def ChunkerParams(s):
|
|||
if algo == CH_BUZHASH and count == 5 or count == 4: # [buzhash, ]chunk_min, chunk_max, chunk_mask, window_size
|
||||
chunk_min, chunk_max, chunk_mask, window_size = (int(p) for p in params[count - 4:])
|
||||
if not (chunk_min <= chunk_mask <= chunk_max):
|
||||
raise argparse.ArgumentTypeError('required: chunk_min <= chunk_mask <= chunk_max')
|
||||
reject_or_warn('required: chunk_min <= chunk_mask <= chunk_max', False)
|
||||
if chunk_min < 6:
|
||||
# see comment in 'fixed' algo check
|
||||
raise argparse.ArgumentTypeError(
|
||||
'min. chunk size exponent must not be less than 6 (2^6 = 64B min. chunk size)'
|
||||
)
|
||||
reject_or_warn('min. chunk size exponent must not be less than 6 (2^6 = 64B min. chunk size)', False)
|
||||
if chunk_max > 23:
|
||||
raise argparse.ArgumentTypeError(
|
||||
'max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)'
|
||||
)
|
||||
reject_or_warn('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)', True)
|
||||
return CH_BUZHASH, chunk_min, chunk_max, chunk_mask, window_size
|
||||
raise argparse.ArgumentTypeError('invalid chunker params')
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import tempfile
|
|||
import pytest
|
||||
|
||||
from .chunker import cf
|
||||
from ..chunker import ChunkerFixed, sparsemap, has_seek_hole
|
||||
from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole
|
||||
from ..constants import * # NOQA
|
||||
|
||||
BS = 4096 # fs block size
|
||||
|
|
@ -136,3 +136,27 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
|
|||
fn = str(tmpdir / fname)
|
||||
make_sparsefile(fn, sparse_map, header_size=header_size)
|
||||
get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
|
||||
|
||||
|
||||
def test_buzhash_chunksize_distribution():
    """Check that buzhash chunking of random data yields a sane chunk size distribution.

    One MiB of random bytes is chunked with min/max size exponents 10 and 16 and a
    mask value of 14. The test then verifies that the number of chunks is in a
    plausible range, that every chunk respects the min/max clipping, and that only
    few chunks were cut exactly at the min or max size (i.e. most cuts were
    triggered by the hash, not by clipping).
    """
    # chunk size target 16kiB, clip at 1kiB and 64kiB
    min_exp, max_exp, mask = 10, 16, 14
    smallest_allowed = 2 ** min_exp
    largest_allowed = 2 ** max_exp

    random_input = BytesIO(os.urandom(1048576))
    chunker = Chunker(0, min_exp, max_exp, mask, 4095)
    chunks = cf(chunker.chunkify(random_input))

    sizes = [len(piece) for piece in chunks]
    chunks_count = len(chunks)
    smallest_seen = min(sizes)
    largest_seen = max(sizes)
    # how many chunks ended exactly at the lower / upper clipping boundary
    min_count = sizes.count(smallest_allowed)
    max_count = sizes.count(largest_allowed)
    print(f"count: {chunks_count} min: {smallest_seen} max: {largest_seen} "
          f"min count: {min_count} max count: {max_count}")

    # usually there will be about 64 chunks
    assert 32 < chunks_count < 128
    # chunks always must be between min and max (clipping must work):
    assert smallest_seen >= smallest_allowed
    assert largest_seen <= largest_allowed
    # most chunks should be cut due to buzhash triggering, not due to clipping at min/max size:
    assert min_count < 10
    assert max_count < 10
|
||||
|
|
|
|||
|
|
@ -349,16 +349,20 @@ def test_chunkerparams():
|
|||
assert ChunkerParams('fixed,4096') == ('fixed', 4096, 0)
|
||||
assert ChunkerParams('fixed,4096,200') == ('fixed', 4096, 200)
|
||||
# invalid values checking
|
||||
borg2 = False # for borg < 2, we only emit a warning, but do not raise ArgumentTypeError for some cases
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
ChunkerParams('crap,1,2,3,4') # invalid algo
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
ChunkerParams('buzhash,5,7,6,4095') # too small min. size
|
||||
if borg2:
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
ChunkerParams('buzhash,5,7,6,4095') # too small min. size
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
ChunkerParams('buzhash,19,24,21,4095') # too big max. size
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
ChunkerParams('buzhash,23,19,21,4095') # violates min <= mask <= max
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
ChunkerParams('fixed,63') # too small block size
|
||||
if borg2:
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
ChunkerParams('buzhash,23,19,21,4095') # violates min <= mask <= max
|
||||
if borg2:
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
ChunkerParams('fixed,63') # too small block size
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
ChunkerParams('fixed,%d,%d' % (MAX_DATA_SIZE + 1, 4096)) # too big block size
|
||||
with pytest.raises(ArgumentTypeError):
|
||||
|
|
|
|||
Loading…
Reference in a new issue