add fuzzing tests for chunkers
commit 084750ad48 (parent: f401ca00f9)
4 changed files with 128 additions and 3 deletions
@@ -22,6 +22,11 @@ def cf(chunks):
     return [_cf(chunk) for chunk in chunks]
 
 
+def cf_expand(chunks):
+    """same as cf, but do not return ints for HOLE and ALLOC, but all-zero bytestrings"""
+    return [ch if isinstance(ch, bytes) else b"\0" * ch for ch in cf(chunks)]
+
+
 def make_sparsefile(fname, sparsemap, header_size=0):
     with open(fname, "wb") as fd:
         total = 0
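Side note on the new helper: sparse-aware chunkers can report hole/alloc regions as plain ints (the region size) instead of data bytes, and cf_expand normalizes both cases to bytestrings so the fuzz tests below can verify a lossless round-trip by simple concatenation. A minimal standalone sketch of that normalization (illustrative values only, not borg's actual cf internals):

    # bytes = real chunk data; ints = hole/alloc sizes (hypothetical example values)
    chunks = [b"abc", 5, b"xy"]
    expanded = [ch if isinstance(ch, bytes) else b"\0" * ch for ch in chunks]
    assert b"".join(expanded) == b"abc" + b"\0" * 5 + b"xy"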
@@ -1,8 +1,11 @@
 from hashlib import sha256
 from io import BytesIO
 import os
+import random
 
-from . import cf
+import pytest
+
+from . import cf, cf_expand
 from ...chunkers import ChunkerBuzHash64
 from ...chunkers.buzhash64 import buzhash64_get_table
 from ...constants import *  # NOQA
@@ -98,3 +101,42 @@ def test_buzhash64_table():
     for bit_pos in range(64):
         bit_count = sum(1 for value in table0 if value & (1 << bit_pos))
         assert bit_count == 128  # 50% of 256 = 128
+
+
+@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
+@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
+def test_fuzz_bh64(worker):
+    # Fuzz buzhash64 with random and uniform data of misc. sizes and misc. keys.
+    def rnd_key():
+        return os.urandom(32)
+
+    # decompose CHUNKER64_PARAMS = (algo, min_exp, max_exp, mask_bits, window_size)
+    algo, min_exp, max_exp, mask_bits, win_size = CHUNKER64_PARAMS
+    assert algo == CH_BUZHASH64  # default chunker must be buzhash64 here
+
+    keys = [b"\0" * 32] + [rnd_key() for _ in range(10)]
+    sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]
+
+    for key in keys:
+        chunker = ChunkerBuzHash64(key, min_exp, max_exp, mask_bits, win_size)
+        for size in sizes:
+            # Random data
+            data = os.urandom(size)
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-same data (non-zero)
+            data = b"\x42" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-zero data
+            data = b"\x00" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
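Note on the scaffolding above: the worker argument is never used inside the test body. Parametrizing over range(os.cpu_count() or 1) simply creates one identical test item per CPU core, presumably so a parallel runner such as pytest-xdist can fan the fuzzing out across cores (each process draws its own random keys and sizes). Combined with the skipif gate, a typical invocation would be something like BORG_TESTS_SLOW=1 pytest -n auto -k fuzz, where -n auto assumes pytest-xdist is installed.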
@@ -1,8 +1,11 @@
 from hashlib import sha256
 from io import BytesIO
 import os
+import random
 
-from . import cf
+import pytest
+
+from . import cf, cf_expand
 from ...chunkers import Chunker
 from ...constants import *  # NOQA
 from ...helpers import hex_to_bin
@@ -67,3 +70,43 @@ def test_buzhash_chunksize_distribution():
     # most chunks should be cut due to buzhash triggering, not due to clipping at min/max size:
     assert min_count < 10
     assert max_count < 10
+
+
+@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
+@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
+def test_fuzz_buzhash(worker):
+    # Fuzz the default chunker (buzhash) with random and uniform data of misc. sizes and seeds of 0 or random int32 values.
+    def rnd_int32():
+        uint = random.getrandbits(32)
+        return uint if uint < 2**31 else uint - 2**32
+
+    # decompose CHUNKER_PARAMS = (algo, min_exp, max_exp, mask_bits, window_size)
+    algo, min_exp, max_exp, mask_bits, win_size = CHUNKER_PARAMS
+    assert algo == CH_BUZHASH  # default chunker must be buzhash here
+
+    seeds = [0] + [rnd_int32() for _ in range(50)]
+    sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]
+
+    for seed in seeds:
+        chunker = Chunker(seed, min_exp, max_exp, mask_bits, win_size)
+        for size in sizes:
+            # Random data
+            data = os.urandom(size)
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-same data (non-zero)
+            data = b"\x42" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-zero data
+            data = b"\x00" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
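The rnd_int32 helper maps an unsigned 32-bit value to its signed two's-complement interpretation, apparently because the buzhash chunker takes its seed as a signed 32-bit integer (which is what sampling "0 or random int32 values" suggests). A quick standalone check of that mapping, using hypothetical boundary values not taken from the commit:

    # two's complement: values >= 2**31 wrap around to negative
    for uint, expected in [(0, 0), (2**31 - 1, 2**31 - 1), (2**31, -(2**31)), (2**32 - 1, -1)]:
        signed = uint if uint < 2**31 else uint - 2**32
        assert signed == expected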
@@ -1,6 +1,10 @@
 from io import BytesIO
 import os
+import random
+
+import pytest
+
-from . import cf, make_sparsefile, make_content, fs_supports_sparse
+from . import cf, cf_expand, make_sparsefile, make_content, fs_supports_sparse
 from . import BS, map_sparse1, map_sparse2, map_onlysparse, map_notsparse
 from ...chunkers import ChunkerFixed
 from ...constants import *  # NOQA
@@ -37,3 +41,34 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
     fn = str(tmpdir / fname)
     make_sparsefile(fn, sparse_map, header_size=header_size)
     get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
+
+
+@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
+@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
+def test_fuzz_fixed(worker):
+    # Fuzz the fixed chunker with random and uniform data of misc. sizes.
+    sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]
+
+    for block_size, header_size in [(1024, 64), (1234, 0), (4321, 123)]:
+        chunker = ChunkerFixed(block_size, header_size)
+        for size in sizes:
+            # Random data
+            data = os.urandom(size)
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-same data (non-zero)
+            data = b"\x42" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-zero data
+            data = b"\x00" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
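Unlike the content-defined chunkers, the fixed chunker's cut points are fully determined by block_size and header_size: an optional header-sized first chunk, then full blocks, then the remainder. A rough standalone sketch of the expected chunk sizes (hypothetical helper with simplified semantics, e.g. ignoring inputs shorter than the header; not borg's actual output):

    def expected_chunk_sizes(total, block_size, header_size):
        # header first (if any), then full blocks, then the tail
        sizes = [header_size] if 0 < header_size <= total else []
        rest = total - sum(sizes)
        sizes += [block_size] * (rest // block_size)
        if rest % block_size:
            sizes.append(rest % block_size)
        return sizes

    assert expected_chunk_sizes(10, 4, 0) == [4, 4, 2]
    assert expected_chunk_sizes(10, 3, 1) == [1, 3, 3, 3]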