add fuzzing tests for chunkers

Thomas Waldmann 2025-10-15 23:48:13 +02:00
parent f401ca00f9
commit 084750ad48
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
4 changed files with 128 additions and 3 deletions

View file

@@ -22,6 +22,11 @@ def cf(chunks):
    return [_cf(chunk) for chunk in chunks]


def cf_expand(chunks):
    """same as cf, but returns all-zero bytestrings instead of ints for HOLE and ALLOC"""
    return [ch if isinstance(ch, bytes) else b"\0" * ch for ch in cf(chunks)]


def make_sparsefile(fname, sparsemap, header_size=0):
    with open(fname, "wb") as fd:
        total = 0
View file

@@ -1,8 +1,11 @@
from hashlib import sha256
from io import BytesIO
import os
import random
from . import cf
import pytest
from . import cf, cf_expand
from ...chunkers import ChunkerBuzHash64
from ...chunkers.buzhash64 import buzhash64_get_table
from ...constants import * # NOQA
@@ -98,3 +101,42 @@ def test_buzhash64_table():
    for bit_pos in range(64):
        bit_count = sum(1 for value in table0 if value & (1 << bit_pos))
        assert bit_count == 128  # 50% of 256 = 128


@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
def test_fuzz_bh64(worker):
    # Fuzz buzhash64 with random and uniform data of misc. sizes and misc. keys.
    def rnd_key():
        return os.urandom(32)

    # decompose CHUNKER64_PARAMS = (algo, min_exp, max_exp, mask_bits, window_size)
    algo, min_exp, max_exp, mask_bits, win_size = CHUNKER64_PARAMS
    assert algo == CH_BUZHASH64  # default chunker must be buzhash64 here
    keys = [b"\0" * 32] + [rnd_key() for _ in range(10)]
    sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]
    for key in keys:
        chunker = ChunkerBuzHash64(key, min_exp, max_exp, mask_bits, win_size)
        for size in sizes:
            # Random data
            data = os.urandom(size)
            with BytesIO(data) as bio:
                parts = cf_expand(chunker.chunkify(bio))
            reconstructed = b"".join(parts)
            assert reconstructed == data
            # All-same data (non-zero)
            data = b"\x42" * size
            with BytesIO(data) as bio:
                parts = cf_expand(chunker.chunkify(bio))
            reconstructed = b"".join(parts)
            assert reconstructed == data
            # All-zero data
            data = b"\x00" * size
            with BytesIO(data) as bio:
                parts = cf_expand(chunker.chunkify(bio))
            reconstructed = b"".join(parts)
            assert reconstructed == data
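
The three payload variants in this test (random, all-same, all-zero) repeat the same chunkify-and-reassemble check. As a side note, a small helper could factor that round trip out; the sketch below is illustrative only — the helper name is hypothetical and it relies on BytesIO and cf_expand as imported in this test module, not on anything added by the commit.

# Hypothetical helper: chunkify the data, expand HOLE/ALLOC placeholders to zero
# bytes, and check that joining the parts reproduces the input exactly.
def assert_roundtrip(chunker, data):
    with BytesIO(data) as bio:
        parts = cf_expand(chunker.chunkify(bio))
    assert b"".join(parts) == data

# the per-size body would then shrink to:
# for payload in (os.urandom(size), b"\x42" * size, b"\x00" * size):
#     assert_roundtrip(chunker, payload)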

View file

@@ -1,8 +1,11 @@
from hashlib import sha256
from io import BytesIO
import os
import random
from . import cf
import pytest
from . import cf, cf_expand
from ...chunkers import Chunker
from ...constants import * # NOQA
from ...helpers import hex_to_bin
@@ -67,3 +70,43 @@ def test_buzhash_chunksize_distribution():
    # most chunks should be cut due to buzhash triggering, not due to clipping at min/max size:
    assert min_count < 10
    assert max_count < 10


@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
def test_fuzz_buzhash(worker):
    # Fuzz the default chunker (buzhash) with random and uniform data of misc. sizes and seeds 0 or random int32 values.
    def rnd_int32():
        uint = random.getrandbits(32)
        return uint if uint < 2**31 else uint - 2**32

    # decompose CHUNKER_PARAMS = (algo, min_exp, max_exp, mask_bits, window_size)
    algo, min_exp, max_exp, mask_bits, win_size = CHUNKER_PARAMS
    assert algo == CH_BUZHASH  # default chunker must be buzhash here
    seeds = [0] + [rnd_int32() for _ in range(50)]
    sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]
    for seed in seeds:
        chunker = Chunker(seed, min_exp, max_exp, mask_bits, win_size)
        for size in sizes:
            # Random data
            data = os.urandom(size)
            with BytesIO(data) as bio:
                parts = cf_expand(chunker.chunkify(bio))
            reconstructed = b"".join(parts)
            assert reconstructed == data
            # All-same data (non-zero)
            data = b"\x42" * size
            with BytesIO(data) as bio:
                parts = cf_expand(chunker.chunkify(bio))
            reconstructed = b"".join(parts)
            assert reconstructed == data
            # All-zero data
            data = b"\x00" * size
            with BytesIO(data) as bio:
                parts = cf_expand(chunker.chunkify(bio))
            reconstructed = b"".join(parts)
            assert reconstructed == data
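
A brief note on the CHUNKER_PARAMS decomposition used above: min_exp and max_exp are power-of-two exponents bounding the chunk size, and mask_bits steers the statistically expected chunk size. A small illustrative sketch follows; the values 19/23/21 are borg's documented default chunker parameters, used here only as an example rather than read from the constants module.

# Sketch: chunk size bounds implied by (min_exp, max_exp, mask_bits).
min_exp, max_exp, mask_bits = 19, 23, 21  # example values (borg's documented defaults)
min_size = 2**min_exp    # 524288  -> no chunk smaller than 512 KiB (except the last one)
max_size = 2**max_exp    # 8388608 -> hard 8 MiB upper bound per chunk
avg_size = 2**mask_bits  # 2097152 -> ~2 MiB statistically expected chunk size
assert min_size <= avg_size <= max_size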

View file

@@ -1,6 +1,10 @@
from io import BytesIO
import os
import random
import pytest
from . import cf, make_sparsefile, make_content, fs_supports_sparse
from . import cf, cf_expand, make_sparsefile, make_content, fs_supports_sparse
from . import BS, map_sparse1, map_sparse2, map_onlysparse, map_notsparse
from ...chunkers import ChunkerFixed
from ...constants import * # NOQA
@@ -37,3 +41,34 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
    fn = str(tmpdir / fname)
    make_sparsefile(fn, sparse_map, header_size=header_size)
    assert get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)


@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
def test_fuzz_fixed(worker):
    # Fuzz fixed chunker with random and uniform data of misc. sizes.
    sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]
    for block_size, header_size in [(1024, 64), (1234, 0), (4321, 123)]:
        chunker = ChunkerFixed(block_size, header_size)
        for size in sizes:
            # Random data
            data = os.urandom(size)
            with BytesIO(data) as bio:
                parts = cf_expand(chunker.chunkify(bio))
            reconstructed = b"".join(parts)
            assert reconstructed == data
            # All-same data (non-zero)
            data = b"\x42" * size
            with BytesIO(data) as bio:
                parts = cf_expand(chunker.chunkify(bio))
            reconstructed = b"".join(parts)
            assert reconstructed == data
            # All-zero data
            data = b"\x00" * size
            with BytesIO(data) as bio:
                parts = cf_expand(chunker.chunkify(bio))
            reconstructed = b"".join(parts)
            assert reconstructed == data
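
Unlike the rolling-hash chunkers, the fixed chunker's cut points are fully deterministic, so the expected chunk layout can be spelled out directly. The sketch below encodes the assumed layout — an optional header_size chunk first, then block_size chunks, with the last one possibly shorter; it is an illustration of that assumption, not code from the commit.

# Sketch of the assumed fixed-chunking layout: optional header chunk, then
# fixed-size blocks, with a shorter final block if the data does not divide evenly.
def expected_sizes(size, block_size, header_size=0):
    sizes = []
    if header_size and size:
        sizes.append(min(header_size, size))
        size -= sizes[-1]
    while size > 0:
        sizes.append(min(block_size, size))
        size -= sizes[-1]
    return sizes

assert expected_sizes(10_000, 1024, 64) == [64] + [1024] * 9 + [720]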