From 084750ad48d694cdc4c81696680648475c27a5cd Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Wed, 15 Oct 2025 23:48:13 +0200
Subject: [PATCH] add fuzzing tests for chunkers

---
 src/borg/testsuite/chunkers/__init__.py       |  5 +++
 src/borg/testsuite/chunkers/buzhash64_test.py | 44 +++++++++++++++++-
 src/borg/testsuite/chunkers/buzhash_test.py   | 45 ++++++++++++++++++-
 src/borg/testsuite/chunkers/fixed_test.py     | 37 ++++++++++++++-
 4 files changed, 128 insertions(+), 3 deletions(-)

diff --git a/src/borg/testsuite/chunkers/__init__.py b/src/borg/testsuite/chunkers/__init__.py
index 82d2689e6..2e01e98e3 100644
--- a/src/borg/testsuite/chunkers/__init__.py
+++ b/src/borg/testsuite/chunkers/__init__.py
@@ -22,6 +22,11 @@ def cf(chunks):
     return [_cf(chunk) for chunk in chunks]
 
 
+def cf_expand(chunks):
+    """same as cf, but do not return ints for HOLE and ALLOC, but all-zero bytestrings"""
+    return [ch if isinstance(ch, bytes) else b"\0" * ch for ch in cf(chunks)]
+
+
 def make_sparsefile(fname, sparsemap, header_size=0):
     with open(fname, "wb") as fd:
         total = 0
diff --git a/src/borg/testsuite/chunkers/buzhash64_test.py b/src/borg/testsuite/chunkers/buzhash64_test.py
index 41e0b06f6..0bbeb4d3d 100644
--- a/src/borg/testsuite/chunkers/buzhash64_test.py
+++ b/src/borg/testsuite/chunkers/buzhash64_test.py
@@ -1,8 +1,11 @@
 from hashlib import sha256
 from io import BytesIO
 import os
+import random
 
-from . import cf
+import pytest
+
+from . import cf, cf_expand
 from ...chunkers import ChunkerBuzHash64
 from ...chunkers.buzhash64 import buzhash64_get_table
 from ...constants import *  # NOQA
@@ -98,3 +101,42 @@ def test_buzhash64_table():
     for bit_pos in range(64):
         bit_count = sum(1 for value in table0 if value & (1 << bit_pos))
         assert bit_count == 128  # 50% of 256 = 128
+
+
+@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
+@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
+def test_fuzz_bh64(worker):
+    # Fuzz buzhash64 with random and uniform data of misc. sizes and misc keys.
+    def rnd_key():
+        return os.urandom(32)
+
+    # decompose CHUNKER64_PARAMS = (algo, min_exp, max_exp, mask_bits, window_size)
+    algo, min_exp, max_exp, mask_bits, win_size = CHUNKER64_PARAMS
+    assert algo == CH_BUZHASH64  # default chunker must be buzhash64 here
+
+    keys = [b"\0" * 32] + [rnd_key() for _ in range(10)]
+    sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]
+
+    for key in keys:
+        chunker = ChunkerBuzHash64(key, min_exp, max_exp, mask_bits, win_size)
+        for size in sizes:
+            # Random data
+            data = os.urandom(size)
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-same data (non-zero)
+            data = b"\x42" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-zero data
+            data = b"\x00" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
diff --git a/src/borg/testsuite/chunkers/buzhash_test.py b/src/borg/testsuite/chunkers/buzhash_test.py
index 4434d5486..c1b00d6a4 100644
--- a/src/borg/testsuite/chunkers/buzhash_test.py
+++ b/src/borg/testsuite/chunkers/buzhash_test.py
@@ -1,8 +1,11 @@
 from hashlib import sha256
 from io import BytesIO
 import os
+import random
 
-from . import cf
+import pytest
+
+from . import cf, cf_expand
 from ...chunkers import Chunker
 from ...constants import *  # NOQA
 from ...helpers import hex_to_bin
@@ -67,3 +70,43 @@ def test_buzhash_chunksize_distribution():
     # most chunks should be cut due to buzhash triggering, not due to clipping at min/max size:
     assert min_count < 10
     assert max_count < 10
+
+
+@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
+@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
+def test_fuzz_buzhash(worker):
+    # Fuzz the default chunker (buzhash) with random and uniform data of misc. sizes and seeds 0 or random int32 values.
+    def rnd_int32():
+        uint = random.getrandbits(32)
+        return uint if uint < 2**31 else uint - 2**32
+
+    # decompose CHUNKER_PARAMS = (algo, min_exp, max_exp, mask_bits, window_size)
+    algo, min_exp, max_exp, mask_bits, win_size = CHUNKER_PARAMS
+    assert algo == CH_BUZHASH  # default chunker must be buzhash here
+
+    seeds = [0] + [rnd_int32() for _ in range(50)]
+    sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]
+
+    for seed in seeds:
+        chunker = Chunker(seed, min_exp, max_exp, mask_bits, win_size)
+        for size in sizes:
+            # Random data
+            data = os.urandom(size)
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-same data (non-zero)
+            data = b"\x42" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-zero data
+            data = b"\x00" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
diff --git a/src/borg/testsuite/chunkers/fixed_test.py b/src/borg/testsuite/chunkers/fixed_test.py
index 2d4971866..b8598a926 100644
--- a/src/borg/testsuite/chunkers/fixed_test.py
+++ b/src/borg/testsuite/chunkers/fixed_test.py
@@ -1,6 +1,10 @@
+from io import BytesIO
+import os
+import random
+
 import pytest
 
-from . import cf, make_sparsefile, make_content, fs_supports_sparse
+from . import cf, cf_expand, make_sparsefile, make_content, fs_supports_sparse
 from . import BS, map_sparse1, map_sparse2, map_onlysparse, map_notsparse
 from ...chunkers import ChunkerFixed
 from ...constants import *  # NOQA
@@ -37,3 +41,34 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
     fn = str(tmpdir / fname)
     make_sparsefile(fn, sparse_map, header_size=header_size)
     get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
+
+
+@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
+@pytest.mark.parametrize("worker", range(os.cpu_count() or 1))
+def test_fuzz_fixed(worker):
+    # Fuzz fixed chunker with random and uniform data of misc. sizes.
+    sizes = [random.randint(1, 4 * 1024 * 1024) for _ in range(50)]
+
+    for block_size, header_size in [(1024, 64), (1234, 0), (4321, 123)]:
+        chunker = ChunkerFixed(block_size, header_size)
+        for size in sizes:
+            # Random data
+            data = os.urandom(size)
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-same data (non-zero)
+            data = b"\x42" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data
+
+            # All-zero data
+            data = b"\x00" * size
+            with BytesIO(data) as bio:
+                parts = cf_expand(chunker.chunkify(bio))
+            reconstructed = b"".join(parts)
+            assert reconstructed == data