From 441741130fc3cd019d5c69bcb9bf7a4d54f29be6 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann
Date: Thu, 22 May 2025 16:57:50 +0200
Subject: [PATCH] ChunkerParams: reject even window size for buzhash, fixes #8868

---
 docs/internals/data-structures.rst  | 2 +-
 docs/misc/create_chunker-params.txt | 3 +--
 src/borg/helpers/parseformat.py     | 2 ++
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/internals/data-structures.rst b/docs/internals/data-structures.rst
index f0202b728..f5cf8150b 100644
--- a/docs/internals/data-structures.rst
+++ b/docs/internals/data-structures.rst
@@ -657,7 +657,7 @@ can be used to tune the chunker parameters, the default is:
 - CHUNK_MIN_EXP = 19 (minimum chunk size = 2^19 B = 512 kiB)
 - CHUNK_MAX_EXP = 23 (maximum chunk size = 2^23 B = 8 MiB)
 - HASH_MASK_BITS = 21 (target chunk size ~= 2^21 B = 2 MiB)
-- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`)
+- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`) (must be an odd number)
 
 The buzhash table is altered by XORing it with a seed randomly generated once
 for the repository, and stored encrypted in the keyfile. This is to prevent
diff --git a/docs/misc/create_chunker-params.txt b/docs/misc/create_chunker-params.txt
index af602c5c5..c24dd3ed4 100644
--- a/docs/misc/create_chunker-params.txt
+++ b/docs/misc/create_chunker-params.txt
@@ -18,7 +18,7 @@ determined by the windows contents rather than the min/max. chunk size).
 Default: 21 (statistically, chunks will be about 2^21 == 2MiB in size)
 
 HASH_WINDOW_SIZE: the size of the window used for the rolling hash computation.
-Default: 4095B
+Must be an odd number. Default: 4095B
 
 
 Trying it out
@@ -114,4 +114,3 @@ $ ls -l /extra/repo-xl/index*
 $ du -sk /extra/repo-xl/
 14253464 /extra/repo-xl/
 
-
diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py
index 03e9c20a4..dd0430be9 100644
--- a/src/borg/helpers/parseformat.py
+++ b/src/borg/helpers/parseformat.py
@@ -131,6 +131,8 @@ def ChunkerParams(s):
             reject_or_warn('min. chunk size exponent must not be less than 6 (2^6 = 64B min. chunk size)', False)
         if chunk_max > 23:
             reject_or_warn('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)', True)
+        if window_size % 2 == 0:
+            reject_or_warn("window_size must be an uneven (odd) number", False)
         return CH_BUZHASH, chunk_min, chunk_max, chunk_mask, window_size
     raise argparse.ArgumentTypeError('invalid chunker params')
 