From 544b3f41a90eea4200cfea13ab83b0ab7adb2815 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 6 Jun 2025 01:52:29 +0200 Subject: [PATCH] get_chunker: give it the key instead of the seed the buzhash seed only has 32bits, but we rather want 64bits for buzhash64. just take them from crypt_key for now. --- src/borg/archive.py | 8 ++++---- src/borg/archiver/benchmark_cmd.py | 4 ++-- src/borg/archiver/transfer_cmd.py | 2 +- src/borg/chunkers/__init__.py | 17 +++++++++++------ .../testsuite/chunkers/buzhash64_self_test.py | 2 +- .../testsuite/chunkers/buzhash_self_test.py | 2 +- 6 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 462da3136..5bf8faaec 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -351,7 +351,7 @@ class ChunkBuffer: self.packer = msgpack.Packer() self.chunks = [] self.key = key - self.chunker = get_chunker(*chunker_params, seed=self.key.chunk_seed, sparse=False) + self.chunker = get_chunker(*chunker_params, key=self.key, sparse=False) self.saved_chunks_len = None def add(self, item): @@ -1227,7 +1227,7 @@ class FilesystemObjectProcessors: self.hlm = HardLinkManager(id_type=tuple, info_type=(list, type(None))) # (dev, ino) -> chunks or None self.stats = Statistics(output_json=log_json, iec=iec) # threading: done by cache (including progress) self.cwd = os.getcwd() - self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse) + self.chunker = get_chunker(*chunker_params, key=key, sparse=sparse) @contextmanager def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None): @@ -1502,7 +1502,7 @@ class TarfileObjectProcessors: self.print_file_status = file_status_printer or (lambda *args: None) self.stats = Statistics(output_json=log_json, iec=iec) # threading: done by cache (including progress) - self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=False) + self.chunker = get_chunker(*chunker_params, key=key, sparse=False) self.hlm = HardLinkManager(id_type=str, info_type=list) # path -> chunks @contextmanager @@ -2325,7 +2325,7 @@ class ArchiveRecreater: target.process_file_chunks = ChunksProcessor( cache=self.cache, key=self.key, add_item=target.add_item, rechunkify=target.recreate_rechunkify ).process_file_chunks - target.chunker = get_chunker(*target.chunker_params, seed=self.key.chunk_seed, sparse=False) + target.chunker = get_chunker(*target.chunker_params, key=self.key, sparse=False) return target def create_target_archive(self, name): diff --git a/src/borg/archiver/benchmark_cmd.py b/src/borg/archiver/benchmark_cmd.py index 175b940d6..2818435f1 100644 --- a/src/borg/archiver/benchmark_cmd.py +++ b/src/borg/archiver/benchmark_cmd.py @@ -146,8 +146,8 @@ class BenchmarkMixIn: pass for spec, func in [ - ("buzhash,19,23,21,4095", lambda: chunkit("buzhash", 19, 23, 21, 4095, seed=0, sparse=False)), - ("buzhash64,19,23,21,4095", lambda: chunkit("buzhash64", 19, 23, 21, 4095, seed=0, sparse=False)), + ("buzhash,19,23,21,4095", lambda: chunkit("buzhash", 19, 23, 21, 4095, sparse=False)), + ("buzhash64,19,23,21,4095", lambda: chunkit("buzhash64", 19, 23, 21, 4095, sparse=False)), ("fixed,1048576", lambda: chunkit("fixed", 1048576, sparse=False)), ]: print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s") diff --git a/src/borg/archiver/transfer_cmd.py b/src/borg/archiver/transfer_cmd.py index 617c8abec..4ada0b848 100644 --- a/src/borg/archiver/transfer_cmd.py +++ b/src/borg/archiver/transfer_cmd.py @@ -41,7 +41,7 @@ def transfer_chunks( file = ChunkIteratorFileWrapper(chunk_iterator) # Create a chunker with the specified parameters - chunker = get_chunker(*chunker_params, seed=archive.key.chunk_seed, sparse=False) + chunker = get_chunker(*chunker_params, key=archive.key, sparse=False) for chunk in chunker.chunkify(file): if not dry_run: chunk_id, data = cached_hash(chunk, archive.key.id_hash) diff --git a/src/borg/chunkers/__init__.py b/src/borg/chunkers/__init__.py index 5f3ded4fc..c3c625760 100644 --- a/src/borg/chunkers/__init__.py +++ b/src/borg/chunkers/__init__.py @@ -3,21 +3,26 @@ from .buzhash64 import ChunkerBuzHash64 from .failing import ChunkerFailing from .fixed import ChunkerFixed from .reader import * # noqa +from ..crypto.key import PlaintextKey API_VERSION = "1.2_01" def get_chunker(algo, *params, **kw): + key = kw.get("key", None) + sparse = kw.get("sparse", False) + # key.chunk_seed only has 32bits + seed = key.chunk_seed if key is not None else 0 + # we want 64bits for buzhash64, get them from crypt_key + if key is None or isinstance(key, PlaintextKey): + seed64 = 0 + else: + seed64 = int.from_bytes(key.crypt_key[:8], byteorder="little") if algo == "buzhash": - seed = kw["seed"] - sparse = kw["sparse"] return Chunker(seed, *params, sparse=sparse) if algo == "buzhash64": - seed = kw["seed"] - sparse = kw["sparse"] - return ChunkerBuzHash64(seed, *params, sparse=sparse) + return ChunkerBuzHash64(seed64, *params, sparse=sparse) if algo == "fixed": - sparse = kw["sparse"] return ChunkerFixed(*params, sparse=sparse) if algo == "fail": return ChunkerFailing(*params) diff --git a/src/borg/testsuite/chunkers/buzhash64_self_test.py b/src/borg/testsuite/chunkers/buzhash64_self_test.py index 60189b4b2..a356afbbf 100644 --- a/src/borg/testsuite/chunkers/buzhash64_self_test.py +++ b/src/borg/testsuite/chunkers/buzhash64_self_test.py @@ -72,6 +72,6 @@ class ChunkerBuzHash64TestCase(BaseTestCase): self.input = self.input[:-1] return self.input[:1] - chunker = get_chunker(*CHUNKER64_PARAMS, seed=0, sparse=False) + chunker = get_chunker(*CHUNKER64_PARAMS, sparse=False) reconstructed = b"".join(cf(chunker.chunkify(SmallReadFile()))) assert reconstructed == b"a" * 20 diff --git a/src/borg/testsuite/chunkers/buzhash_self_test.py b/src/borg/testsuite/chunkers/buzhash_self_test.py index 1c6337047..9baf862f3 100644 --- a/src/borg/testsuite/chunkers/buzhash_self_test.py +++ b/src/borg/testsuite/chunkers/buzhash_self_test.py @@ -69,6 +69,6 @@ class ChunkerTestCase(BaseTestCase): self.input = self.input[:-1] return self.input[:1] - chunker = get_chunker(*CHUNKER_PARAMS, seed=0, sparse=False) + chunker = get_chunker(*CHUNKER_PARAMS, sparse=False) reconstructed = b"".join(cf(chunker.chunkify(SmallReadFile()))) assert reconstructed == b"a" * 20