get_chunker: give it the key instead of the seed

the buzhash seed only has 32 bits, but we would rather
have 64 bits for buzhash64.

for now, just take these 64 bits from the first 8 bytes of crypt_key.
This commit is contained in:
Thomas Waldmann 2025-06-06 01:52:29 +02:00
parent 6f55cba0ce
commit 544b3f41a9
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
6 changed files with 20 additions and 15 deletions

View file

@ -351,7 +351,7 @@ class ChunkBuffer:
self.packer = msgpack.Packer()
self.chunks = []
self.key = key
self.chunker = get_chunker(*chunker_params, seed=self.key.chunk_seed, sparse=False)
self.chunker = get_chunker(*chunker_params, key=self.key, sparse=False)
self.saved_chunks_len = None
def add(self, item):
@ -1227,7 +1227,7 @@ class FilesystemObjectProcessors:
self.hlm = HardLinkManager(id_type=tuple, info_type=(list, type(None))) # (dev, ino) -> chunks or None
self.stats = Statistics(output_json=log_json, iec=iec) # threading: done by cache (including progress)
self.cwd = os.getcwd()
self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)
self.chunker = get_chunker(*chunker_params, key=key, sparse=sparse)
@contextmanager
def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None):
@ -1502,7 +1502,7 @@ class TarfileObjectProcessors:
self.print_file_status = file_status_printer or (lambda *args: None)
self.stats = Statistics(output_json=log_json, iec=iec) # threading: done by cache (including progress)
self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=False)
self.chunker = get_chunker(*chunker_params, key=key, sparse=False)
self.hlm = HardLinkManager(id_type=str, info_type=list) # path -> chunks
@contextmanager
@ -2325,7 +2325,7 @@ class ArchiveRecreater:
target.process_file_chunks = ChunksProcessor(
cache=self.cache, key=self.key, add_item=target.add_item, rechunkify=target.recreate_rechunkify
).process_file_chunks
target.chunker = get_chunker(*target.chunker_params, seed=self.key.chunk_seed, sparse=False)
target.chunker = get_chunker(*target.chunker_params, key=self.key, sparse=False)
return target
def create_target_archive(self, name):

View file

@ -146,8 +146,8 @@ class BenchmarkMixIn:
pass
for spec, func in [
("buzhash,19,23,21,4095", lambda: chunkit("buzhash", 19, 23, 21, 4095, seed=0, sparse=False)),
("buzhash64,19,23,21,4095", lambda: chunkit("buzhash64", 19, 23, 21, 4095, seed=0, sparse=False)),
("buzhash,19,23,21,4095", lambda: chunkit("buzhash", 19, 23, 21, 4095, sparse=False)),
("buzhash64,19,23,21,4095", lambda: chunkit("buzhash64", 19, 23, 21, 4095, sparse=False)),
("fixed,1048576", lambda: chunkit("fixed", 1048576, sparse=False)),
]:
print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")

View file

@ -41,7 +41,7 @@ def transfer_chunks(
file = ChunkIteratorFileWrapper(chunk_iterator)
# Create a chunker with the specified parameters
chunker = get_chunker(*chunker_params, seed=archive.key.chunk_seed, sparse=False)
chunker = get_chunker(*chunker_params, key=archive.key, sparse=False)
for chunk in chunker.chunkify(file):
if not dry_run:
chunk_id, data = cached_hash(chunk, archive.key.id_hash)

View file

@ -3,21 +3,26 @@ from .buzhash64 import ChunkerBuzHash64
from .failing import ChunkerFailing
from .fixed import ChunkerFixed
from .reader import * # noqa
from ..crypto.key import PlaintextKey
API_VERSION = "1.2_01"
def get_chunker(algo, *params, **kw):
key = kw.get("key", None)
sparse = kw.get("sparse", False)
# key.chunk_seed only has 32bits
seed = key.chunk_seed if key is not None else 0
# we want 64bits for buzhash64, get them from crypt_key
if key is None or isinstance(key, PlaintextKey):
seed64 = 0
else:
seed64 = int.from_bytes(key.crypt_key[:8], byteorder="little")
if algo == "buzhash":
seed = kw["seed"]
sparse = kw["sparse"]
return Chunker(seed, *params, sparse=sparse)
if algo == "buzhash64":
seed = kw["seed"]
sparse = kw["sparse"]
return ChunkerBuzHash64(seed, *params, sparse=sparse)
return ChunkerBuzHash64(seed64, *params, sparse=sparse)
if algo == "fixed":
sparse = kw["sparse"]
return ChunkerFixed(*params, sparse=sparse)
if algo == "fail":
return ChunkerFailing(*params)

View file

@ -72,6 +72,6 @@ class ChunkerBuzHash64TestCase(BaseTestCase):
self.input = self.input[:-1]
return self.input[:1]
chunker = get_chunker(*CHUNKER64_PARAMS, seed=0, sparse=False)
chunker = get_chunker(*CHUNKER64_PARAMS, sparse=False)
reconstructed = b"".join(cf(chunker.chunkify(SmallReadFile())))
assert reconstructed == b"a" * 20

View file

@ -69,6 +69,6 @@ class ChunkerTestCase(BaseTestCase):
self.input = self.input[:-1]
return self.input[:1]
chunker = get_chunker(*CHUNKER_PARAMS, seed=0, sparse=False)
chunker = get_chunker(*CHUNKER_PARAMS, sparse=False)
reconstructed = b"".join(cf(chunker.chunkify(SmallReadFile())))
assert reconstructed == b"a" * 20