mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-10 17:32:13 -04:00
buzhash64: use own CSPRNG
This commit is contained in:
parent
bb7a4647ea
commit
3617b63336
4 changed files with 21 additions and 18 deletions
|
|
@ -13,7 +13,9 @@ def get_chunker(algo, *params, **kw):
|
|||
# key.chunk_seed only has 32 bits
|
||||
seed = key.chunk_seed if key is not None else 0
|
||||
# for buzhash64, we want a much longer key, so we derive it from the id key
|
||||
bh64_key = key.derive_key(salt=b"", domain=b"buzhash64", size=32, from_id_key=True) if key is not None else b""
|
||||
bh64_key = (
|
||||
key.derive_key(salt=b"", domain=b"buzhash64", size=32, from_id_key=True) if key is not None else b"\0" * 32
|
||||
)
|
||||
if algo == "buzhash":
|
||||
return Chunker(seed, *params, sparse=sparse)
|
||||
if algo == "buzhash64":
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
API_VERSION = '1.2_01'
|
||||
|
||||
import cython
|
||||
import random
|
||||
import time
|
||||
|
||||
from cpython.bytes cimport PyBytes_AsString
|
||||
|
|
@ -11,6 +10,8 @@ from libc.stdint cimport uint8_t, uint64_t
|
|||
from libc.stdlib cimport malloc, free
|
||||
from libc.string cimport memcpy, memmove
|
||||
|
||||
from ..crypto.low_level import CSPRNG
|
||||
|
||||
from ..constants import CH_DATA, CH_ALLOC, CH_HOLE, zeros
|
||||
from .reader import FileReader, Chunk
|
||||
|
||||
|
|
@ -45,7 +46,7 @@ cdef uint64_t* buzhash64_init_table(bytes key):
|
|||
Balanced means that for each bit position 0..63, exactly 50% of the table values have the bit set to 1.
|
||||
"""
|
||||
# Create deterministic random number generator
|
||||
rng = random.Random(int.from_bytes(key, 'big'))
|
||||
rng = CSPRNG(key)
|
||||
|
||||
cdef int i, j, bit_pos
|
||||
cdef uint64_t* table = <uint64_t*>malloc(2048) # 256 * sizeof(uint64_t)
|
||||
|
|
|
|||
|
|
@ -25,50 +25,50 @@ class ChunkerBuzHash64TestCase(BaseTestCase):
|
|||
self.assert_equal(cf(ChunkerBuzHash64(key0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b""))), [])
|
||||
self.assert_equal(
|
||||
cf(ChunkerBuzHash64(key0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
|
||||
[b"foobarboobaz", b"foobarboobaz", b"foobarboobaz"],
|
||||
[b"foobarb", b"ooba", b"zf", b"oobarb", b"ooba", b"zf", b"oobarb", b"oobaz"],
|
||||
)
|
||||
self.assert_equal(
|
||||
cf(ChunkerBuzHash64(key1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
|
||||
[b"foobar", b"boob", b"az", b"foobar", b"boob", b"az", b"foobar", b"boobaz"],
|
||||
[b"fo", b"oba", b"rb", b"oob", b"azf", b"ooba", b"rb", b"oob", b"azf", b"ooba", b"rb", b"oobaz"],
|
||||
)
|
||||
self.assert_equal(
|
||||
cf(ChunkerBuzHash64(key2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
|
||||
[b"foobarb", b"oob", b"az", b"foobarb", b"oob", b"az", b"foobarb", b"oobaz"],
|
||||
[b"foobar", b"booba", b"zfoobar", b"booba", b"zfoobar", b"boobaz"],
|
||||
)
|
||||
self.assert_equal(
|
||||
cf(ChunkerBuzHash64(key0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
|
||||
[b"foobarb", b"oobazf", b"oobarb", b"oobazf", b"oobarb", b"oobaz"],
|
||||
[b"foobarbo", b"obaz", b"foobarbo", b"obaz", b"foobarbo", b"obaz"],
|
||||
)
|
||||
self.assert_equal(
|
||||
cf(ChunkerBuzHash64(key1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
|
||||
[b"foobarb", b"oobaz", b"foobarb", b"oobaz", b"foobarb", b"oobaz"],
|
||||
[b"foobarboob", b"azfoobarboob", b"azfoobarboobaz"],
|
||||
)
|
||||
self.assert_equal(
|
||||
cf(ChunkerBuzHash64(key2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
|
||||
[b"foobarbooba", b"zfoobarbooba", b"zfoobarboobaz"],
|
||||
[b"foob", b"arboobazfoob", b"arboobazfoob", b"arboobaz"],
|
||||
)
|
||||
self.assert_equal(
|
||||
cf(ChunkerBuzHash64(key0, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
|
||||
[b"foobarboobazf", b"oobarboobazf", b"oobarboobaz"],
|
||||
[b"foobarbo", b"obazfoobarbo", b"obazfoobarbo", b"obaz"],
|
||||
)
|
||||
self.assert_equal(
|
||||
cf(ChunkerBuzHash64(key1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
|
||||
[b"foobarbo", b"obazfoobarb", b"oobazfoobarb", b"oobaz"],
|
||||
[b"foobarboob", b"azfoobarboob", b"azfoobarboobaz"],
|
||||
)
|
||||
self.assert_equal(
|
||||
cf(ChunkerBuzHash64(key2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
|
||||
[b"foobarbooba", b"zfoobarbooba", b"zfoobarboobaz"],
|
||||
[b"foobarboobazfoob", b"arboobazfoob", b"arboobaz"],
|
||||
)
|
||||
|
||||
def test_buzhash64(self):
|
||||
self.assert_equal(buzhash64(b"abcdefghijklmnop", key0), 15080163834872228739)
|
||||
self.assert_equal(buzhash64(b"abcdefghijklmnop", key1), 9505908538285923444)
|
||||
self.assert_equal(buzhash64(b"abcdefghijklmnop", key0), 17414563089559790077)
|
||||
self.assert_equal(buzhash64(b"abcdefghijklmnop", key1), 1397285894609271345)
|
||||
expected = buzhash64(b"abcdefghijklmnop", key0)
|
||||
previous = buzhash64(b"Xabcdefghijklmno", key0)
|
||||
this = buzhash64_update(previous, ord("X"), ord("p"), 16, key0)
|
||||
self.assert_equal(this, expected)
|
||||
# Test with more than 63 bytes to make sure our barrel_shift macro works correctly
|
||||
self.assert_equal(buzhash64(b"abcdefghijklmnopqrstuvwxyz" * 4, key0), 1936382207158378368)
|
||||
self.assert_equal(buzhash64(b"abcdefghijklmnopqrstuvwxyz" * 4, key0), 17683050804041322250)
|
||||
|
||||
def test_small_reads64(self):
|
||||
class SmallReadFile:
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ def test_chunkpoints64_unchanged():
|
|||
if minexp >= maxexp:
|
||||
continue
|
||||
for maskbits in (4, 7, 10, 12):
|
||||
for key in (b"first_key", b"second_key"):
|
||||
for key in (key0, key1):
|
||||
fh = BytesIO(data)
|
||||
chunker = ChunkerBuzHash64(key, minexp, maxexp, maskbits, winsize)
|
||||
chunks = [H(c) for c in cf(chunker.chunkify(fh, -1))]
|
||||
|
|
@ -46,13 +46,13 @@ def test_chunkpoints64_unchanged():
|
|||
# Future chunker optimisations must not change this, or existing repos will bloat.
|
||||
overall_hash = H(b"".join(runs))
|
||||
print(overall_hash.hex())
|
||||
assert overall_hash == hex_to_bin("db4b37fbe0cb841d79cfbb52bff8ac2f11040bf83a7d389640c7afb314fc4bfb")
|
||||
assert overall_hash == hex_to_bin("676676133fb3621ada0f6cc1b18002c3e37016c9469217d18f8e382fadaf23fd")
|
||||
|
||||
|
||||
def test_buzhash64_chunksize_distribution():
|
||||
data = os.urandom(1048576)
|
||||
min_exp, max_exp, mask = 10, 16, 14 # chunk size target 16kiB, clip at 1kiB and 64kiB
|
||||
chunker = ChunkerBuzHash64(b"", min_exp, max_exp, mask, 4095)
|
||||
chunker = ChunkerBuzHash64(key0, min_exp, max_exp, mask, 4095)
|
||||
f = BytesIO(data)
|
||||
chunks = cf(chunker.chunkify(f))
|
||||
del chunks[-1] # get rid of the last chunk, it can be smaller than 2**min_exp
|
||||
|
|
|
|||
Loading…
Reference in a new issue