diff --git a/src/borg/archive.py b/src/borg/archive.py index 1555536d5..c0e2fe0f5 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -19,7 +19,7 @@ from .logger import create_logger logger = create_logger() from . import xattr -from .chunker import get_chunker, max_chunk_size +from .chunker import get_chunker, Chunk from .cache import ChunkListEntry from .crypto.key import key_factory from .compress import Compressor, CompressionSpec @@ -41,6 +41,7 @@ from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi from .helpers import os_open, flags_normal, flags_dir from .helpers import msgpack from .helpers import sig_int +from .lrucache import LRUCache from .patterns import PathPrefixPattern, FnmatchPattern, IECommand from .item import Item, ArchiveItem, ItemDiff from .platform import acl_get, acl_set, set_flags, get_flags, swidth, hostname @@ -336,7 +337,10 @@ class ChunkBuffer: self.buffer.seek(0) # The chunker returns a memoryview to its internal buffer, # thus a copy is needed before resuming the chunker iterator. - chunks = list(bytes(s) for s in self.chunker.chunkify(self.buffer)) + # note: this is the items metadata stream chunker, we will only get CH_DATA allocation here (because there are + # no all-zero chunks in a metadata stream), thus chunk.data will always be bytes/memoryview and allocation + # is always CH_DATA and never CH_ALLOC/CH_HOLE. + chunks = list(bytes(chunk.data) for chunk in self.chunker.chunkify(self.buffer)) self.buffer.seek(0) self.buffer.truncate(0) # Leave the last partial chunk in the buffer unless flush is True @@ -422,7 +426,6 @@ class Archive: if info is None: raise self.DoesNotExist(name) self.load(info.id) - self.zeros = None def _load_meta(self, id): data = self.key.decrypt(id, self.repository.get(id)) @@ -735,8 +738,6 @@ Utilization of max. 
archive size: {csize_max:.0%} hardlink_masters) as hardlink_set: if hardlink_set: return - if sparse and self.zeros is None: - self.zeros = b'\0' * max_chunk_size(*self.chunker_params) with backup_io('open'): fd = open(path, 'wb') with fd: @@ -745,7 +746,7 @@ Utilization of max. archive size: {csize_max:.0%} if pi: pi.show(increase=len(data), info=[remove_surrogates(item.path)]) with backup_io('write'): - if sparse and self.zeros.startswith(data): + if sparse and zeros.startswith(data): # all-zero chunk: create a hole in a sparse file fd.seek(len(data), 1) else: @@ -1089,6 +1090,32 @@ class MetadataCollector: return attrs +# remember a few recently used all-zero chunk hashes in this mapping. +# (hash_func, chunk_length) -> chunk_hash +# we play safe and have the hash_func in the mapping key, in case we +# have different hash_funcs within the same borg run. +zero_chunk_ids = LRUCache(10, dispose=lambda _: None) + + +def cached_hash(chunk, id_hash): + allocation = chunk.meta['allocation'] + if allocation == CH_DATA: + data = chunk.data + chunk_id = id_hash(data) + elif allocation in (CH_HOLE, CH_ALLOC): + size = chunk.meta['size'] + assert size <= len(zeros) + data = memoryview(zeros)[:size] + try: + chunk_id = zero_chunk_ids[(id_hash, size)] + except KeyError: + chunk_id = id_hash(data) + zero_chunk_ids[(id_hash, size)] = chunk_id + else: + raise ValueError('unexpected allocation type') + return chunk_id, data + + class ChunksProcessor: # Processes an iterator of chunks for an Item @@ -1133,8 +1160,9 @@ class ChunksProcessor: def process_file_chunks(self, item, cache, stats, show_progress, chunk_iter, chunk_processor=None): if not chunk_processor: - def chunk_processor(data): - chunk_entry = cache.add_chunk(self.key.id_hash(data), data, stats, wait=False) + def chunk_processor(chunk): + chunk_id, data = cached_hash(chunk, self.key.id_hash) + chunk_entry = cache.add_chunk(chunk_id, data, stats, wait=False) self.cache.repository.async_response(wait=False) return 
chunk_entry @@ -1145,8 +1173,8 @@ class ChunksProcessor: del item.chunks_healthy from_chunk = 0 part_number = 1 - for data in chunk_iter: - item.chunks.append(chunk_processor(data)) + for chunk in chunk_iter: + item.chunks.append(chunk_processor(chunk)) if show_progress: stats.show_progress(item=item, dt=0.2) from_chunk, part_number = self.maybe_checkpoint(item, from_chunk, part_number, forced=False) @@ -1662,8 +1690,8 @@ class ArchiveChecker: If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one. """ def replacement_chunk(size): - data = bytes(size) - chunk_id = self.key.id_hash(data) + chunk = Chunk(None, allocation=CH_ALLOC, size=size) + chunk_id, data = cached_hash(chunk, self.key.id_hash) cdata = self.key.encrypt(data) csize = len(cdata) return chunk_id, size, csize, cdata @@ -1982,8 +2010,8 @@ class ArchiveRecreater: chunk_processor = partial(self.chunk_processor, target) target.process_file_chunks(item, self.cache, target.stats, self.progress, chunk_iterator, chunk_processor) - def chunk_processor(self, target, data): - chunk_id = self.key.id_hash(data) + def chunk_processor(self, target, chunk): + chunk_id, data = cached_hash(chunk, self.key.id_hash) if chunk_id in self.seen_chunks: return self.cache.chunk_incref(chunk_id, target.stats) overwrite = self.recompress @@ -2007,7 +2035,7 @@ class ArchiveRecreater: yield from target.chunker.chunkify(file) else: for chunk in chunk_iterator: - yield chunk + yield Chunk(chunk, size=len(chunk), allocation=CH_DATA) def save(self, archive, target, comment=None, replace_original=True): if self.dry_run: diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 15abc806d..5ad9030e8 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -453,9 +453,10 @@ class Archiver: def test_files(path, count, size, random): path = os.path.join(path, 'borg-test-data') os.makedirs(path) + z_buff = None if random else memoryview(zeros)[:size] if size <= len(zeros) else b'\0' * 
size for i in range(count): fname = os.path.join(path, 'file_%d' % i) - data = b'\0' * size if not random else os.urandom(size) + data = z_buff if not random else os.urandom(size) with SyncFile(fname, binary=True) as fd: # used for posix_fadvise's sake fd.write(data) yield path diff --git a/src/borg/chunker.pyx b/src/borg/chunker.pyx index 03122ec4b..ee9773be4 100644 --- a/src/borg/chunker.pyx +++ b/src/borg/chunker.pyx @@ -4,6 +4,9 @@ API_VERSION = '1.2_01' import errno import os +from collections import namedtuple + +from .constants import CH_DATA, CH_ALLOC, CH_HOLE, MAX_DATA_SIZE, zeros from libc.stdlib cimport free @@ -26,6 +29,29 @@ cdef extern from "_chunker.c": has_seek_hole = hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE') +_Chunk = namedtuple('_Chunk', 'meta data') +_Chunk.__doc__ = """\ + Chunk namedtuple + + meta is always a dictionary, data depends on allocation. + + data chunk read from a DATA range of a file (not from a sparse hole): + meta = {'allocation' = CH_DATA, 'size' = size_of_chunk } + data = read_data [bytes or memoryview] + + all-zero chunk read from a DATA range of a file (not from a sparse hole, but detected to be all-zero): + meta = {'allocation' = CH_ALLOC, 'size' = size_of_chunk } + data = None + + all-zero chunk from a HOLE range of a file (from a sparse hole): + meta = {'allocation' = CH_HOLE, 'size' = size_of_chunk } + data = None +""" + +def Chunk(data, **meta): + return _Chunk(meta, data) + + def dread(offset, size, fd=None, fh=-1): use_fh = fh >= 0 if use_fh: @@ -124,7 +150,7 @@ class ChunkerFixed: # should borg try to do sparse input processing? # whether it actually can be done depends on the input file being seekable. 
self.try_sparse = sparse and has_seek_hole - self.zeros = memoryview(bytes(block_size)) + assert block_size <= len(zeros) def chunkify(self, fd=None, fh=-1, fmap=None): """ @@ -178,15 +204,22 @@ class ChunkerFixed: if is_data: # read block from the range data = dread(offset, wanted, fd, fh) + got = len(data) + if zeros.startswith(data): + data = None + allocation = CH_ALLOC + else: + allocation = CH_DATA else: # hole # seek over block from the range pos = dseek(wanted, os.SEEK_CUR, fd, fh) - data = self.zeros[:pos - offset] # for now, create zero-bytes here - got = len(data) + got = pos - offset + data = None + allocation = CH_HOLE if got > 0: offset += got range_size -= got - yield data # later, use a better api that tags data vs. hole + yield Chunk(data, size=got, allocation=allocation) if got < wanted: # we did not get enough data, looks like EOF. return @@ -209,6 +242,7 @@ cdef class Chunker: def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size): min_size = 1 << chunk_min_exp max_size = 1 << chunk_max_exp + assert max_size <= len(zeros) # see chunker_process, first while loop condition, first term must be able to get True: assert hash_window_size + min_size + 1 <= max_size, "too small max_size" hash_mask = (1 << hash_mask_bits) - 1 @@ -233,7 +267,17 @@ cdef class Chunker: return self def __next__(self): - return chunker_process(self.chunker) + data = chunker_process(self.chunker) + got = len(data) + # we do not have SEEK_DATA/SEEK_HOLE support in chunker_process C code, + # but we can just check if data was all-zero (and either came from a hole + # or from stored zeros - we can not detect that here). 
+ if zeros.startswith(data): + data = None + allocation = CH_ALLOC + else: + allocation = CH_DATA + return Chunk(data, size=got, allocation=allocation) def get_chunker(algo, *params, **kw): @@ -246,15 +290,6 @@ def get_chunker(algo, *params, **kw): raise TypeError('unsupported chunker algo %r' % algo) -def max_chunk_size(algo, *params): - # see also parseformat.ChunkerParams return values - if algo == 'buzhash': - return 1 << params[1] - if algo == 'fixed': - return max(params[0], params[1]) - raise TypeError('unsupported chunker algo %r' % algo) - - def buzhash(data, unsigned long seed): cdef uint32_t *table cdef uint32_t sum diff --git a/src/borg/constants.py b/src/borg/constants.py index a20719c65..1bd9bb6dd 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -45,6 +45,10 @@ assert MAX_OBJECT_SIZE == 20 * 1024 * 1024 # repo config max_segment_size value must be below this limit to stay within uint32 offsets: MAX_SEGMENT_SIZE_LIMIT = 2 ** 32 - MAX_OBJECT_SIZE +# have one all-zero bytes object +# we use it at all places where we need to detect or create all-zero buffers +zeros = bytes(MAX_DATA_SIZE) + # borg.remote read() buffer size BUFSIZE = 10 * 1024 * 1024 @@ -75,6 +79,9 @@ CHUNKER_PARAMS = (CH_BUZHASH, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH # chunker params for the items metadata stream, finer granularity ITEMS_CHUNKER_PARAMS = (CH_BUZHASH, 15, 19, 17, HASH_WINDOW_SIZE) +# normal on-disk data, allocated (but not written, all zeros), not allocated hole (all zeros) +CH_DATA, CH_ALLOC, CH_HOLE = 0, 1, 2 + # operating mode of the files cache (for fast skipping of unchanged files) DEFAULT_FILES_CACHE_MODE_UI = 'ctime,size,inode' DEFAULT_FILES_CACHE_MODE = 'cis' # == CacheMode(DEFAULT_FILES_CACHE_MODE_UI) diff --git a/src/borg/testsuite/benchmark.py b/src/borg/testsuite/benchmark.py index 1e70a101f..f3ec06f2a 100644 --- a/src/borg/testsuite/benchmark.py +++ b/src/borg/testsuite/benchmark.py @@ -11,6 +11,7 @@ import os import pytest from 
.archiver import changedir, cmd +from ..constants import zeros @pytest.fixture @@ -34,12 +35,13 @@ def repo(request, cmd, repo_url): @pytest.fixture(scope='session', params=["zeros", "random"]) def testdata(request, tmpdir_factory): count, size = 10, 1000*1000 + assert size <= len(zeros) p = tmpdir_factory.mktemp('data') data_type = request.param if data_type == 'zeros': # do not use a binary zero (\0) to avoid sparse detection def data(size): - return b'0' * size + return memoryview(zeros)[:size] elif data_type == 'random': def data(size): return os.urandom(size) diff --git a/src/borg/testsuite/chunker.py b/src/borg/testsuite/chunker.py index df79441b6..1b275978c 100644 --- a/src/borg/testsuite/chunker.py +++ b/src/borg/testsuite/chunker.py @@ -8,18 +8,32 @@ from . import BaseTestCase # See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT +def cf(chunks): + """chunk filter""" + # this is to simplify testing: either return the data piece (bytes) or the hole length (int). 
+ def _cf(chunk): + if chunk.meta['allocation'] == CH_DATA: + assert len(chunk.data) == chunk.meta['size'] + return bytes(chunk.data) # make sure we have bytes, not memoryview + if chunk.meta['allocation'] in (CH_HOLE, CH_ALLOC): + assert chunk.data is None + return chunk.meta['size'] + assert False, "unexpected allocation value" + return [_cf(chunk) for chunk in chunks] + + class ChunkerFixedTestCase(BaseTestCase): def test_chunkify_just_blocks(self): data = b'foobar' * 1500 chunker = ChunkerFixed(4096) - parts = [c for c in chunker.chunkify(BytesIO(data))] + parts = cf(chunker.chunkify(BytesIO(data))) self.assert_equal(parts, [data[0:4096], data[4096:8192], data[8192:]]) def test_chunkify_header_and_blocks(self): data = b'foobar' * 1500 chunker = ChunkerFixed(4096, 123) - parts = [c for c in chunker.chunkify(BytesIO(data))] + parts = cf(chunker.chunkify(BytesIO(data))) self.assert_equal(parts, [data[0:123], data[123:123+4096], data[123+4096:123+8192], data[123+8192:]]) def test_chunkify_just_blocks_fmap_complete(self): @@ -30,7 +44,7 @@ class ChunkerFixedTestCase(BaseTestCase): (4096, 8192, True), (8192, 99999999, True), ] - parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)] + parts = cf(chunker.chunkify(BytesIO(data), fmap=fmap)) self.assert_equal(parts, [data[0:4096], data[4096:8192], data[8192:]]) def test_chunkify_header_and_blocks_fmap_complete(self): @@ -42,7 +56,7 @@ class ChunkerFixedTestCase(BaseTestCase): (123+4096, 4096, True), (123+8192, 4096, True), ] - parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)] + parts = cf(chunker.chunkify(BytesIO(data), fmap=fmap)) self.assert_equal(parts, [data[0:123], data[123:123+4096], data[123+4096:123+8192], data[123+8192:]]) def test_chunkify_header_and_blocks_fmap_zeros(self): @@ -54,9 +68,9 @@ class ChunkerFixedTestCase(BaseTestCase): (123+4096, 4096, True), (123+8192, 4096, False), ] - parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)] - # because we marked the '_' ranges as 
holes, we will get '\0' ranges instead! - self.assert_equal(parts, [data[0:123], b'\0' * 4096, data[123+4096:123+8192], b'\0' * 4096]) + parts = cf(chunker.chunkify(BytesIO(data), fmap=fmap)) + # because we marked the '_' ranges as holes, we will get hole ranges instead! + self.assert_equal(parts, [data[0:123], 4096, data[123+4096:123+8192], 4096]) def test_chunkify_header_and_blocks_fmap_partial(self): data = b'H' * 123 + b'_' * 4096 + b'X' * 4096 + b'_' * 4096 @@ -67,7 +81,7 @@ class ChunkerFixedTestCase(BaseTestCase): (123+4096, 4096, True), # (123+8192, 4096, False), ] - parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)] + parts = cf(chunker.chunkify(BytesIO(data), fmap=fmap)) # because we left out the '_' ranges from the fmap, we will not get them at all! self.assert_equal(parts, [data[0:123], data[123+4096:123+8192]]) @@ -76,19 +90,19 @@ class ChunkerTestCase(BaseTestCase): def test_chunkify(self): data = b'0' * int(1.5 * (1 << CHUNK_MAX_EXP)) + b'Y' - parts = [bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(data))] + parts = cf(Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(data))) self.assert_equal(len(parts), 2) self.assert_equal(b''.join(parts), data) - self.assert_equal([bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b''))], []) - self.assert_equal([bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']) - self.assert_equal([bytes(c) for c in Chunker(1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' 
* 3))], [b'foobarboobaz' * 3]) - self.assert_equal([bytes(c) for c in Chunker(1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']) - self.assert_equal([bytes(c) for c in Chunker(0, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) - self.assert_equal([bytes(c) for c in Chunker(1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarbo', b'obazfoobar', b'boobazfo', b'obarboobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz', b'foobarboobaz', b'foobarboobaz']) + self.assert_equal(cf(Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b''))), []) + self.assert_equal(cf(Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']) + self.assert_equal(cf(Chunker(1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz']) + self.assert_equal(cf(Chunker(2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']) + self.assert_equal(cf(Chunker(0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foobarboobaz' * 3]) + self.assert_equal(cf(Chunker(1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']) + self.assert_equal(cf(Chunker(2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']) + self.assert_equal(cf(Chunker(0, 3, CHUNK_MAX_EXP, 2, 
3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foobarboobaz' * 3]) + self.assert_equal(cf(Chunker(1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foobarbo', b'obazfoobar', b'boobazfo', b'obarboobaz']) + self.assert_equal(cf(Chunker(2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foobarboobaz', b'foobarboobaz', b'foobarboobaz']) def test_buzhash(self): self.assert_equal(buzhash(b'abcdefghijklmnop', 0), 3795437769) @@ -106,5 +120,5 @@ class ChunkerTestCase(BaseTestCase): return self.input[:1] chunker = get_chunker(*CHUNKER_PARAMS, seed=0) - reconstructed = b''.join(chunker.chunkify(SmallReadFile())) + reconstructed = b''.join(cf(chunker.chunkify(SmallReadFile()))) assert reconstructed == b'a' * 20 diff --git a/src/borg/testsuite/chunker_pytest.py b/src/borg/testsuite/chunker_pytest.py index daa46bb38..59c7a4515 100644 --- a/src/borg/testsuite/chunker_pytest.py +++ b/src/borg/testsuite/chunker_pytest.py @@ -4,6 +4,7 @@ import tempfile import pytest +from .chunker import cf from ..chunker import ChunkerFixed, sparsemap, has_seek_hole from ..constants import * # NOQA @@ -50,20 +51,18 @@ def make_sparsefile(fname, sparsemap, header_size=0): def make_content(sparsemap, header_size=0): - with BytesIO() as fd: - total = 0 - if header_size: - fd.write(b'H' * header_size) - total += header_size - for offset, size, is_data in sparsemap: - if is_data: - fd.write(b'X' * size) - else: - fd.write(b'\0' * size) - total += size - content = fd.getvalue() - assert len(content) == total - return content + result = [] + total = 0 + if header_size: + result.append(b'H' * header_size) + total += header_size + for offset, size, is_data in sparsemap: + if is_data: + result.append(b'X' * size) # bytes! + else: + result.append(size) # int! 
+ total += size + return result def fs_supports_sparse(): @@ -132,7 +131,7 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse): def get_chunks(fname, sparse, header_size): chunker = ChunkerFixed(4096, header_size=header_size, sparse=sparse) with open(fname, 'rb') as fd: - return b''.join([c for c in chunker.chunkify(fd)]) + return cf(chunker.chunkify(fd)) fn = str(tmpdir / fname) make_sparsefile(fn, sparse_map, header_size=header_size) diff --git a/src/borg/testsuite/chunker_slow.py b/src/borg/testsuite/chunker_slow.py index 2739a735a..4247e2730 100644 --- a/src/borg/testsuite/chunker_slow.py +++ b/src/borg/testsuite/chunker_slow.py @@ -1,6 +1,7 @@ from io import BytesIO from binascii import unhexlify +from .chunker import cf from ..chunker import Chunker from ..crypto.low_level import blake2b_256 from ..constants import * # NOQA @@ -30,7 +31,7 @@ class ChunkerRegressionTestCase(BaseTestCase): for seed in (1849058162, 1234567653): fh = BytesIO(data) chunker = Chunker(seed, minexp, maxexp, maskbits, winsize) - chunks = [blake2b_256(b'', c) for c in chunker.chunkify(fh, -1)] + chunks = [blake2b_256(b'', c) for c in cf(chunker.chunkify(fh, -1))] runs.append(blake2b_256(b'', b''.join(chunks))) # The "correct" hash below matches the existing chunker behavior.