From 7319f85b546bff883bde7155f28b453a9dc87f93 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 15 Dec 2020 00:26:32 +0100 Subject: [PATCH] adapt the existing chunker tests --- src/borg/testsuite/chunker.py | 54 +++++++++++++++++----------- src/borg/testsuite/chunker_pytest.py | 29 ++++++++------- src/borg/testsuite/chunker_slow.py | 3 +- 3 files changed, 50 insertions(+), 36 deletions(-) diff --git a/src/borg/testsuite/chunker.py b/src/borg/testsuite/chunker.py index df79441b6..7a0db7d36 100644 --- a/src/borg/testsuite/chunker.py +++ b/src/borg/testsuite/chunker.py @@ -8,18 +8,32 @@ from . import BaseTestCase # See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT +def cf(chunks): + """chunk filter""" + # this is to simplify testing: either return the data piece (bytes) or the hole length (int). + def _cf(chunk): + if chunk.meta['allocation'] == CH_DATA: + assert len(chunk.data) == chunk.meta['size'] + return bytes(chunk.data) # make sure we have bytes, not memoryview + if chunk.meta['allocation'] == CH_HOLE: + assert chunk.data is None + return chunk.meta['size'] + assert False, "unexpected allocation value" + return [_cf(chunk) for chunk in chunks] + + class ChunkerFixedTestCase(BaseTestCase): def test_chunkify_just_blocks(self): data = b'foobar' * 1500 chunker = ChunkerFixed(4096) - parts = [c for c in chunker.chunkify(BytesIO(data))] + parts = cf(chunker.chunkify(BytesIO(data))) self.assert_equal(parts, [data[0:4096], data[4096:8192], data[8192:]]) def test_chunkify_header_and_blocks(self): data = b'foobar' * 1500 chunker = ChunkerFixed(4096, 123) - parts = [c for c in chunker.chunkify(BytesIO(data))] + parts = cf(chunker.chunkify(BytesIO(data))) self.assert_equal(parts, [data[0:123], data[123:123+4096], data[123+4096:123+8192], data[123+8192:]]) def test_chunkify_just_blocks_fmap_complete(self): @@ -30,7 +44,7 @@ class ChunkerFixedTestCase(BaseTestCase): (4096, 8192, True), (8192, 99999999, True), ] - parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)] + parts = cf(chunker.chunkify(BytesIO(data), fmap=fmap)) self.assert_equal(parts, [data[0:4096], data[4096:8192], data[8192:]]) def test_chunkify_header_and_blocks_fmap_complete(self): @@ -42,7 +56,7 @@ class ChunkerFixedTestCase(BaseTestCase): (123+4096, 4096, True), (123+8192, 4096, True), ] - parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)] + parts = cf(chunker.chunkify(BytesIO(data), fmap=fmap)) self.assert_equal(parts, [data[0:123], data[123:123+4096], data[123+4096:123+8192], data[123+8192:]]) def test_chunkify_header_and_blocks_fmap_zeros(self): @@ -54,9 +68,9 @@ class ChunkerFixedTestCase(BaseTestCase): (123+4096, 4096, True), (123+8192, 4096, False), ] - parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)] - # because we marked the '_' ranges as holes, we will get '\0' ranges instead! - self.assert_equal(parts, [data[0:123], b'\0' * 4096, data[123+4096:123+8192], b'\0' * 4096]) + parts = cf(chunker.chunkify(BytesIO(data), fmap=fmap)) + # because we marked the '_' ranges as holes, we will get hole ranges instead! + self.assert_equal(parts, [data[0:123], 4096, data[123+4096:123+8192], 4096]) def test_chunkify_header_and_blocks_fmap_partial(self): data = b'H' * 123 + b'_' * 4096 + b'X' * 4096 + b'_' * 4096 @@ -67,7 +81,7 @@ class ChunkerFixedTestCase(BaseTestCase): (123+4096, 4096, True), # (123+8192, 4096, False), ] - parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)] + parts = cf(chunker.chunkify(BytesIO(data), fmap=fmap)) # because we left out the '_' ranges from the fmap, we will not get them at all! self.assert_equal(parts, [data[0:123], data[123+4096:123+8192]]) @@ -76,19 +90,19 @@ class ChunkerTestCase(BaseTestCase): def test_chunkify(self): data = b'0' * int(1.5 * (1 << CHUNK_MAX_EXP)) + b'Y' - parts = [bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(data))] + parts = cf(Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(data))) self.assert_equal(len(parts), 2) self.assert_equal(b''.join(parts), data) - self.assert_equal([bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b''))], []) - self.assert_equal([bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']) - self.assert_equal([bytes(c) for c in Chunker(1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) - self.assert_equal([bytes(c) for c in Chunker(1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']) - self.assert_equal([bytes(c) for c in Chunker(0, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) - self.assert_equal([bytes(c) for c in Chunker(1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarbo', b'obazfoobar', b'boobazfo', b'obarboobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz', b'foobarboobaz', b'foobarboobaz']) + self.assert_equal(cf(Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b''))), []) + self.assert_equal(cf(Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']) + self.assert_equal(cf(Chunker(1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz']) + self.assert_equal(cf(Chunker(2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']) + self.assert_equal(cf(Chunker(0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foobarboobaz' * 3]) + self.assert_equal(cf(Chunker(1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']) + self.assert_equal(cf(Chunker(2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']) + self.assert_equal(cf(Chunker(0, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foobarboobaz' * 3]) + self.assert_equal(cf(Chunker(1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foobarbo', b'obazfoobar', b'boobazfo', b'obarboobaz']) + self.assert_equal(cf(Chunker(2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))), [b'foobarboobaz', b'foobarboobaz', b'foobarboobaz']) def test_buzhash(self): self.assert_equal(buzhash(b'abcdefghijklmnop', 0), 3795437769) @@ -106,5 +120,5 @@ class ChunkerTestCase(BaseTestCase): return self.input[:1] chunker = get_chunker(*CHUNKER_PARAMS, seed=0) - reconstructed = b''.join(chunker.chunkify(SmallReadFile())) + reconstructed = b''.join(cf(chunker.chunkify(SmallReadFile()))) assert reconstructed == b'a' * 20 diff --git a/src/borg/testsuite/chunker_pytest.py b/src/borg/testsuite/chunker_pytest.py index daa46bb38..59c7a4515 100644 --- a/src/borg/testsuite/chunker_pytest.py +++ b/src/borg/testsuite/chunker_pytest.py @@ -4,6 +4,7 @@ import tempfile import pytest +from .chunker import cf from ..chunker import ChunkerFixed, sparsemap, has_seek_hole from ..constants import * # NOQA @@ -50,20 +51,18 @@ def make_sparsefile(fname, sparsemap, header_size=0): def make_content(sparsemap, header_size=0): - with BytesIO() as fd: - total = 0 - if header_size: - fd.write(b'H' * header_size) - total += header_size - for offset, size, is_data in sparsemap: - if is_data: - fd.write(b'X' * size) - else: - fd.write(b'\0' * size) - total += size - content = fd.getvalue() - assert len(content) == total - return content + result = [] + total = 0 + if header_size: + result.append(b'H' * header_size) + total += header_size + for offset, size, is_data in sparsemap: + if is_data: + result.append(b'X' * size) # bytes! + else: + result.append(size) # int! + total += size + return result def fs_supports_sparse(): @@ -132,7 +131,7 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse): def get_chunks(fname, sparse, header_size): chunker = ChunkerFixed(4096, header_size=header_size, sparse=sparse) with open(fname, 'rb') as fd: - return b''.join([c for c in chunker.chunkify(fd)]) + return cf(chunker.chunkify(fd)) fn = str(tmpdir / fname) make_sparsefile(fn, sparse_map, header_size=header_size) diff --git a/src/borg/testsuite/chunker_slow.py b/src/borg/testsuite/chunker_slow.py index 2739a735a..4247e2730 100644 --- a/src/borg/testsuite/chunker_slow.py +++ b/src/borg/testsuite/chunker_slow.py @@ -1,6 +1,7 @@ from io import BytesIO from binascii import unhexlify +from .chunker import cf from ..chunker import Chunker from ..crypto.low_level import blake2b_256 from ..constants import * # NOQA @@ -30,7 +31,7 @@ class ChunkerRegressionTestCase(BaseTestCase): for seed in (1849058162, 1234567653): fh = BytesIO(data) chunker = Chunker(seed, minexp, maxexp, maskbits, winsize) - chunks = [blake2b_256(b'', c) for c in chunker.chunkify(fh, -1)] + chunks = [blake2b_256(b'', c) for c in cf(chunker.chunkify(fh, -1))] runs.append(blake2b_256(b'', b''.join(chunks))) # The "correct" hash below matches the existing chunker behavior.