From 01f72d15b4c4ebd5dd21e6787dacf0b44d433d5c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 3 May 2022 20:51:43 +0200 Subject: [PATCH] transfer: remove the zlib type bytes hack hack: see the docstring of ZLIB_legacy class. New clean ZLIB class that works as every other compressor. ZLIB ID 0x0500, ZLIB_legacy ID 0x.8.. --- src/borg/archiver.py | 8 +++++- src/borg/compress.pyx | 51 ++++++++++++++++++++++++++++++---- src/borg/testsuite/archiver.py | 4 +-- src/borg/testsuite/compress.py | 4 +-- 4 files changed, 56 insertions(+), 11 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 0c5dee5f1..098208167 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -44,7 +44,7 @@ try: from .archive import has_link from .cache import Cache, assert_secure, SecurityManager from .constants import * # NOQA - from .compress import CompressionSpec + from .compress import CompressionSpec, ZLIB, ZLIB_legacy from .crypto.key import key_creator, key_argument_names, tam_required_file, tam_required from .crypto.key import RepoKey, KeyfileKey, Blake2RepoKey, Blake2KeyfileKey, FlexiKey from .crypto.keymanager import KeyManager @@ -351,6 +351,11 @@ class Archiver: item.get_size(memorize=True) # if not already present: compute+remember size for items with chunks return item + def upgrade_compressed_chunk(chunk): + if ZLIB_legacy.detect(chunk): + chunk = ZLIB.ID + chunk # get rid of the attic legacy: prepend separate type bytes for zlib + return chunk + dry_run = args.dry_run args.consider_checkpoints = True @@ -378,6 +383,7 @@ class Archiver: cdata = other_repository.get(chunk_id) # keep compressed payload same, avoid decompression / recompression data = other_key.decrypt(chunk_id, cdata, decompress=False) + data = upgrade_compressed_chunk(data) chunk_entry = cache.add_chunk(chunk_id, data, archive.stats, wait=False, compress=False, size=size) cache.repository.async_response(wait=False) diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx index 2e0eb4809..7997456c6 100644 --- a/src/borg/compress.pyx +++ b/src/borg/compress.pyx @@ -331,14 +331,52 @@ class ZSTD(DecidingCompressor): return dest[:osize] -class ZLIB(CompressorBase): +class ZLIB(DecidingCompressor): """ zlib compression / decompression (python stdlib) """ - ID = b'\x08\x00' # not used here, see detect() - # avoid all 0x.8.. IDs elsewhere! + ID = b'\x05\x00' name = 'zlib' + def __init__(self, level=6, **kwargs): + super().__init__(**kwargs) + self.level = level + + def _decide(self, data): + """ + Decides what to do with *data*. Returns (compressor, zlib_data). + + *zlib_data* is the ZLIB result if *compressor* is ZLIB as well, otherwise it is None. + """ + zlib_data = zlib.compress(data, self.level) + if len(zlib_data) < len(data): + return self, zlib_data + else: + return NONE_COMPRESSOR, None + + def decompress(self, data): + data = super().decompress(data) + try: + return zlib.decompress(data) + except zlib.error as e: + raise DecompressionError(str(e)) from None + + +class ZLIB_legacy(CompressorBase): + """ + zlib compression / decompression (python stdlib) + + Note: This is the legacy ZLIB support as used by borg < 1.3. + It still suffers from attic *only* supporting zlib and not having separate + ID bytes to differentiate between differently compressed chunks. + This just works because zlib compressed stuff always starts with 0x.8.. bytes. + Newer borg uses the ZLIB class that has separate ID bytes (as all the other + compressors) and does not need this hack. + """ + ID = b'\x08\x00' # not used here, see detect() + # avoid all 0x.8.. IDs elsewhere! + name = 'zlib_legacy' + @classmethod def detect(cls, data): # matches misc. patterns 0x.8.. used by zlib @@ -502,13 +540,14 @@ COMPRESSOR_TABLE = { CNONE.name: CNONE, LZ4.name: LZ4, ZLIB.name: ZLIB, + ZLIB_legacy.name: ZLIB_legacy, LZMA.name: LZMA, Auto.name: Auto, ZSTD.name: ZSTD, ObfuscateSize.name: ObfuscateSize, } # List of possible compression types. Does not include Auto, since it is a meta-Compressor. -COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ObfuscateSize, ] # check fast stuff first +COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, ZLIB_legacy, LZMA, ObfuscateSize, ] # check fast stuff first def get_compressor(name, **kwargs): cls = COMPRESSOR_TABLE[name] @@ -554,7 +593,7 @@ class CompressionSpec: self.name = values[0] if self.name in ('none', 'lz4', ): return - elif self.name in ('zlib', 'lzma', ): + elif self.name in ('zlib', 'lzma', 'zlib_legacy'): # zlib_legacy just for testing if count < 2: level = 6 # default compression level in py stdlib elif count == 2: @@ -597,7 +636,7 @@ class CompressionSpec: def compressor(self): if self.name in ('none', 'lz4', ): return get_compressor(self.name) - elif self.name in ('zlib', 'lzma', 'zstd', ): + elif self.name in ('zlib', 'lzma', 'zstd', 'zlib_legacy'): return get_compressor(self.name, level=self.level) elif self.name == 'auto': return get_compressor(self.name, compressor=self.inner.compressor) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 5889b12ab..b69fe819f 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -2442,7 +2442,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_compression_zlib_compressible(self): size, csize = self._get_sizes('zlib', compressible=True) assert csize < size * 0.1 - assert csize == 35 + assert csize == 37 def test_compression_zlib_uncompressible(self): size, csize = self._get_sizes('zlib', compressible=False) @@ -2451,7 +2451,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_compression_auto_compressible(self): size, csize = self._get_sizes('auto,zlib', compressible=True) assert csize < size * 0.1 - assert csize == 35 # same as compression 'zlib' + assert csize == 37 # same as compression 'zlib' def test_compression_auto_uncompressible(self): size, csize = self._get_sizes('auto,zlib', compressible=False) diff --git a/src/borg/testsuite/compress.py b/src/borg/testsuite/compress.py index 3942c3537..c93dd3bb6 100644 --- a/src/borg/testsuite/compress.py +++ b/src/borg/testsuite/compress.py @@ -88,11 +88,11 @@ def test_autodetect_invalid(): Compressor(**params).decompress(b'\x08\x00notreallyzlib') -def test_zlib_compat(): +def test_zlib_legacy_compat(): # for compatibility reasons, we do not add an extra header for zlib, # nor do we expect one when decompressing / autodetecting for level in range(10): - c = get_compressor(name='zlib', level=level) + c = get_compressor(name='zlib_legacy', level=level) cdata1 = c.compress(data) cdata2 = zlib.compress(data, level) assert cdata1 == cdata2