From 3880c7e1279a03545b76027a5b23dc397d7d2f6b Mon Sep 17 00:00:00 2001 From: Divyansh Agrawal <154041893+div-dev123@users.noreply.github.com> Date: Sat, 5 Apr 2025 22:42:19 +0530 Subject: [PATCH] implement padme chunk size obfuscation (SPEC 250), fixes #8705 (backport from master) --- src/borg/archiver.py | 20 +++++++++++++++----- src/borg/compress.pyx | 20 +++++++++++++++++++- src/borg/testsuite/compress.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 4992661e1..0ab5b62b3 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -2423,13 +2423,13 @@ class Archiver: from within a shell, the patterns should be quoted to protect them from expansion. - Patterns matching special characters, e.g. white space, within a shell may + Patterns matching special characters, e.g. white space, within a shell may require adjustments, such as putting quotation marks around the arguments. - Example: + Example: Using bash, the following command line option would match and exclude "item name": ``--pattern='-path/item name'`` - Note that when patterns are used within a pattern file directly read by borg, - e.g. when using ``--exclude-from`` or ``--patterns-from``, there is no shell + Note that when patterns are used within a pattern file directly read by borg, + e.g. when using ``--exclude-from`` or ``--patterns-from``, there is no shell involved and thus no quotation marks are required. The ``--exclude-from`` option permits loading exclusion patterns from a text @@ -2742,6 +2742,15 @@ class Archiver: ... 123: 8MiB (max.) + *Padmé padding* (deterministic) + + :: + + 250: pads to sums of powers of 2, max 12% overhead + + Uses the Padmé algorithm to deterministically pad the compressed size to a sum of + powers of 2, limiting overhead to 12%. See https://lbarman.ch/blog/padme/ for details. 
+ Examples:: borg create --compression lz4 REPO::ARCHIVE data @@ -2753,7 +2762,8 @@ class Archiver: borg create --compression auto,lzma ... borg create --compression obfuscate,110,none ... borg create --compression obfuscate,3,auto,zstd,10 ... - borg create --compression obfuscate,2,zstd,6 ...\n\n''') + borg create --compression obfuscate,2,zstd,6 ... + borg create --compression obfuscate,250,zstd,3 ...\n\n''') def do_help(self, parser, commands, args): if not args.topic: diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx index 2ab9d8133..8c005d608 100644 --- a/src/borg/compress.pyx +++ b/src/borg/compress.pyx @@ -16,6 +16,7 @@ decompressor. """ from argparse import ArgumentTypeError +import math import random from struct import Struct import zlib @@ -25,7 +26,6 @@ try: except ImportError: lzma = None - from .constants import MAX_DATA_SIZE from .helpers import Buffer, DecompressionError @@ -459,6 +459,8 @@ class ObfuscateSize(CompressorBase): elif 110 <= level <= 123: self._obfuscate = self._random_padding_obfuscate self.max_padding_size = 2 ** (level - 100) # 1kiB .. 
8MiB
+        elif level == 250:  # Padmé
+            self._obfuscate = self._padme_obfuscate
 
     def _obfuscate(self, compr_size):
         # implementations need to return the size of obfuscation data,
@@ -499,6 +501,22 @@ class ObfuscateSize(CompressorBase):
         self.compressor = Compressor.detect(compressed_data)()
         return self.compressor.decompress(compressed_data)  # decompress data
 
+    def _padme_obfuscate(self, compr_size):
+        """Return the number of padding bytes the Padmé scheme adds to *compr_size*."""
+        if compr_size < 2:
+            return 0  # nothing to round; also guarantees exponent >= 1 below
+
+        # Padmé: E = floor(log2(size)), S = floor(log2(E)) + 1, clear the low E - S bits.
+        # int.bit_length() yields both values exactly, without float log2() rounding.
+        exponent = compr_size.bit_length() - 1
+        exponent_bits = exponent.bit_length()
+        bit_mask = (1 << (exponent - exponent_bits)) - 1
+
+        padded_size = (compr_size + bit_mask) & ~bit_mask  # round up to a multiple of bit_mask + 1
+
+        # Never exceed the documented 12% overhead (integer math avoids float truncation).
+        capped_size = min(padded_size, compr_size * 112 // 100)
+        return capped_size - compr_size  # only the additional padding size
 
 # Maps valid compressor names to their class
 COMPRESSOR_TABLE = {
diff --git a/src/borg/testsuite/compress.py b/src/borg/testsuite/compress.py
index 7c5dbcbe4..233bf7275 100644
--- a/src/borg/testsuite/compress.py
+++ b/src/borg/testsuite/compress.py
@@ -198,6 +198,37 @@ def test_obfuscate():
     assert 6 + 2 + 1100 <= len(compressed) <= 6 + 2 + 1100 + 1024
 
 
+@pytest.mark.parametrize(
+    "data_length, expected_padding",
+    [
+        (10, 0),
+        (100, 4),
+        (1000, 24),
+        (10000, 240),
+        (20000, 480),
+        (50000, 1200),
+        (100000, 352),
+        (1000000, 15808),
+        (5000000, 111808),
+        (10000000, 223616),
+        (20000000, 447232),
+    ],
+)
+def test_padme_obfuscation(data_length, expected_padding):
+    compressor = Compressor(name="obfuscate", level=250, compressor=Compressor("none"))
+    # the inner compressor will add an inner header of 2 bytes, so we reduce the data length by 2 bytes
+    # to be able to use (almost) the same test cases as in master branch.
+    data = b"x" * (data_length - 2)
+    compressed = compressor.compress(data)
+
+    # the outer "obfuscate" pseudo-compressor adds an outer header of 6 bytes.
+    expected_padded_size = 6 + data_length + expected_padding
+
+    assert (
+        len(compressed) == expected_padded_size
+    ), f"For {data_length}, expected {expected_padded_size}, got {len(compressed)}"
+
+
 def test_compression_specs():
     with pytest.raises(argparse.ArgumentTypeError):
         CompressionSpec('')