implement padme chunk size obfuscation (SPEC 250), fixes #8705

(backport from master)
This commit is contained in:
Divyansh Agrawal 2025-04-05 22:42:19 +05:30 committed by Thomas Waldmann
parent a6813cea24
commit 3880c7e127
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
3 changed files with 65 additions and 6 deletions

View file

@ -2423,13 +2423,13 @@ class Archiver:
from within a shell, the patterns should be quoted to protect them from
expansion.
Patterns matching special characters, e.g. white space, within a shell may
Patterns matching special characters, e.g. white space, within a shell may
require adjustments, such as putting quotation marks around the arguments.
Example:
Example:
Using bash, the following command line option would match and exclude "item name":
``--pattern='-path/item name'``
Note that when patterns are used within a pattern file directly read by borg,
e.g. when using ``--exclude-from`` or ``--patterns-from``, there is no shell
Note that when patterns are used within a pattern file directly read by borg,
e.g. when using ``--exclude-from`` or ``--patterns-from``, there is no shell
involved and thus no quotation marks are required.
The ``--exclude-from`` option permits loading exclusion patterns from a text
@ -2742,6 +2742,15 @@ class Archiver:
...
123: 8MiB (max.)
*Padmé padding* (deterministic)
::
250: pads to sums of powers of 2, max 12% overhead
Uses the Padmé algorithm to deterministically pad the compressed size to a sum of
powers of 2, limiting overhead to 12%. See https://lbarman.ch/blog/padme/ for details.
Examples::
borg create --compression lz4 REPO::ARCHIVE data
@ -2753,7 +2762,8 @@ class Archiver:
borg create --compression auto,lzma ...
borg create --compression obfuscate,110,none ...
borg create --compression obfuscate,3,auto,zstd,10 ...
borg create --compression obfuscate,2,zstd,6 ...\n\n''')
borg create --compression obfuscate,2,zstd,6 ...
borg create --compression obfuscate,250,zstd,3 ...\n\n''')
def do_help(self, parser, commands, args):
if not args.topic:

View file

@ -16,6 +16,7 @@ decompressor.
"""
from argparse import ArgumentTypeError
import math
import random
from struct import Struct
import zlib
@ -25,7 +26,6 @@ try:
except ImportError:
lzma = None
from .constants import MAX_DATA_SIZE
from .helpers import Buffer, DecompressionError
@ -459,6 +459,8 @@ class ObfuscateSize(CompressorBase):
elif 110 <= level <= 123:
self._obfuscate = self._random_padding_obfuscate
self.max_padding_size = 2 ** (level - 100) # 1kiB .. 8MiB
elif level == 250: # Padmé
self._obfuscate = self._padme_obfuscate
def _obfuscate(self, compr_size):
# implementations need to return the size of obfuscation data,
@ -499,6 +501,22 @@ class ObfuscateSize(CompressorBase):
self.compressor = Compressor.detect(compressed_data)()
return self.compressor.decompress(compressed_data) # decompress data
def _padme_obfuscate(self, compr_size):
    """Return the number of padding bytes the Padmé scheme adds to *compr_size*.

    The padded size is *compr_size* rounded up to the next multiple of
    ``2 ** (E - S)``, where ``E = floor(log2(compr_size))`` and
    ``S = floor(log2(E)) + 1``. The result is deterministic: the same
    input size always yields the same padding.

    :param compr_size: size of the compressed data in bytes (an int).
    :return: count of extra bytes to append (0 for sizes below 2).
    """
    if compr_size < 2:
        # log2 is undefined for 0 and E would be 0 for 1; nothing to pad.
        return 0
    # floor(log2(n)) computed exactly on integers. The float-based
    # math.log2() can round up to the next power of two for very large n.
    exponent = compr_size.bit_length() - 1
    # floor(log2(exponent)) + 1 is, by definition, the bit length of exponent.
    exponent_bits = exponent.bit_length()
    low_bits = exponent - exponent_bits  # count of low-order bits to clear
    mask = (1 << low_bits) - 1
    # Round up to the next multiple of 2 ** low_bits.
    padded_size = (compr_size + mask) & ~mask
    # Cap the padded size at 112% of the input size.
    max_allowed = int(compr_size * 1.12)
    final_size = min(padded_size, max_allowed)
    return final_size - compr_size  # only the additional padding size
# Maps valid compressor names to their class
COMPRESSOR_TABLE = {

View file

@ -198,6 +198,37 @@ def test_obfuscate():
assert 6 + 2 + 1100 <= len(compressed) <= 6 + 2 + 1100 + 1024
@pytest.mark.parametrize(
    "data_length, expected_padding",
    [
        (10, 0),
        (100, 4),
        (1000, 24),
        (10000, 240),
        (20000, 480),
        (50000, 1200),
        (100000, 352),
        (1000000, 15808),
        (5000000, 111808),
        (10000000, 223616),
        (20000000, 447232),
    ],
)
def test_padme_obfuscation(data_length, expected_padding):
    """Check the total output size of obfuscation level 250 (Padmé) for known input sizes."""
    # The inner compressor adds an inner header of 2 bytes, so the payload is
    # shortened by 2 bytes to be able to use (almost) the same test cases as
    # in the master branch.
    inner_header_size = 2
    # The outer "obfuscate" pseudo-compressor adds an outer header of 6 bytes.
    outer_header_size = 6

    inner = Compressor("none")
    obfuscator = Compressor(name="obfuscate", level=250, compressor=inner)

    payload = b"x" * (data_length - inner_header_size)
    compressed = obfuscator.compress(payload)

    expected_padded_size = outer_header_size + data_length + expected_padding
    if len(compressed) != expected_padded_size:
        raise AssertionError(
            f"For {data_length}, expected {expected_padded_size}, got {len(compressed)}"
        )
def test_compression_specs():
with pytest.raises(argparse.ArgumentTypeError):
CompressionSpec('')