mirror of
https://github.com/borgbackup/borg.git
synced 2026-05-28 04:03:21 -04:00
Merge pull request #129 from ThomasWaldmann/compression
compression flexibility, new none, lz4 and lzma compression
This commit is contained in:
commit
fffe509268
17 changed files with 502 additions and 24 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -6,6 +6,7 @@ env
|
|||
.tox
|
||||
hashindex.c
|
||||
chunker.c
|
||||
compress.c
|
||||
crypto.c
|
||||
platform_darwin.c
|
||||
platform_freebsd.c
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then
|
|||
eval "$(pyenv init -)"
|
||||
fi
|
||||
|
||||
brew install lz4
|
||||
brew outdated pyenv || brew upgrade pyenv
|
||||
|
||||
case "${TOXENV}" in
|
||||
|
|
@ -34,6 +35,9 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then
|
|||
python -m pip install --user virtualenv
|
||||
else
|
||||
pip install virtualenv
|
||||
sudo add-apt-repository -y ppa:gezakovacs/lz4
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y liblz4-dev
|
||||
sudo apt-get install -y libacl1-dev
|
||||
fi
|
||||
|
||||
|
|
|
|||
25
CHANGES.rst
25
CHANGES.rst
|
|
@ -5,16 +5,35 @@ Borg Changelog
|
|||
Version 0.25.0 (not released yet)
|
||||
---------------------------------
|
||||
|
||||
Incompatible changes (compared to 0.24):
|
||||
Compatibility notes:
|
||||
|
||||
- none yet
|
||||
- the new compression code is very compatible: as long as you stay with zlib
|
||||
compression, older borg releases will still be able to read data from a
|
||||
repo/archive made with the new code (note: this is not the case for the
|
||||
default "none" compression, use "zlib,0" if you want a "no compression" mode
|
||||
that can be read by older borg). Also the new code is able to read repos and
|
||||
archives made with older borg versions (for all zlib levels 0..9).
|
||||
|
||||
Deprecations:
|
||||
|
||||
- none yet
|
||||
- --compression N (with N being a number, as in 0.24) is deprecated.
|
||||
We keep the --compression 0..9 for now to not break scripts, but it is
|
||||
deprecated and will be removed later, so better fix your scripts now:
|
||||
--compression 0 (as in 0.24) is the same as --compression zlib,0 (now).
|
||||
BUT: if you do not want compression, you rather want --compression none
|
||||
(which is the default).
|
||||
--compression 1 (in 0.24) is the same as --compression zlib,1 (now)
|
||||
--compression 9 (in 0.24) is the same as --compression zlib,9 (now)
|
||||
|
||||
|
||||
New features:
|
||||
|
||||
- create --compression none (default, means: do not compress, just pass through
|
||||
data "as is". this is more efficient than zlib level 0 as used in borg 0.24)
|
||||
- create --compression lz4 (super-fast, but not very high compression)
|
||||
Please note that borgbackup needs lz4 library as additional requirement.
|
||||
- create --compression zlib,N (slower, higher compression, default for N is 6)
|
||||
- create --compression lzma,N (slowest, highest compression, default N is 6)
|
||||
- honor the nodump flag (UF_NODUMP) and do not backup such items
|
||||
|
||||
Bug fixes:
|
||||
|
|
|
|||
|
|
@ -51,7 +51,8 @@ Main features
|
|||
authenticity is verified using HMAC-SHA256.
|
||||
|
||||
**Compression**
|
||||
All data can be compressed by zlib, level 0-9.
|
||||
All data can be compressed by lz4 (super fast, low compression), zlib
|
||||
(medium speed and compression) or lzma (low speed, high compression).
|
||||
|
||||
**Off-site backups**
|
||||
Borg can store data on any remote host accessible over SSH. If Borg is
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import traceback
|
|||
|
||||
from . import __version__
|
||||
from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS
|
||||
from .compress import Compressor, COMPR_BUFFER
|
||||
from .repository import Repository
|
||||
from .cache import Cache
|
||||
from .key import key_creator
|
||||
|
|
@ -21,7 +22,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \
|
|||
format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
|
||||
get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
|
||||
Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
|
||||
is_cachedir, bigint_to_int, ChunkerParams
|
||||
is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec
|
||||
from .remote import RepositoryServer, RemoteRepository
|
||||
|
||||
has_lchflags = hasattr(os, 'lchflags')
|
||||
|
|
@ -104,7 +105,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
|
|||
t0 = datetime.now()
|
||||
repository = self.open_repository(args.archive, exclusive=True)
|
||||
manifest, key = Manifest.load(repository)
|
||||
key.compression_level = args.compression
|
||||
compr_args = dict(buffer=COMPR_BUFFER)
|
||||
compr_args.update(args.compression)
|
||||
key.compressor = Compressor(**compr_args)
|
||||
cache = Cache(repository, key, manifest, do_files=args.cache_files)
|
||||
archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
|
||||
create=True, checkpoint_interval=args.checkpoint_interval,
|
||||
|
|
@ -670,9 +673,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
|
|||
metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
|
||||
help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS)
|
||||
subparser.add_argument('-C', '--compression', dest='compression',
|
||||
type=int, default=0, metavar='N',
|
||||
help='select compression algorithm and level. 0..9 is supported and means zlib '
|
||||
'level 0 (no compression, fast, default) .. zlib level 9 (high compression, slow).')
|
||||
type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION',
|
||||
help='select compression algorithm (and level): '
|
||||
'none == no compression (default), '
|
||||
'lz4 == lz4, '
|
||||
'zlib == zlib (default level 6), '
|
||||
'zlib,0 .. zlib,9 == zlib (with level 0..9), '
|
||||
'lzma == lzma (default level 6), '
|
||||
'lzma,0 .. lzma,9 == lzma (with level 0..9).')
|
||||
subparser.add_argument('archive', metavar='ARCHIVE',
|
||||
type=location_validator(archive=True),
|
||||
help='archive to create')
|
||||
|
|
|
|||
199
borg/compress.pyx
Normal file
199
borg/compress.pyx
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
import zlib
|
||||
try:
|
||||
import lzma
|
||||
except ImportError:
|
||||
lzma = None
|
||||
|
||||
cdef extern from "lz4.h":
|
||||
int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
|
||||
int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
|
||||
|
||||
|
||||
cdef class CompressorBase:
    """
    Common ancestor of all (de)compression classes.

    It also takes care of the 2-byte ID header that makes compression format
    auto detection possible: compress() prepends it, decompress() strips it
    and detect() checks for it.
    """
    # reserved and not used;
    # child classes overwrite this with a unique 2-bytes bytestring
    ID = b'\xFF\xFF'
    name = 'baseclass'

    @classmethod
    def detect(cls, data):
        """Return True if `data` starts with the ID bytes of this class."""
        return data.startswith(cls.ID)

    def __init__(self, **kwargs):
        """Accept (and ignore) any keyword parameters."""
        pass

    def compress(self, data):
        """Return `data` with the ID bytes put in front of it."""
        return self.ID + data

    def decompress(self, data):
        """Return `data` without its first 2 bytes (the ID header)."""
        return data[2:]
|
||||
|
||||
|
||||
class CNONE(CompressorBase):
    """
    none - pass-through "compression": the payload is stored as is,
    only the ID header is added / removed.
    """
    ID = b'\x00\x00'
    name = 'none'

    def compress(self, data):
        # nothing to compress, the base class just prepends the ID bytes
        return super().compress(data)

    def decompress(self, data):
        payload = super().decompress(data)
        # the stripped payload may not be a bytes object (the input type is
        # kept by the slicing in the base class), callers get bytes anyway
        return payload if isinstance(payload, bytes) else bytes(payload)
|
||||
|
||||
|
||||
cdef class LZ4(CompressorBase):
    """
    raw LZ4 compression / decompression (liblz4).

    Features:
        - lz4 is super fast
        - wrapper releases CPython's GIL to support multithreaded code
        - buffer given by caller, avoiding frequent reallocation and buffer duplication
        - uses safe lz4 methods that never go beyond the end of the output buffer

    But beware:
        - this is not very generic, the given buffer MUST be large enough to
          handle all compression or decompression output (or it will fail).
        - you must not do method calls to the same LZ4 instance from different
          threads at the same time - create one LZ4 instance per thread!

    Keyword parameters:
        buffer: object used as output buffer for compress() and decompress()
                (required, a KeyError is raised if it is missing).
    """
    ID = b'\x01\x00'
    name = 'lz4'

    cdef char *buffer  # helper buffer for (de)compression output
    cdef int bufsize  # size of this buffer
    cdef object buffer_ref  # owns the memory self.buffer points into

    def __cinit__(self, **kwargs):
        buffer = kwargs['buffer']
        # self.buffer is only a raw pointer into the Python object, it does
        # not keep that object alive. Hold a real reference for the lifetime
        # of this instance, otherwise the pointer dangles as soon as the
        # caller drops its own reference (e.g. LZ4(buffer=bytes(n))).
        self.buffer_ref = buffer
        # NOTE(review): output is written through this pointer into the
        # caller's object; if that is an immutable `bytes`, it gets modified
        # in place - confirm this is intended.
        self.buffer = buffer
        self.bufsize = len(buffer)

    def compress(self, idata):
        """
        Return the ID bytes followed by the lz4-compressed `idata`.

        Raises Exception if lz4 reports a failure (zero-length result).
        """
        if not isinstance(idata, bytes):
            idata = bytes(idata)  # code below does not work with memoryview
        cdef int isize = len(idata)
        cdef int osize = self.bufsize
        cdef char *source = idata
        cdef char *dest = self.buffer
        with nogil:
            osize = LZ4_compress_limitedOutput(source, dest, isize, osize)
        if not osize:
            raise Exception('lz4 compress failed')
        # copy the result out of the shared buffer and add the ID bytes
        return super().compress(dest[:osize])

    def decompress(self, idata):
        """
        Strip the ID bytes from `idata` and return the lz4-decompressed rest.

        Raises Exception if lz4 reports a failure (negative result).
        """
        if not isinstance(idata, bytes):
            idata = bytes(idata)  # code below does not work with memoryview
        idata = super().decompress(idata)
        cdef int isize = len(idata)
        cdef int osize = self.bufsize
        cdef char *source = idata
        cdef char *dest = self.buffer
        with nogil:
            osize = LZ4_decompress_safe(source, dest, isize, osize)
        if osize < 0:
            # malformed input data, buffer too small, ...
            raise Exception('lz4 decompress failed')
        # copy the result out of the shared buffer
        return dest[:osize]
|
||||
|
||||
|
||||
class LZMA(CompressorBase):
    """
    lzma compression / decompression (python 3.3+ stdlib)

    `level` is handed to lzma as the compression preset.
    """
    ID = b'\x02\x00'
    name = 'lzma'

    def __init__(self, level=6, **kwargs):
        super().__init__(**kwargs)
        self.level = level
        if lzma is None:
            # the lzma module could not be imported
            raise ValueError('No lzma support found.')

    def compress(self, data):
        # integrity is checked elsewhere already, so lzma does not need to
        # add a check of its own
        compressed = lzma.compress(data, preset=self.level, check=lzma.CHECK_NONE)
        return super().compress(compressed)

    def decompress(self, data):
        return lzma.decompress(super().decompress(data))
|
||||
|
||||
|
||||
class ZLIB(CompressorBase):
    """
    zlib compression / decompression (python stdlib)

    For compatibility, zlib data does not carry an extra ID header: it is
    stored exactly as zlib produces it and recognized by looking at the
    first two bytes of the zlib stream itself.
    """
    # not used here, see detect()
    # avoid all 0x.8.. IDs elsewhere!
    ID = b'\x08\x00'
    name = 'zlib'

    @classmethod
    def detect(cls, data):
        """
        Return True if the first two bytes of `data` look like a zlib header.

        `data` must yield integers when indexed (e.g. bytes).
        """
        if len(data) < 2:
            # too short to contain a zlib header; a predicate should say
            # "no" here rather than fail while unpacking
            return False
        # matches misc. patterns 0x.8.. used by zlib
        cmf, flg = data[:2]
        is_deflate = cmf & 0x0f == 8  # low nibble of 1st byte: method 8
        check_ok = (cmf * 256 + flg) % 31 == 0  # both bytes: multiple of 31
        return check_ok and is_deflate

    def __init__(self, level=6, **kwargs):
        super().__init__(**kwargs)
        self.level = level

    def compress(self, data):
        # note: for compatibility no super call, do not add ID bytes
        return zlib.compress(data, self.level)

    def decompress(self, data):
        # note: for compatibility no super call, do not strip ID bytes
        return zlib.decompress(data)
|
||||
|
||||
|
||||
# name -> compressor class, used to look up the compressor by its name
COMPRESSOR_TABLE = {cls.name: cls for cls in (CNONE, LZ4, ZLIB, LZMA)}
# compressor classes in the order they are tried for auto detection:
# check fast stuff first
COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA]
|
||||
|
||||
def get_compressor(name, **kwargs):
    """
    Return a new instance of the compressor class registered under `name`,
    created with the given keyword parameters.

    Raises KeyError if there is no compressor with that name.
    """
    return COMPRESSOR_TABLE[name](**kwargs)
|
||||
|
||||
|
||||
class Compressor:
    """
    compresses using a compressor with given name and parameters
    decompresses everything we can handle (autodetect)
    """
    def __init__(self, name='none', **kwargs):
        """
        Set up the compressor called `name`.

        All other keyword parameters are given to the compressor used for
        compressing and also to the ones created for decompressing.
        Raises KeyError if `name` is not a known compressor name.
        """
        # the default used to be 'null', which is not a registered name
        self.params = kwargs
        self.compressor = get_compressor(name, **self.params)

    def compress(self, data):
        """Compress `data` with the configured compressor."""
        return self.compressor.compress(data)

    def decompress(self, data):
        """
        Decompress `data` with whatever compressor detects its first 2 bytes.

        Raises ValueError if no compressor recognizes the data.
        """
        hdr = bytes(data[:2])  # detect() does not work with memoryview
        for cls in COMPRESSOR_LIST:
            if cls.detect(hdr):
                return cls(**self.params).decompress(data)
        # really format the message (it used to be passed as 2 separate
        # exception arguments) and show the header bytes, not a memoryview
        raise ValueError('No decompressor for this data found: %r.' % hdr)
|
||||
|
||||
|
||||
# Buffer used for the (de)compression result. In the worst case
# (incompressible data) the result can be slightly bigger than the chunk
# buffer, so add 10% to the max. chunk size of 2 ** 23 (CHUNK_MAX_EXP == 23).
COMPR_BUFFER = bytes(int(2 ** 23 * 1.1))
|
||||
|
|
@ -278,9 +278,45 @@ def timestamp(s):
|
|||
|
||||
def ChunkerParams(s):
    """
    Parse a chunker parameters string into a tuple of 4 integers.

    `s` has the form 'CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
    the values are returned in that same order.

    Raises ValueError if `s` does not consist of exactly 4 comma separated
    integers or if CHUNK_MAX_EXP is greater than 23.
    """
    # unpack in the documented order, so the limit below really applies to
    # the max. chunk size exponent (and not to the hash window size)
    chunk_min, chunk_max, chunk_mask, window_size = [int(value) for value in s.split(',')]
    if chunk_max > 23:
        # do not go beyond 2**23 (8MB) chunk size now,
        # COMPR_BUFFER can only cope with up to this size
        raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)')
    return chunk_min, chunk_max, chunk_mask, window_size
|
||||
|
||||
|
||||
def CompressionSpec(s):
    """
    Parse a --compression argument into a dict of compressor parameters.

    Accepted forms:
        N                  (deprecated) same as zlib,N with N in 0..9
        none, lz4          -> dict(name=...)
        zlib, lzma         -> dict(name=..., level=6)
        zlib,N / lzma,N    -> dict(name=..., level=N) with N in 0..9

    Raises ValueError for everything else.
    """
    parts = s.split(',')
    n_parts = len(parts)
    if n_parts < 1:
        raise ValueError
    algo = parts[0]
    try:
        number = int(algo)
    except ValueError:
        number = None
    if number is not None:
        # DEPRECATED: it is just --compression N
        if n_parts == 1 and 0 <= number <= 9:
            return dict(name='zlib', level=number)
        raise ValueError
    # --compression algo[,...]
    if algo in ('none', 'lz4'):
        return dict(name=algo)
    if algo not in ('zlib', 'lzma'):
        raise ValueError
    if n_parts > 2:
        raise ValueError
    # 6 is the default compression level in py stdlib
    level = 6 if n_parts < 2 else int(parts[1])
    if not 0 <= level <= 9:
        raise ValueError
    return dict(name=algo, level=level)
|
||||
|
||||
|
||||
def is_cachedir(path):
|
||||
"""Determines whether the specified path is a cache directory (and
|
||||
therefore should potentially be excluded from the backup) according to
|
||||
|
|
|
|||
12
borg/key.py
12
borg/key.py
|
|
@ -6,9 +6,9 @@ import msgpack
|
|||
import textwrap
|
||||
import hmac
|
||||
from hashlib import sha256
|
||||
import zlib
|
||||
|
||||
from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
|
||||
from .compress import Compressor, COMPR_BUFFER
|
||||
from .helpers import IntegrityError, get_keys_dir, Error
|
||||
|
||||
PREFIX = b'\0' * 8
|
||||
|
|
@ -68,7 +68,7 @@ class KeyBase:
|
|||
self.TYPE_STR = bytes([self.TYPE])
|
||||
self.repository = repository
|
||||
self.target = None # key location file path / repo obj
|
||||
self.compression_level = 0
|
||||
self.compressor = Compressor('none', buffer=COMPR_BUFFER)
|
||||
|
||||
def id_hash(self, data):
|
||||
"""Return HMAC hash using the "id" HMAC key
|
||||
|
|
@ -99,12 +99,12 @@ class PlaintextKey(KeyBase):
|
|||
return sha256(data).digest()
|
||||
|
||||
def encrypt(self, data):
|
||||
return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)])
|
||||
return b''.join([self.TYPE_STR, self.compressor.compress(data)])
|
||||
|
||||
def decrypt(self, id, data):
|
||||
if data[0] != self.TYPE:
|
||||
raise IntegrityError('Invalid encryption envelope')
|
||||
data = zlib.decompress(memoryview(data)[1:])
|
||||
data = self.compressor.decompress(memoryview(data)[1:])
|
||||
if id and sha256(data).digest() != id:
|
||||
raise IntegrityError('Chunk id verification failed')
|
||||
return data
|
||||
|
|
@ -131,7 +131,7 @@ class AESKeyBase(KeyBase):
|
|||
return HMAC(self.id_key, data, sha256).digest()
|
||||
|
||||
def encrypt(self, data):
|
||||
data = zlib.compress(data, self.compression_level)
|
||||
data = self.compressor.compress(data)
|
||||
self.enc_cipher.reset()
|
||||
data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
|
||||
hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
|
||||
|
|
@ -144,7 +144,7 @@ class AESKeyBase(KeyBase):
|
|||
if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac:
|
||||
raise IntegrityError('Encryption envelope checksum mismatch')
|
||||
self.dec_cipher.reset(iv=PREFIX + data[33:41])
|
||||
data = zlib.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview
|
||||
data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:]))
|
||||
if id and HMAC(self.id_key, data, sha256).digest() != id:
|
||||
raise IntegrityError('Chunk id verification failed')
|
||||
return data
|
||||
|
|
|
|||
102
borg/testsuite/compress.py
Normal file
102
borg/testsuite/compress.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import zlib
|
||||
try:
|
||||
import lzma
|
||||
except ImportError:
|
||||
lzma = None
|
||||
|
||||
import pytest
|
||||
|
||||
from ..compress import get_compressor, Compressor, CNONE, ZLIB, LZ4
|
||||
|
||||
|
||||
# output buffer handed to the compressors that need one
buffer = bytes(1 << 16)
# well compressible test payload
data = 10 * b'fooooooooobaaaaaaaar'
# parameters of the Compressor used for the autodetection checks
params = {'name': 'zlib', 'level': 6, 'buffer': buffer}
|
||||
|
||||
|
||||
def test_get_compressor():
    """get_compressor() maps names to compressor classes, unknown names fail."""
    expectations = [
        (dict(name='none'), CNONE),
        (dict(name='lz4', buffer=buffer), LZ4),
        (dict(name='zlib'), ZLIB),
    ]
    for kwargs, expected_cls in expectations:
        assert isinstance(get_compressor(**kwargs), expected_cls)
    with pytest.raises(KeyError):
        get_compressor(name='foobar')
|
||||
|
||||
|
||||
def test_cnull():
    """'none' stores the data 1:1 (plus header) and round-trips."""
    none = get_compressor(name='none')
    stored = none.compress(data)
    assert len(stored) > len(data)
    assert data in stored  # it's not compressed and just in there 1:1
    assert none.decompress(stored) == data
    assert Compressor(**params).decompress(stored) == data  # autodetect
|
||||
|
||||
|
||||
def test_lz4():
    """lz4 shrinks the test data and round-trips."""
    lz4 = get_compressor(name='lz4', buffer=buffer)
    compressed = lz4.compress(data)
    assert len(compressed) < len(data)
    assert lz4.decompress(compressed) == data
    assert Compressor(**params).decompress(compressed) == data  # autodetect
|
||||
|
||||
|
||||
def test_zlib():
    """zlib shrinks the test data and round-trips."""
    zlib_compressor = get_compressor(name='zlib')
    compressed = zlib_compressor.compress(data)
    assert len(compressed) < len(data)
    assert zlib_compressor.decompress(compressed) == data
    assert Compressor(**params).decompress(compressed) == data  # autodetect
|
||||
|
||||
|
||||
def test_lzma():
    """lzma shrinks the test data and round-trips (skipped without lzma)."""
    if lzma is None:
        pytest.skip("No lzma support found.")
    lzma_compressor = get_compressor(name='lzma')
    compressed = lzma_compressor.compress(data)
    assert len(compressed) < len(data)
    assert lzma_compressor.decompress(compressed) == data
    assert Compressor(**params).decompress(compressed) == data  # autodetect
|
||||
|
||||
|
||||
def test_autodetect_invalid():
    """Data that no compressor recognizes is rejected with ValueError."""
    unrecognized = [
        b'\xff\xfftotalcrap',
        b'\x08\x00notreallyzlib',
    ]
    for garbage in unrecognized:
        with pytest.raises(ValueError):
            Compressor(**params).decompress(garbage)
|
||||
|
||||
|
||||
def test_zlib_compat():
    """Our zlib output is plain zlib output, for all levels."""
    # for compatibility reasons, we do not add an extra header for zlib,
    # nor do we expect one when decompressing / autodetecting
    for level in range(10):
        ours = get_compressor(name='zlib', level=level)
        plain = zlib.compress(data, level)
        assert ours.compress(data) == plain
        assert ours.decompress(plain) == data
        assert Compressor(**params).decompress(plain) == data
|
||||
|
||||
|
||||
def test_compressor():
    """Compressor round-trips the test data for every supported setup."""
    levels = (0, 6, 9)
    params_list = [
        dict(name='none', buffer=buffer),
        dict(name='lz4', buffer=buffer),
    ]
    params_list += [dict(name='zlib', level=level, buffer=buffer) for level in levels]
    if lzma:
        params_list += [dict(name='lzma', level=level, buffer=buffer) for level in levels]
    for kwargs in params_list:
        compressor = Compressor(**kwargs)
        assert compressor.decompress(compressor.compress(data)) == data
|
||||
|
||||
|
||||
|
|
@ -2,11 +2,12 @@ import hashlib
|
|||
from time import mktime, strptime
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
import pytest
|
||||
import msgpack
|
||||
|
||||
from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
|
||||
prune_within, prune_split, \
|
||||
StableDict, int_to_bigint, bigint_to_int, parse_timestamp
|
||||
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec
|
||||
from . import BaseTestCase
|
||||
|
||||
|
||||
|
|
@ -104,6 +105,30 @@ class PatternTestCase(BaseTestCase):
|
|||
['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
|
||||
|
||||
|
||||
def test_compression_specs():
    """CompressionSpec accepts the documented forms and rejects the rest."""
    valid = [
        ('0', dict(name='zlib', level=0)),
        ('1', dict(name='zlib', level=1)),
        ('9', dict(name='zlib', level=9)),
        ('none', dict(name='none')),
        ('lz4', dict(name='lz4')),
        ('zlib', dict(name='zlib', level=6)),
        ('zlib,0', dict(name='zlib', level=0)),
        ('zlib,9', dict(name='zlib', level=9)),
        ('lzma', dict(name='lzma', level=6)),
        ('lzma,0', dict(name='lzma', level=0)),
        ('lzma,9', dict(name='lzma', level=9)),
    ]
    for spec, expected in valid:
        assert CompressionSpec(spec) == expected
    invalid = ['', '10', 'zlib,9,invalid', 'lzma,9,invalid', 'invalid']
    for spec in invalid:
        with pytest.raises(ValueError):
            CompressionSpec(spec)
|
||||
|
||||
|
||||
class MakePathSafeTestCase(BaseTestCase):
|
||||
|
||||
def test(self):
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
.. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2
|
||||
.. _ACL: https://en.wikipedia.org/wiki/Access_control_list
|
||||
.. _libacl: http://savannah.nongnu.org/projects/acl/
|
||||
.. _liblz4: https://github.com/Cyan4973/lz4
|
||||
.. _OpenSSL: https://www.openssl.org/
|
||||
.. _Python: http://www.python.org/
|
||||
.. _Buzhash: https://en.wikipedia.org/wiki/Buzhash
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ Installation
|
|||
* Python_ >= 3.2
|
||||
* OpenSSL_ >= 1.0.0
|
||||
* libacl_
|
||||
* liblz4_
|
||||
* some python dependencies, see install_requires in setup.py
|
||||
|
||||
General notes
|
||||
|
|
@ -59,6 +60,9 @@ Some of the steps detailled below might be useful also for non-git installs.
|
|||
# ACL support Headers + Library
|
||||
apt-get install libacl1-dev libacl1
|
||||
|
||||
# lz4 super fast compression support Headers + Library
|
||||
apt-get install liblz4-dev liblz4-1
|
||||
|
||||
# if you do not have gcc / make / etc. yet
|
||||
apt-get install build-essential
|
||||
|
||||
|
|
@ -106,13 +110,16 @@ Some of the steps detailled below might be useful also for non-git installs.
|
|||
|
||||
# ACL support Headers + Library
|
||||
sudo dnf install libacl-devel libacl
|
||||
|
||||
|
||||
# lz4 super fast compression support Headers + Library
|
||||
sudo dnf install lz4-devel lz4
|
||||
|
||||
# optional: FUSE support - to mount backup archives
|
||||
sudo dnf install fuse-devel fuse
|
||||
|
||||
# optional: for unit testing
|
||||
sudo dnf install fakeroot
|
||||
|
||||
|
||||
# get |project_name| from github, install it
|
||||
git clone |git_url|
|
||||
|
||||
|
|
@ -148,6 +155,7 @@ You'll need at least (use the cygwin installer to fetch/install these):
|
|||
gcc-core
|
||||
git
|
||||
libopenssl
|
||||
liblz4_1 liblz4-devel # from cygwinports.org
|
||||
make
|
||||
openssh
|
||||
openssl-devel
|
||||
|
|
|
|||
|
|
@ -382,10 +382,35 @@ representation of the repository id.
|
|||
Compression
|
||||
-----------
|
||||
|
||||
|project_name| currently always pipes all data through a zlib compressor which
|
||||
supports compression levels 0 (no compression, fast) to 9 (high compression, slow).
|
||||
|project_name| supports the following compression methods:
|
||||
|
||||
- none (no compression, pass through data 1:1)
|
||||
- lz4 (low compression, but super fast)
|
||||
- zlib (level 0-9, level 0 is no compression [but still adding zlib overhead],
|
||||
level 1 is low, level 9 is high compression)
|
||||
- lzma (level 0-9, level 0 is low, level 9 is high compression).
|
||||
|
||||
Speed: none > lz4 > zlib > lzma
|
||||
Compression: lzma > zlib > lz4 > none
|
||||
|
||||
Be careful, higher zlib and especially lzma compression levels might take a
|
||||
lot of resources (CPU and memory).
|
||||
|
||||
The overall speed of course also depends on the speed of your target storage.
|
||||
If that is slow, using a higher compression level might yield better overall
|
||||
performance. You need to experiment a bit. Maybe just watch your CPU load, if
|
||||
that is relatively low, increase compression until 1 core is 70-100% loaded.
|
||||
|
||||
Even if your target storage is rather fast, you might see interesting effects:
|
||||
while doing no compression at all (none) is an operation that takes no time, it
|
||||
likely will need to store more data to the storage compared to using lz4.
|
||||
The time needed to transfer and store the additional data might be much more
|
||||
than if you had used lz4 (which is super fast, but still might compress your
|
||||
data about 2:1). This is assuming your data is compressible (if you backup
|
||||
already compressed data, trying to compress them at backup time is usually
|
||||
pointless).
|
||||
|
||||
Compression is applied after deduplication, thus using different compression
|
||||
methods in one repo does not influence deduplication.
|
||||
|
||||
See ``borg create --help`` about how to specify the compression level and its default.
|
||||
|
||||
Note: zlib level 0 creates a little bit more output data than it gets as input,
|
||||
due to zlib protocol overhead.
|
||||
|
|
|
|||
|
|
@ -89,6 +89,31 @@ certain number of old archives::
|
|||
# and 6 monthly archives.
|
||||
borg prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6
|
||||
|
||||
.. _backup_compression:
|
||||
|
||||
Backup compression
|
||||
------------------
|
||||
|
||||
Default is no compression, but we support different methods with high speed
|
||||
or high compression:
|
||||
|
||||
If you have a quick repo storage and you want a little compression:
|
||||
|
||||
$ borg create --compression lz4 /mnt/backup::repo ~
|
||||
|
||||
If you have a medium fast repo storage and you want a bit more compression (N=0..9,
|
||||
0 means no compression, 9 means high compression):
|
||||
|
||||
$ borg create --compression zlib,N /mnt/backup::repo ~
|
||||
|
||||
If you have a very slow repo storage and you want high compression (N=0..9, 0 means
|
||||
low compression, 9 means high compression):
|
||||
|
||||
$ borg create --compression lzma,N /mnt/backup::repo ~
|
||||
|
||||
You'll need to experiment a bit to find the best compression for your use case.
|
||||
Keep an eye on CPU load and throughput.
|
||||
|
||||
.. _encrypted_repos:
|
||||
|
||||
Repository encryption
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@
|
|||
Support
|
||||
=======
|
||||
|
||||
Please first read the docs and the FAQ section in the docs, a lot of stuff is
|
||||
documented / explained there.
|
||||
|
||||
Issue Tracker
|
||||
-------------
|
||||
|
||||
|
|
|
|||
|
|
@ -76,8 +76,11 @@ Resource Usage
|
|||
|project_name| might use a lot of resources depending on the size of the data set it is dealing with.
|
||||
|
||||
CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded.
|
||||
Especially higher zlib and lzma compression levels use significant amounts of CPU cycles.
|
||||
|
||||
Memory (RAM): the chunks index and the files index are read into memory for performance reasons.
|
||||
Compression, esp. lzma compression with high levels, might need substantial amounts
|
||||
of memory.
|
||||
|
||||
Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the
|
||||
deduplicated chunks used to represent them in the repository.
|
||||
|
|
@ -175,6 +178,18 @@ Examples
|
|||
# Backup a raw device (must not be active/in use/mounted at that time)
|
||||
$ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda -
|
||||
|
||||
# No compression (default)
|
||||
$ borg create /mnt/backup::repo ~
|
||||
|
||||
# Super fast, low compression
|
||||
$ borg create --compression lz4 /mnt/backup::repo ~
|
||||
|
||||
# Less fast, higher compression (N = 0..9)
|
||||
$ borg create --compression zlib,N /mnt/backup::repo ~
|
||||
|
||||
# Even slower, even higher compression (N = 0..9)
|
||||
$ borg create --compression lzma,N /mnt/backup::repo ~
|
||||
|
||||
|
||||
.. include:: usage/extract.rst.inc
|
||||
|
||||
|
|
|
|||
8
setup.py
8
setup.py
|
|
@ -19,6 +19,7 @@ if sys.version_info < min_python:
|
|||
|
||||
from setuptools import setup, Extension
|
||||
|
||||
compress_source = 'borg/compress.pyx'
|
||||
crypto_source = 'borg/crypto.pyx'
|
||||
chunker_source = 'borg/chunker.pyx'
|
||||
hashindex_source = 'borg/hashindex.pyx'
|
||||
|
|
@ -38,6 +39,7 @@ try:
|
|||
|
||||
def make_distribution(self):
|
||||
self.filelist.extend([
|
||||
'borg/compress.c',
|
||||
'borg/crypto.c',
|
||||
'borg/chunker.c', 'borg/_chunker.c',
|
||||
'borg/hashindex.c', 'borg/_hashindex.c',
|
||||
|
|
@ -52,6 +54,7 @@ except ImportError:
|
|||
def __init__(self, *args, **kwargs):
|
||||
raise Exception('Cython is required to run sdist')
|
||||
|
||||
compress_source = compress_source.replace('.pyx', '.c')
|
||||
crypto_source = crypto_source.replace('.pyx', '.c')
|
||||
chunker_source = chunker_source.replace('.pyx', '.c')
|
||||
hashindex_source = hashindex_source.replace('.pyx', '.c')
|
||||
|
|
@ -59,7 +62,9 @@ except ImportError:
|
|||
platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
|
||||
platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
|
||||
from distutils.command.build_ext import build_ext
|
||||
if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]):
|
||||
if not all(os.path.exists(path) for path in [
|
||||
compress_source, crypto_source, chunker_source, hashindex_source,
|
||||
platform_linux_source, platform_freebsd_source]):
|
||||
raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version')
|
||||
|
||||
|
||||
|
|
@ -89,6 +94,7 @@ cmdclass = versioneer.get_cmdclass()
|
|||
cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
|
||||
|
||||
ext_modules = [
|
||||
Extension('borg.compress', [compress_source], libraries=['lz4']),
|
||||
Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
|
||||
Extension('borg.chunker', [chunker_source]),
|
||||
Extension('borg.hashindex', [hashindex_source])
|
||||
|
|
|
|||
Loading…
Reference in a new issue