diff --git a/.gitignore b/.gitignore index 97df7c610..ab98dc85a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ env .tox hashindex.c chunker.c +compress.c crypto.c platform_darwin.c platform_freebsd.c diff --git a/.travis/install.sh b/.travis/install.sh index 80b39226f..27eb668db 100755 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -14,6 +14,7 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then eval "$(pyenv init -)" fi + brew install lz4 brew outdated pyenv || brew upgrade pyenv case "${TOXENV}" in @@ -34,6 +35,9 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then python -m pip install --user virtualenv else pip install virtualenv + sudo add-apt-repository -y ppa:gezakovacs/lz4 + sudo apt-get update + sudo apt-get install -y liblz4-dev sudo apt-get install -y libacl1-dev fi diff --git a/CHANGES.rst b/CHANGES.rst index 13dfdb4ce..439ee4c37 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,16 +5,35 @@ Borg Changelog Version 0.25.0 (not released yet) --------------------------------- -Incompatible changes (compared to 0.24): +Compatibility notes: -- none yet +- the new compression code is very compatible: as long as you stay with zlib + compression, older borg releases will still be able to read data from a + repo/archive made with the new code (note: this is not the case for the + default "none" compression, use "zlib,0" if you want a "no compression" mode + that can be read by older borg). Also the new code is able to read repos and + archives made with older borg versions (for all zlib levels 0..9). Deprecations: -- none yet +- --compression N (with N being a number, as in 0.24) is deprecated. + We keep the --compression 0..9 for now to not break scripts, but it is + deprecated and will be removed later, so better fix your scripts now: + --compression 0 (as in 0.24) is the same as --compression zlib,0 (now). + BUT: if you do not want compression, you rather want --compression none + (which is the default). 
+ --compression 1 (in 0.24) is the same as --compression zlib,1 (now) + --compression 9 (in 0.24) is the same as --compression zlib,9 (now) + New features: +- create --compression none (default, means: do not compress, just pass through + data "as is". this is more efficient than zlib level 0 as used in borg 0.24) +- create --compression lz4 (super-fast, but not very high compression) + Please note that borgbackup needs lz4 library as additional requirement. +- create --compression zlib,N (slower, higher compression, default for N is 6) +- create --compression lzma,N (slowest, highest compression, default N is 6) - honor the nodump flag (UF_NODUMP) and do not backup such items Bug fixes: diff --git a/README.rst b/README.rst index 22320d3fe..8180fd2ab 100644 --- a/README.rst +++ b/README.rst @@ -51,7 +51,8 @@ Main features authenticity is verified using HMAC-SHA256. **Compression** - All data can be compressed by zlib, level 0-9. + All data can be compressed by lz4 (super fast, low compression), zlib + (medium speed and compression) or lzma (low speed, high compression). **Off-site backups** Borg can store data on any remote host accessible over SSH. If Borg is diff --git a/borg/archiver.py b/borg/archiver.py index de9989d37..3f4876943 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -14,6 +14,7 @@ import traceback from . 
import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS +from .compress import Compressor, COMPR_BUFFER from .repository import Repository from .cache import Cache from .key import key_creator @@ -21,7 +22,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - is_cachedir, bigint_to_int, ChunkerParams + is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec from .remote import RepositoryServer, RemoteRepository has_lchflags = hasattr(os, 'lchflags') @@ -104,7 +105,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") t0 = datetime.now() repository = self.open_repository(args.archive, exclusive=True) manifest, key = Manifest.load(repository) - key.compression_level = args.compression + compr_args = dict(buffer=COMPR_BUFFER) + compr_args.update(args.compression) + key.compressor = Compressor(**compr_args) cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, @@ -670,9 +673,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE', help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS) subparser.add_argument('-C', '--compression', dest='compression', - type=int, default=0, metavar='N', - help='select compression algorithm and level. 0..9 is supported and means zlib ' - 'level 0 (no compression, fast, default) .. 
zlib level 9 (high compression, slow).') + type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION', + help='select compression algorithm (and level): ' + 'none == no compression (default), ' + 'lz4 == lz4, ' + 'zlib == zlib (default level 6), ' + 'zlib,0 .. zlib,9 == zlib (with level 0..9), ' + 'lzma == lzma (default level 6), ' + 'lzma,0 .. lzma,9 == lzma (with level 0..9).') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/compress.pyx b/borg/compress.pyx new file mode 100644 index 000000000..2285b55d8 --- /dev/null +++ b/borg/compress.pyx @@ -0,0 +1,199 @@ +import zlib +try: + import lzma +except ImportError: + lzma = None + +cdef extern from "lz4.h": + int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil + int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil + + +cdef class CompressorBase: + """ + base class for all (de)compression classes, + also handles compression format auto detection and + adding/stripping the ID header (which enable auto detection). + """ + ID = b'\xFF\xFF' # reserved and not used + # overwrite with a unique 2-bytes bytestring in child classes + name = 'baseclass' + + @classmethod + def detect(cls, data): + return data.startswith(cls.ID) + + def __init__(self, **kwargs): + pass + + def compress(self, data): + # add ID bytes + return self.ID + data + + def decompress(self, data): + # strip ID bytes + return data[2:] + + +class CNONE(CompressorBase): + """ + none - no compression, just pass through data + """ + ID = b'\x00\x00' + name = 'none' + + def compress(self, data): + return super().compress(data) + + def decompress(self, data): + data = super().decompress(data) + if not isinstance(data, bytes): + data = bytes(data) + return data + + +cdef class LZ4(CompressorBase): + """ + raw LZ4 compression / decompression (liblz4). 
+ + Features: + - lz4 is super fast + - wrapper releases CPython's GIL to support multithreaded code + - buffer given by caller, avoiding frequent reallocation and buffer duplication + - uses safe lz4 methods that never go beyond the end of the output buffer + + But beware: + - this is not very generic, the given buffer MUST be large enough to + handle all compression or decompression output (or it will fail). + - you must not do method calls to the same LZ4 instance from different + threads at the same time - create one LZ4 instance per thread! + """ + ID = b'\x01\x00' + name = 'lz4' + + cdef char *buffer # helper buffer for (de)compression output + cdef int bufsize # size of this buffer + + def __cinit__(self, **kwargs): + buffer = kwargs['buffer'] + self.buffer = buffer + self.bufsize = len(buffer) + + def compress(self, idata): + if not isinstance(idata, bytes): + idata = bytes(idata) # code below does not work with memoryview + cdef int isize = len(idata) + cdef int osize = self.bufsize + cdef char *source = idata + cdef char *dest = self.buffer + with nogil: + osize = LZ4_compress_limitedOutput(source, dest, isize, osize) + if not osize: + raise Exception('lz4 compress failed') + return super().compress(dest[:osize]) + + def decompress(self, idata): + if not isinstance(idata, bytes): + idata = bytes(idata) # code below does not work with memoryview + idata = super().decompress(idata) + cdef int isize = len(idata) + cdef int osize = self.bufsize + cdef char *source = idata + cdef char *dest = self.buffer + with nogil: + osize = LZ4_decompress_safe(source, dest, isize, osize) + if osize < 0: + # malformed input data, buffer too small, ... 
+ raise Exception('lz4 decompress failed') + return dest[:osize] + + +class LZMA(CompressorBase): + """ + lzma compression / decompression (python 3.3+ stdlib) + """ + ID = b'\x02\x00' + name = 'lzma' + + def __init__(self, level=6, **kwargs): + super().__init__(**kwargs) + self.level = level + if lzma is None: + raise ValueError('No lzma support found.') + + def compress(self, data): + # we do not need integrity checks in lzma, we do that already + data = lzma.compress(data, preset=self.level, check=lzma.CHECK_NONE) + return super().compress(data) + + def decompress(self, data): + data = super().decompress(data) + return lzma.decompress(data) + + +class ZLIB(CompressorBase): + """ + zlib compression / decompression (python stdlib) + """ + ID = b'\x08\x00' # not used here, see detect() + # avoid all 0x.8.. IDs elsewhere! + name = 'zlib' + + @classmethod + def detect(cls, data): + # matches misc. patterns 0x.8.. used by zlib + cmf, flg = data[:2] + is_deflate = cmf & 0x0f == 8 + check_ok = (cmf * 256 + flg) % 31 == 0 + return check_ok and is_deflate + + def __init__(self, level=6, **kwargs): + super().__init__(**kwargs) + self.level = level + + def compress(self, data): + # note: for compatibility no super call, do not add ID bytes + return zlib.compress(data, self.level) + + def decompress(self, data): + # note: for compatibility no super call, do not strip ID bytes + return zlib.decompress(data) + + +COMPRESSOR_TABLE = { + CNONE.name: CNONE, + LZ4.name: LZ4, + ZLIB.name: ZLIB, + LZMA.name: LZMA, +} +COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ] # check fast stuff first + +def get_compressor(name, **kwargs): + cls = COMPRESSOR_TABLE[name] + return cls(**kwargs) + + +class Compressor: + """ + compresses using a compressor with given name and parameters + decompresses everything we can handle (autodetect) + """ + def __init__(self, name='none', **kwargs): + self.params = kwargs + self.compressor = get_compressor(name, **self.params) + + def compress(self, data): + 
return self.compressor.compress(data) + + def decompress(self, data): + hdr = bytes(data[:2]) # detect() does not work with memoryview + for cls in COMPRESSOR_LIST: + if cls.detect(hdr): + return cls(**self.params).decompress(data) + else: + raise ValueError('No decompressor for this data found: %r.' % hdr) + + +# a buffer used for (de)compression result, which can be slightly bigger +# than the chunk buffer in the worst (incompressible data) case, add 10%: +COMPR_BUFFER = bytes(int(1.1 * 2 ** 23)) # CHUNK_MAX_EXP == 23 diff --git a/borg/helpers.py b/borg/helpers.py index d20532723..8643166f6 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -278,9 +278,45 @@ def timestamp(s): def ChunkerParams(s): - window_size, chunk_mask, chunk_min, chunk_max = s.split(',') + chunk_min, chunk_max, chunk_mask, window_size = s.split(',') + if int(chunk_max) > 23: + # do not go beyond 2**23 (8MB) chunk size now, + # COMPR_BUFFER can only cope with up to this size + raise ValueError - return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max) + return int(chunk_min), int(chunk_max), int(chunk_mask), int(window_size) + +def CompressionSpec(s): + values = s.split(',') + count = len(values) + if count < 1: + raise ValueError + compression = values[0] + try: + compression = int(compression) + if count > 1: + raise ValueError + # DEPRECATED: it is just --compression N + if 0 <= compression <= 9: + return dict(name='zlib', level=compression) + raise ValueError + except ValueError: + # --compression algo[,...] 
+ name = compression + if name in ('none', 'lz4', ): + return dict(name=name) + if name in ('zlib', 'lzma', ): + if count < 2: + level = 6 # default compression level in py stdlib + elif count == 2: + level = int(values[1]) + if not 0 <= level <= 9: + raise ValueError + else: + raise ValueError + return dict(name=name, level=level) + raise ValueError + + def is_cachedir(path): """Determines whether the specified path is a cache directory (and therefore should potentially be excluded from the backup) according to diff --git a/borg/key.py b/borg/key.py index fabdae5b3..7067a4454 100644 --- a/borg/key.py +++ b/borg/key.py @@ -6,9 +6,9 @@ import msgpack import textwrap import hmac from hashlib import sha256 -import zlib from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks +from .compress import Compressor, COMPR_BUFFER from .helpers import IntegrityError, get_keys_dir, Error PREFIX = b'\0' * 8 @@ -68,7 +68,7 @@ class KeyBase: self.TYPE_STR = bytes([self.TYPE]) self.repository = repository self.target = None # key location file path / repo obj - self.compression_level = 0 + self.compressor = Compressor('none', buffer=COMPR_BUFFER) def id_hash(self, data): """Return HMAC hash using the "id" HMAC key @@ -99,12 +99,12 @@ class PlaintextKey(KeyBase): return sha256(data).digest() def encrypt(self, data): - return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)]) + return b''.join([self.TYPE_STR, self.compressor.compress(data)]) def decrypt(self, id, data): if data[0] != self.TYPE: raise IntegrityError('Invalid encryption envelope') - data = zlib.decompress(memoryview(data)[1:]) + data = self.compressor.decompress(memoryview(data)[1:]) if id and sha256(data).digest() != id: raise IntegrityError('Chunk id verification failed') return data @@ -131,7 +131,7 @@ class AESKeyBase(KeyBase): return HMAC(self.id_key, data, sha256).digest() def encrypt(self, data): - data = zlib.compress(data, 
self.compression_level) + data = self.compressor.compress(data) self.enc_cipher.reset() data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) hmac = HMAC(self.enc_hmac_key, data, sha256).digest() @@ -144,7 +144,7 @@ class AESKeyBase(KeyBase): if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac: raise IntegrityError('Encryption envelope checksum mismatch') self.dec_cipher.reset(iv=PREFIX + data[33:41]) - data = zlib.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview + data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:])) if id and HMAC(self.id_key, data, sha256).digest() != id: raise IntegrityError('Chunk id verification failed') return data diff --git a/borg/testsuite/compress.py b/borg/testsuite/compress.py new file mode 100644 index 000000000..8019925b2 --- /dev/null +++ b/borg/testsuite/compress.py @@ -0,0 +1,102 @@ +import zlib +try: + import lzma +except ImportError: + lzma = None + +import pytest + +from ..compress import get_compressor, Compressor, CNONE, ZLIB, LZ4 + + +buffer = bytes(2**16) +data = b'fooooooooobaaaaaaaar' * 10 +params = dict(name='zlib', level=6, buffer=buffer) + + +def test_get_compressor(): + c = get_compressor(name='none') + assert isinstance(c, CNONE) + c = get_compressor(name='lz4', buffer=buffer) + assert isinstance(c, LZ4) + c = get_compressor(name='zlib') + assert isinstance(c, ZLIB) + with pytest.raises(KeyError): + get_compressor(name='foobar') + + +def test_cnull(): + c = get_compressor(name='none') + cdata = c.compress(data) + assert len(cdata) > len(data) + assert data in cdata # it's not compressed and just in there 1:1 + assert data == c.decompress(cdata) + assert data == Compressor(**params).decompress(cdata) # autodetect + + +def test_lz4(): + c = get_compressor(name='lz4', buffer=buffer) + cdata = c.compress(data) + assert len(cdata) < len(data) + assert data == c.decompress(cdata) + assert data == 
Compressor(**params).decompress(cdata) # autodetect + + +def test_zlib(): + c = get_compressor(name='zlib') + cdata = c.compress(data) + assert len(cdata) < len(data) + assert data == c.decompress(cdata) + assert data == Compressor(**params).decompress(cdata) # autodetect + + +def test_lzma(): + if lzma is None: + pytest.skip("No lzma support found.") + c = get_compressor(name='lzma') + cdata = c.compress(data) + assert len(cdata) < len(data) + assert data == c.decompress(cdata) + assert data == Compressor(**params).decompress(cdata) # autodetect + + +def test_autodetect_invalid(): + with pytest.raises(ValueError): + Compressor(**params).decompress(b'\xff\xfftotalcrap') + with pytest.raises(ValueError): + Compressor(**params).decompress(b'\x08\x00notreallyzlib') + + +def test_zlib_compat(): + # for compatibility reasons, we do not add an extra header for zlib, + # nor do we expect one when decompressing / autodetecting + for level in range(10): + c = get_compressor(name='zlib', level=level) + cdata1 = c.compress(data) + cdata2 = zlib.compress(data, level) + assert cdata1 == cdata2 + data2 = c.decompress(cdata2) + assert data == data2 + data2 = Compressor(**params).decompress(cdata2) + assert data == data2 + + +def test_compressor(): + params_list = [ + dict(name='none', buffer=buffer), + dict(name='lz4', buffer=buffer), + dict(name='zlib', level=0, buffer=buffer), + dict(name='zlib', level=6, buffer=buffer), + dict(name='zlib', level=9, buffer=buffer), + ] + if lzma: + params_list += [ + dict(name='lzma', level=0, buffer=buffer), + dict(name='lzma', level=6, buffer=buffer), + dict(name='lzma', level=9, buffer=buffer), + ] + for params in params_list: + c = Compressor(**params) + assert data == c.decompress(c.compress(data)) + + diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 26b422b0c..76bafb5b7 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -2,11 +2,12 @@ import hashlib from time import mktime, strptime from 
datetime import datetime, timezone, timedelta +import pytest import msgpack from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \ prune_within, prune_split, \ - StableDict, int_to_bigint, bigint_to_int, parse_timestamp + StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec from . import BaseTestCase @@ -104,6 +105,30 @@ class PatternTestCase(BaseTestCase): ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) +def test_compression_specs(): + with pytest.raises(ValueError): + CompressionSpec('') + assert CompressionSpec('0') == dict(name='zlib', level=0) + assert CompressionSpec('1') == dict(name='zlib', level=1) + assert CompressionSpec('9') == dict(name='zlib', level=9) + with pytest.raises(ValueError): + CompressionSpec('10') + assert CompressionSpec('none') == dict(name='none') + assert CompressionSpec('lz4') == dict(name='lz4') + assert CompressionSpec('zlib') == dict(name='zlib', level=6) + assert CompressionSpec('zlib,0') == dict(name='zlib', level=0) + assert CompressionSpec('zlib,9') == dict(name='zlib', level=9) + with pytest.raises(ValueError): + CompressionSpec('zlib,9,invalid') + assert CompressionSpec('lzma') == dict(name='lzma', level=6) + assert CompressionSpec('lzma,0') == dict(name='lzma', level=0) + assert CompressionSpec('lzma,9') == dict(name='lzma', level=9) + with pytest.raises(ValueError): + CompressionSpec('lzma,9,invalid') + with pytest.raises(ValueError): + CompressionSpec('invalid') + + class MakePathSafeTestCase(BaseTestCase): def test(self): diff --git a/docs/global.rst.inc b/docs/global.rst.inc index c0629a143..c8c490498 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -13,6 +13,7 @@ .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list .. _libacl: http://savannah.nongnu.org/projects/acl/ +.. _liblz4: https://github.com/Cyan4973/lz4 .. _OpenSSL: https://www.openssl.org/ .. 
_Python: http://www.python.org/ .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash diff --git a/docs/installation.rst b/docs/installation.rst index 3cd4e13b6..d08863b7f 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -9,6 +9,7 @@ Installation * Python_ >= 3.2 * OpenSSL_ >= 1.0.0 * libacl_ +* liblz4_ * some python dependencies, see install_requires in setup.py General notes @@ -59,6 +60,9 @@ Some of the steps detailled below might be useful also for non-git installs. # ACL support Headers + Library apt-get install libacl1-dev libacl1 + # lz4 super fast compression support Headers + Library + apt-get install liblz4-dev liblz4-1 + # if you do not have gcc / make / etc. yet apt-get install build-essential @@ -106,13 +110,16 @@ Some of the steps detailled below might be useful also for non-git installs. # ACL support Headers + Library sudo dnf install libacl-devel libacl - + + # lz4 super fast compression support Headers + Library + sudo dnf install lz4-devel lz4 + # optional: FUSE support - to mount backup archives sudo dnf install fuse-devel fuse # optional: for unit testing sudo dnf install fakeroot - + # get |project_name| from github, install it git clone |git_url| @@ -148,6 +155,7 @@ You'll need at least (use the cygwin installer to fetch/install these): gcc-core git libopenssl + liblz4_1 liblz4-devel # from cygwinports.org make openssh openssl-devel diff --git a/docs/internals.rst b/docs/internals.rst index 6dfc8ba9b..845dff131 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -382,10 +382,35 @@ representation of the repository id. Compression ----------- -|project_name| currently always pipes all data through a zlib compressor which -supports compression levels 0 (no compression, fast) to 9 (high compression, slow). 
+|project_name| supports the following compression methods: + +- none (no compression, pass through data 1:1) +- lz4 (low compression, but super fast) +- zlib (level 0-9, level 0 is no compression [but still adding zlib overhead], + level 1 is low, level 9 is high compression) +- lzma (level 0-9, level 0 is low, level 9 is high compression). + +Speed: none > lz4 > zlib > lzma +Compression: lzma > zlib > lz4 > none + +Be careful, higher zlib and especially lzma compression levels might take a +lot of resources (CPU and memory). + +The overall speed of course also depends on the speed of your target storage. +If that is slow, using a higher compression level might yield better overall +performance. You need to experiment a bit. Maybe just watch your CPU load, if +that is relatively low, increase compression until 1 core is 70-100% loaded. + +Even if your target storage is rather fast, you might see interesting effects: +while doing no compression at all (none) is an operation that takes no time, it +likely will need to store more data to the storage compared to using lz4. +The time needed to transfer and store the additional data might be much more +than if you had used lz4 (which is super fast, but still might compress your +data about 2:1). This is assuming your data is compressible (if you back up +already compressed data, trying to compress it at backup time is usually +pointless). + +Compression is applied after deduplication, thus using different compression +methods in one repo does not influence deduplication. See ``borg create --help`` about how to specify the compression level and its default. - -Note: zlib level 0 creates a little bit more output data than it gets as input, -due to zlib protocol overhead. diff --git a/docs/quickstart.rst b/docs/quickstart.rst index fcb223503..4b78fefbb 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -89,6 +89,31 @@ certain number of old archives:: # and 6 monthly archives. 
borg prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6 +.. _backup_compression: + +Backup compression +------------------ + +Default is no compression, but we support different methods with high speed +or high compression: + +If you have a quick repo storage and you want a little compression:: + + $ borg create --compression lz4 /mnt/backup::repo ~ + +If you have a medium fast repo storage and you want a bit more compression (N=0..9, +0 means no compression, 9 means high compression):: + + $ borg create --compression zlib,N /mnt/backup::repo ~ + +If you have a very slow repo storage and you want high compression (N=0..9, 0 means +low compression, 9 means high compression):: + + $ borg create --compression lzma,N /mnt/backup::repo ~ + +You'll need to experiment a bit to find the best compression for your use case. +Keep an eye on CPU load and throughput. + .. _encrypted_repos: Repository encryption diff --git a/docs/support.rst b/docs/support.rst index 5e953f202..f53c01285 100644 --- a/docs/support.rst +++ b/docs/support.rst @@ -4,6 +4,9 @@ Support ======= +Please first read the docs and the FAQ section in the docs, a lot of stuff is +documented / explained there. + Issue Tracker ------------- diff --git a/docs/usage.rst b/docs/usage.rst index fcbee5fef..c4e2fa80f 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -76,8 +76,11 @@ Resource Usage |project_name| might use a lot of resources depending on the size of the data set it is dealing with. CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded. + Especially higher zlib and lzma compression levels use significant amounts of CPU cycles. Memory (RAM): the chunks index and the files index are read into memory for performance reasons. + compression, esp. lzma compression with high levels might need substantial amounts + of memory. 
Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the deduplicated chunks used to represent them in the repository. @@ -175,6 +178,18 @@ Examples # Backup a raw device (must not be active/in use/mounted at that time) $ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda - + # No compression (default) + $ borg create /mnt/backup::repo ~ + + # Super fast, low compression + $ borg create --compression lz4 /mnt/backup::repo ~ + + # Less fast, higher compression (N = 0..9) + $ borg create --compression zlib,N /mnt/backup::repo ~ + + # Even slower, even higher compression (N = 0..9) + $ borg create --compression lzma,N /mnt/backup::repo ~ + .. include:: usage/extract.rst.inc diff --git a/setup.py b/setup.py index edd75dc1a..87de52b71 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ if sys.version_info < min_python: from setuptools import setup, Extension +compress_source = 'borg/compress.pyx' crypto_source = 'borg/crypto.pyx' chunker_source = 'borg/chunker.pyx' hashindex_source = 'borg/hashindex.pyx' @@ -38,6 +39,7 @@ try: def make_distribution(self): self.filelist.extend([ + 'borg/compress.c', 'borg/crypto.c', 'borg/chunker.c', 'borg/_chunker.c', 'borg/hashindex.c', 'borg/_hashindex.c', @@ -52,6 +54,7 @@ except ImportError: def __init__(self, *args, **kwargs): raise Exception('Cython is required to run sdist') + compress_source = compress_source.replace('.pyx', '.c') crypto_source = crypto_source.replace('.pyx', '.c') chunker_source = chunker_source.replace('.pyx', '.c') hashindex_source = hashindex_source.replace('.pyx', '.c') @@ -59,7 +62,9 @@ except ImportError: platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c') platform_darwin_source = platform_darwin_source.replace('.pyx', '.c') from distutils.command.build_ext import build_ext - if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]): + 
if not all(os.path.exists(path) for path in [ + compress_source, crypto_source, chunker_source, hashindex_source, + platform_linux_source, platform_freebsd_source]): raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version') @@ -89,6 +94,7 @@ cmdclass = versioneer.get_cmdclass() cmdclass.update({'build_ext': build_ext, 'sdist': Sdist}) ext_modules = [ + Extension('borg.compress', [compress_source], libraries=['lz4']), Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs), Extension('borg.chunker', [chunker_source]), Extension('borg.hashindex', [hashindex_source])