From 5a7c02729674811e2bee634794fd906ad8c6fefe Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 24 Apr 2016 02:25:04 +0200 Subject: [PATCH 01/28] vagrant: OS X update osxfuse / fix lzma, fixes #933 --- Vagrantfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index cecd8341c..59ce9f5bb 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -54,14 +54,15 @@ def packages_darwin # install all the (security and other) updates sudo softwareupdate --install --all # get osxfuse 3.0.x pre-release code from github: - curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.0.9/osxfuse-3.0.9.dmg >osxfuse.dmg + curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.2.0/osxfuse-3.2.0.dmg >osxfuse.dmg MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \ - && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.0.9.pkg" -target / + && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.2.0.pkg" -target / sudo chown -R vagrant /usr/local # brew must be able to create stuff here ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" brew update brew install openssl brew install lz4 + brew install xz # required for python lzma module brew install fakeroot brew install git brew install pkgconfig From 0a30d40a9d4ae5b5df5b6963ad2431854f42c467 Mon Sep 17 00:00:00 2001 From: anarcat Date: Mon, 25 Apr 2016 22:54:36 -0400 Subject: [PATCH 02/28] fix debian sid cross-reference in install docs --- docs/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.rst b/docs/installation.rst index fae4517b3..e1456d06c 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -49,7 +49,7 @@ Ubuntu `16.04`_, backports (PPA): `15.10`_, `14.04`_ ``apt install borgbac .. _[community]: https://www.archlinux.org/packages/?name=borg .. 
_jessie-backports: https://packages.debian.org/jessie-backports/borgbackup .. _stretch: https://packages.debian.org/stretch/borgbackup -.. _unstable/sid: https://packages.debian.org/sid/borgbackup +.. _sid: https://packages.debian.org/sid/borgbackup .. _ebuild: https://packages.gentoo.org/packages/app-backup/borgbackup .. _Ports-Tree: http://www.freshports.org/archivers/py-borgbackup/ .. _pkgsrc: http://pkgsrc.se/sysutils/py-borgbackup From f20a78cda8828c3e2fa264d23670be2f6b8dc632 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 19 Apr 2016 01:13:10 +0200 Subject: [PATCH 03/28] flexible compression --- borg/archive.py | 31 ++++++++++------ borg/archiver.py | 16 +++++--- borg/helpers.py | 75 ++++++++++++++++++++++++++++++++++++++ borg/key.py | 22 +++++++---- borg/testsuite/helpers.py | 52 +++++++++++++++++++++++++- docs/misc/compression.conf | 56 ++++++++++++++++++++++++++++ 6 files changed, 226 insertions(+), 26 deletions(-) create mode 100644 docs/misc/compression.conf diff --git a/borg/archive.py b/borg/archive.py index d37fb09eb..ef41c9900 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -15,13 +15,14 @@ import sys import time from io import BytesIO from . 
import xattr -from .compress import Compressor, COMPR_BUFFER +from .compress import COMPR_BUFFER from .constants import * # NOQA from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \ parse_timestamp, to_localtime, format_time, format_timedelta, safe_encode, safe_decode, \ Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \ ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \ - PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume + PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \ + CompressionDecider1, CompressionDecider2, CompressionSpec from .repository import Repository from .platform import acl_get, acl_set from .chunker import Chunker @@ -125,7 +126,7 @@ class Archive: def __init__(self, repository, key, manifest, name, cache=None, create=False, checkpoint_interval=300, numeric_owner=False, progress=False, - chunker_params=CHUNKER_PARAMS, start=None, end=None): + chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None): self.cwd = os.getcwd() self.key = key self.repository = repository @@ -148,6 +149,9 @@ class Archive: if create: self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats) self.chunker = Chunker(self.key.chunk_seed, *chunker_params) + self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'), + compression_files or []) + key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none')) if name in manifest.archives: raise self.AlreadyExists(name) self.last_checkpoint = time.time() @@ -592,11 +596,15 @@ Number of files: {0.stats.nfiles}'''.format( } # Only chunkify the file if needed if chunks is None: + compress = self.compression_decider1.decide(path) + logger.debug('%s -> compression %s', path, compress['name']) fh = Archive._open_rb(path) with os.fdopen(fh, 'rb') as 
fd: chunks = [] for data in self.chunker.chunkify(fd, fh): - chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats)) + chunks.append(cache.add_chunk(self.key.id_hash(data), + Chunk(data, compress=compress), + self.stats)) if self.show_progress: self.stats.show_progress(item=item, dt=0.2) cache.memorize_file(path_hash, st, [c.id for c in chunks]) @@ -939,7 +947,7 @@ class ArchiveRecreater: def __init__(self, repository, manifest, key, cache, matcher, exclude_caches=False, exclude_if_present=None, keep_tag_files=False, - chunker_params=None, compression=None, + chunker_params=None, compression=None, compression_files=None, dry_run=False, stats=False, progress=False, file_status_printer=None): self.repository = repository self.key = key @@ -952,12 +960,12 @@ class ArchiveRecreater: self.keep_tag_files = keep_tag_files self.chunker_params = chunker_params or CHUNKER_PARAMS - self.compression = compression or dict(name='none') - self.seen_chunks = set() self.recompress = bool(compression) - compr_args = dict(buffer=COMPR_BUFFER) - compr_args.update(self.compression) - key.compressor = Compressor(**compr_args) + self.compression = compression or CompressionSpec('none') + self.seen_chunks = set() + self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'), + compression_files or []) + key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none')) self.autocommit_threshold = max(self.AUTOCOMMIT_THRESHOLD, self.cache.chunks_stored_size() / 100) logger.debug("Autocommit threshold: %s", format_file_size(self.autocommit_threshold)) @@ -1045,6 +1053,7 @@ class ArchiveRecreater: def process_chunks(self, archive, target, item): """Return new chunk ID list for 'item'.""" + # TODO: support --compression-from if not self.recompress and not target.recreate_rechunkify: for chunk_id, size, csize in item[b'chunks']: self.cache.chunk_incref(chunk_id, target.stats) @@ -1239,7 +1248,7 @@ class ArchiveRecreater: 
def create_target_archive(self, name): target = Archive(self.repository, self.key, self.manifest, name, create=True, progress=self.progress, chunker_params=self.chunker_params, cache=self.cache, - checkpoint_interval=0) + checkpoint_interval=0, compression=self.compression) target.recreate_partial_chunks = None target.recreate_uncomitted_bytes = 0 return target diff --git a/borg/archiver.py b/borg/archiver.py index 6a68eaa4f..afe9162f4 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -279,14 +279,12 @@ class Archiver: dry_run = args.dry_run t0 = datetime.utcnow() if not dry_run: - compr_args = dict(buffer=COMPR_BUFFER) - compr_args.update(args.compression) - key.compressor = Compressor(**compr_args) with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache: archive = Archive(repository, key, manifest, args.location.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, numeric_owner=args.numeric_owner, progress=args.progress, - chunker_params=args.chunker_params, start=t0) + chunker_params=args.chunker_params, start=t0, + compression=args.compression, compression_files=args.compression_files) create_inner(archive, cache) else: create_inner(None, None) @@ -868,8 +866,8 @@ class Archiver: recreater = ArchiveRecreater(repository, manifest, key, cache, matcher, exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, - keep_tag_files=args.keep_tag_files, - compression=args.compression, chunker_params=args.chunker_params, + keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params, + compression=args.compression, compression_files=args.compression_files, progress=args.progress, stats=args.stats, file_status_printer=self.print_file_status, dry_run=args.dry_run) @@ -1349,6 +1347,9 @@ class Archiver: 'zlib,0 .. zlib,9 == zlib (with level 0..9),\n' 'lzma == lzma (default level 6),\n' 'lzma,0 .. 
lzma,9 == lzma (with level 0..9).') + archive_group.add_argument('--compression-from', dest='compression_files', + type=argparse.FileType('r'), action='append', + metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line') subparser.add_argument('location', metavar='ARCHIVE', type=location_validator(archive=True), @@ -1815,6 +1816,9 @@ class Archiver: 'zlib,0 .. zlib,9 == zlib (with level 0..9),\n' 'lzma == lzma (default level 6),\n' 'lzma,0 .. lzma,9 == lzma (with level 0..9).') + archive_group.add_argument('--compression-from', dest='compression_files', + type=argparse.FileType('r'), action='append', + metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line') archive_group.add_argument('--chunker-params', dest='chunker_params', type=ChunkerParams, default=None, metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE', diff --git a/borg/helpers.py b/borg/helpers.py index 994ad9a3e..b134ba86b 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -31,6 +31,7 @@ from . import hashindex from . import chunker from .constants import * # NOQA from . import crypto +from .compress import COMPR_BUFFER from . import shellpattern import msgpack import msgpack.fallback @@ -1423,3 +1424,77 @@ except ImportError: def scandir_inorder(path='.'): return sorted(scandir(path), key=lambda dirent: dirent.inode()) + + +def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True): + """ + clean lines (usually read from a config file): + + 1. strip whitespace (left and right), 2. remove empty lines, 3. remove comments. + + note: only "pure comment lines" are supported, no support for "trailing comments". + + :param lines: input line iterator (e.g. 
list or open text file) that gives unclean input lines + :param lstrip: lstrip call arguments or False, if lstripping is not desired + :param rstrip: rstrip call arguments or False, if rstripping is not desired + :param remove_comments: remove comment lines (lines starting with "#") + :param remove_empty: remove empty lines + :return: yields processed lines + """ + for line in lines: + if lstrip is not False: + line = line.lstrip(lstrip) + if rstrip is not False: + line = line.rstrip(rstrip) + if remove_empty and not line: + continue + if remove_comments and line.startswith('#'): + continue + yield line + + +class CompressionDecider1: + def __init__(self, compression, compression_files): + """ + Initialize a CompressionDecider instance (and read config files, if needed). + + :param compression: default CompressionSpec (e.g. from --compression option) + :param compression_files: list of compression config files (e.g. from --compression-from) or + a list of other line iterators + """ + self.compression = compression + if not compression_files: + self.matcher = None + else: + self.matcher = PatternMatcher(fallback=compression) + for file in compression_files: + try: + for line in clean_lines(file): + try: + compr_spec, fn_pattern = line.split(':', 1) + except: + continue + self.matcher.add([parse_pattern(fn_pattern)], CompressionSpec(compr_spec)) + finally: + if hasattr(file, 'close'): + file.close() + + def decide(self, path): + if self.matcher is not None: + return self.matcher.match(path) + return self.compression + + +class CompressionDecider2: + def __init__(self, compression): + self.compression = compression + + def decide(self, chunk): + # nothing fancy here yet: we either use what the metadata says or the default + # later, we can decide based on the chunk data also. + # if we compress the data here to decide, we can even update the chunk data + # and modify the metadata as desired. 
+ compr_spec = chunk.meta.get('compress', self.compression) + compr_args = dict(buffer=COMPR_BUFFER) + compr_args.update(compr_spec) + return compr_args, chunk diff --git a/borg/key.py b/borg/key.py index ad960b796..0786a49da 100644 --- a/borg/key.py +++ b/borg/key.py @@ -7,13 +7,13 @@ import textwrap from hmac import compare_digest from hashlib import sha256, pbkdf2_hmac -from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes, bin_to_hex +from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes, bin_to_hex, CompressionDecider2, CompressionSpec from .logger import create_logger logger = create_logger() from .constants import * # NOQA from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256 -from .compress import Compressor, COMPR_BUFFER +from .compress import Compressor, COMPR_BUFFER, get_compressor import msgpack PREFIX = b'\0' * 8 @@ -71,12 +71,20 @@ class KeyBase: self.TYPE_STR = bytes([self.TYPE]) self.repository = repository self.target = None # key location file path / repo obj - self.compressor = Compressor('none', buffer=COMPR_BUFFER) + self.compression_decider2 = CompressionDecider2(CompressionSpec('none')) + self.compressor = Compressor('none', buffer=COMPR_BUFFER) # for decompression def id_hash(self, data): """Return HMAC hash using the "id" HMAC key """ + def compress(self, chunk): + compr_args, chunk = self.compression_decider2.decide(chunk) + compressor = Compressor(**compr_args) + meta, data = chunk + data = compressor.compress(data) + return Chunk(data, **meta) + def encrypt(self, chunk): pass @@ -102,8 +110,8 @@ class PlaintextKey(KeyBase): return sha256(data).digest() def encrypt(self, chunk): - meta, data = chunk - return b''.join([self.TYPE_STR, self.compressor.compress(data)]) + chunk = self.compress(chunk) + return b''.join([self.TYPE_STR, chunk.data]) def decrypt(self, id, data): if data[0] != self.TYPE: @@ -135,9 +143,9 @@ class AESKeyBase(KeyBase): return 
hmac_sha256(self.id_key, data) def encrypt(self, chunk): - data = self.compressor.compress(chunk.data) + chunk = self.compress(chunk) self.enc_cipher.reset() - data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) + data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(chunk.data))) hmac = hmac_sha256(self.enc_hmac_key, data) return b''.join((self.TYPE_STR, hmac, data)) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index c0b9a049a..c86a0b3b2 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -10,11 +10,12 @@ import msgpack import msgpack.fallback import time -from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, \ +from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, clean_lines, \ prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \ yes, TRUISH, FALSISH, DEFAULTISH, \ - StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, CompressionSpec, ChunkerParams, Chunk, \ + StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, ChunkerParams, Chunk, \ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \ + CompressionSpec, CompressionDecider1, CompressionDecider2, \ PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format, ChunkIteratorFileWrapper from . 
import BaseTestCase, environment_variable, FakeInputs @@ -915,3 +916,50 @@ def test_chunk_file_wrapper(): cfw = ChunkIteratorFileWrapper(iter([])) assert cfw.read(2) == b'' assert cfw.exhausted + + +def test_clean_lines(): + conf = """\ +#comment +data1 #data1 +data2 + + data3 +""".splitlines(keepends=True) + assert list(clean_lines(conf)) == ['data1 #data1', 'data2', 'data3', ] + assert list(clean_lines(conf, lstrip=False)) == ['data1 #data1', 'data2', ' data3', ] + assert list(clean_lines(conf, rstrip=False)) == ['data1 #data1\n', 'data2\n', 'data3\n', ] + assert list(clean_lines(conf, remove_empty=False)) == ['data1 #data1', 'data2', '', 'data3', ] + assert list(clean_lines(conf, remove_comments=False)) == ['#comment', 'data1 #data1', 'data2', 'data3', ] + + +def test_compression_decider1(): + default = CompressionSpec('zlib') + conf = """ +# use super-fast lz4 compression on huge VM files in this path: +lz4:/srv/vm_disks + +# jpeg or zip files do not compress: +none:*.jpeg +none:*.zip +""".splitlines() + + cd = CompressionDecider1(default, []) # no conf, always use default + assert cd.decide('/srv/vm_disks/linux')['name'] == 'zlib' + assert cd.decide('test.zip')['name'] == 'zlib' + assert cd.decide('test')['name'] == 'zlib' + + cd = CompressionDecider1(default, [conf, ]) + assert cd.decide('/srv/vm_disks/linux')['name'] == 'lz4' + assert cd.decide('test.zip')['name'] == 'none' + assert cd.decide('test')['name'] == 'zlib' # no match in conf, use default + + +def test_compression_decider2(): + default = CompressionSpec('zlib') + + cd = CompressionDecider2(default) + compr_spec, chunk = cd.decide(Chunk(None)) + assert compr_spec['name'] == 'zlib' + compr_spec, chunk = cd.decide(Chunk(None, compress=CompressionSpec('lzma'))) + assert compr_spec['name'] == 'lzma' diff --git a/docs/misc/compression.conf b/docs/misc/compression.conf new file mode 100644 index 000000000..881f5fe9a --- /dev/null +++ b/docs/misc/compression.conf @@ -0,0 +1,56 @@ +# example config file 
for --compression-from option +# +# Format of non-comment / non-empty lines: +# <compression-spec>:<path/filename pattern> +# compression-spec is same format as for --compression option +# path/filename pattern is same format as for --exclude option + +# archives / files: +none:*.gz +none:*.tgz +none:*.bz2 +none:*.tbz2 +none:*.xz +none:*.txz +none:*.lzma +none:*.lzo +none:*.zip +none:*.rar +none:*.7z + +# audio: +none:*.mp3 +none:*.ogg +none:*.oga +none:*.flac +none:*.aac +none:*.m4a + +# video: +none:*.mp4 +none:*.mkv +none:*.m4v +none:*.avi +none:*.mpg +none:*.mpeg +none:*.webm +none:*.vob +none:*.ts +none:*.ogv +none:*.mov +none:*.flv +none:*.ogm + +# pictures/images +none:*.jpg +none:*.jpeg +none:*.png +none:*.gif + +# disk images +none:*.dmg + +# software archives +none:*.rpm +none:*.deb +none:*.msi From 8d3b1a5804c0a4d492a6afae397a97d2e7fa5a44 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Thu, 28 Apr 2016 00:06:19 +0200 Subject: [PATCH 04/28] Add self tests These run in ~100 ms here, so even on much slower machines (where also Python setup will be slower) it shouldn't be noticeable at all.
--- borg/archiver.py | 7 +++- borg/selftest.py | 79 +++++++++++++++++++++++++++++++++++++ borg/testsuite/__init__.py | 17 +++++--- borg/testsuite/archiver.py | 1 + borg/testsuite/chunker.py | 3 ++ borg/testsuite/conftest.py | 4 ++ borg/testsuite/crypto.py | 3 ++ borg/testsuite/hashindex.py | 32 ++++++++------- 8 files changed, 125 insertions(+), 21 deletions(-) create mode 100644 borg/selftest.py create mode 100644 borg/testsuite/conftest.py diff --git a/borg/archiver.py b/borg/archiver.py index c56da85f8..b3927de84 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -34,6 +34,7 @@ from .constants import * # NOQA from .key import key_creator, RepoKey, PassphraseKey from .archive import Archive, ArchiveChecker, ArchiveRecreater from .remote import RepositoryServer, RemoteRepository, cache_if_remote +from .selftest import selftest from .hashindex import ChunkIndexEntry has_lchflags = hasattr(os, 'lchflags') @@ -1901,13 +1902,17 @@ class Archiver: update_excludes(args) return args + def prerun_checks(self, logger): + check_extension_modules() + selftest(logger) + def run(self, args): os.umask(args.umask) # early, before opening files self.lock_wait = args.lock_wait setup_logging(level=args.log_level, is_serve=args.func == self.do_serve) # do not use loggers before this! if args.show_version: logger.info('borgbackup version %s' % __version__) - check_extension_modules() + self.prerun_checks(logger) if is_slow_msgpack(): logger.warning("Using a pure-python msgpack! This will result in lower performance.") return args.func(args) diff --git a/borg/selftest.py b/borg/selftest.py new file mode 100644 index 000000000..2093b89a9 --- /dev/null +++ b/borg/selftest.py @@ -0,0 +1,79 @@ +""" +Self testing module +=================== + +The selftest() function runs a small test suite of relatively fast tests that are meant to discover issues +with the way Borg was compiled or packaged and also bugs in Borg itself. 
+ +These tests are a subset of the borg/testsuite and are run with Python's built-in unittest, hence none of +the tests used for this can or should be ported to py.test currently. + +To assert that self test discovery works correctly the number of tests is kept in the SELFTEST_COUNT +variable. SELFTEST_COUNT must be updated if new tests are added or removed to or from any of the tests +used here. +""" + + +import sys +import time +from unittest import TestResult, TestSuite, defaultTestLoader + +from .testsuite.hashindex import HashIndexDataTestCase, HashIndexRefcountingTestCase, HashIndexTestCase +from .testsuite.crypto import CryptoTestCase +from .testsuite.chunker import ChunkerTestCase + +SELFTEST_CASES = [ + HashIndexDataTestCase, + HashIndexRefcountingTestCase, + HashIndexTestCase, + CryptoTestCase, + ChunkerTestCase, +] + +SELFTEST_COUNT = 27 + + +class SelfTestResult(TestResult): + def __init__(self): + super().__init__() + self.successes = [] + + def addSuccess(self, test): + super().addSuccess(test) + self.successes.append(test) + + def test_name(self, test): + return test.shortDescription() or str(test) + + def log_results(self, logger): + for test, failure in self.errors + self.failures + self.unexpectedSuccesses: + logger.error('self test %s FAILED:\n%s', self.test_name(test), failure) + for test, reason in self.skipped: + logger.warning('self test %s skipped: %s', self.test_name(test), reason) + + def successful_test_count(self): + return len(self.successes) + + +def selftest(logger): + selftest_started = time.perf_counter() + result = SelfTestResult() + test_suite = TestSuite() + for test_case in SELFTEST_CASES: + test_suite.addTest(defaultTestLoader.loadTestsFromTestCase(test_case)) + test_suite.run(result) + result.log_results(logger) + successful_tests = result.successful_test_count() + count_mismatch = successful_tests != SELFTEST_COUNT + if result.wasSuccessful() and count_mismatch: + # only print this if all tests succeeded + logger.error("self
test count (%d != %d) mismatch, either test discovery is broken or a test was added " + "without updating borg.selftest", + successful_tests, SELFTEST_COUNT) + if not result.wasSuccessful() or count_mismatch: + logger.error("self test failed\n" + "This is a bug either in Borg or in the package / distribution you use.") + sys.exit(2) + assert False, "sanity assertion failed: ran beyond sys.exit()" + selftest_elapsed = time.perf_counter() - selftest_started + logger.debug("%d self tests completed in %.2f seconds", successful_tests, selftest_elapsed) diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index 5c1a0a6fd..cccf97a82 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -8,7 +8,8 @@ import sysconfig import time import unittest from ..xattr import get_all -from ..logger import setup_logging + +# Note: this is used by borg.selftest, do not use or import py.test functionality here. try: import llfuse @@ -17,6 +18,11 @@ try: except ImportError: have_fuse_mtime_ns = False +try: + from pytest import raises +except ImportError: + raises = None + has_lchflags = hasattr(os, 'lchflags') @@ -31,9 +37,6 @@ else: if sys.platform.startswith('netbsd'): st_mtime_ns_round = -4 # only >1 microsecond resolution here? 
-# Ensure that the loggers exist for all tests -setup_logging() - class BaseTestCase(unittest.TestCase): """ @@ -42,9 +45,13 @@ class BaseTestCase(unittest.TestCase): assert_not_in = unittest.TestCase.assertNotIn assert_equal = unittest.TestCase.assertEqual assert_not_equal = unittest.TestCase.assertNotEqual - assert_raises = unittest.TestCase.assertRaises assert_true = unittest.TestCase.assertTrue + if raises: + assert_raises = staticmethod(raises) + else: + assert_raises = unittest.TestCase.assertRaises + @contextmanager def assert_creates_file(self, path): self.assert_true(not os.path.exists(path), '{} should not exist'.format(path)) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 115e14736..c57e397df 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -62,6 +62,7 @@ def exec_cmd(*args, archiver=None, fork=False, exe=None, **kw): sys.stdout = sys.stderr = output = StringIO() if archiver is None: archiver = Archiver() + archiver.prerun_checks = lambda *args: None archiver.exit_code = EXIT_SUCCESS args = archiver.parse_args(list(args)) ret = archiver.run(args) diff --git a/borg/testsuite/chunker.py b/borg/testsuite/chunker.py index 0db7203d5..2a14bd604 100644 --- a/borg/testsuite/chunker.py +++ b/borg/testsuite/chunker.py @@ -4,6 +4,9 @@ from ..chunker import Chunker, buzhash, buzhash_update from ..constants import * # NOQA from . import BaseTestCase +# Note: these tests are part of the self test, do not use or import py.test functionality here. +# See borg.selftest for details. 
If you add/remove test methods, update SELFTEST_COUNT + class ChunkerTestCase(BaseTestCase): diff --git a/borg/testsuite/conftest.py b/borg/testsuite/conftest.py new file mode 100644 index 000000000..0c350fb7f --- /dev/null +++ b/borg/testsuite/conftest.py @@ -0,0 +1,4 @@ +from ..logger import setup_logging + +# Ensure that the loggers exist for all tests +setup_logging() diff --git a/borg/testsuite/crypto.py b/borg/testsuite/crypto.py index 9609e259a..e3eff8bec 100644 --- a/borg/testsuite/crypto.py +++ b/borg/testsuite/crypto.py @@ -3,6 +3,9 @@ from binascii import hexlify, unhexlify from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, hmac_sha256 from . import BaseTestCase +# Note: these tests are part of the self test, do not use or import py.test functionality here. +# See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT + class CryptoTestCase(BaseTestCase): diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py index 3aac0c7db..000dfe4c3 100644 --- a/borg/testsuite/hashindex.py +++ b/borg/testsuite/hashindex.py @@ -1,15 +1,16 @@ import base64 import hashlib import os -import struct import tempfile import zlib -import pytest from ..hashindex import NSIndex, ChunkIndex from .. import hashindex from . import BaseTestCase +# Note: these tests are part of the self test, do not use or import py.test functionality here. +# See borg.selftest for details. 
If you add/remove test methods, update SELFTEST_COUNT + def H(x): # make some 32byte long thing that depends on x @@ -194,7 +195,7 @@ class HashIndexRefcountingTestCase(BaseTestCase): def test_decref_zero(self): idx1 = ChunkIndex() idx1[H(1)] = 0, 0, 0 - with pytest.raises(AssertionError): + with self.assert_raises(AssertionError): idx1.decref(H(1)) def test_incref_decref(self): @@ -208,18 +209,18 @@ class HashIndexRefcountingTestCase(BaseTestCase): def test_setitem_raises(self): idx1 = ChunkIndex() - with pytest.raises(AssertionError): + with self.assert_raises(AssertionError): idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0 def test_keyerror(self): idx = ChunkIndex() - with pytest.raises(KeyError): + with self.assert_raises(KeyError): idx.incref(H(1)) - with pytest.raises(KeyError): + with self.assert_raises(KeyError): idx.decref(H(1)) - with pytest.raises(KeyError): + with self.assert_raises(KeyError): idx[H(1)] - with pytest.raises(OverflowError): + with self.assert_raises(OverflowError): idx.add(H(1), -1, 0, 0) @@ -269,10 +270,11 @@ class HashIndexDataTestCase(BaseTestCase): assert idx1[H(3)] == (hashindex.MAX_VALUE, 6, 7) -def test_nsindex_segment_limit(): - idx = NSIndex() - with pytest.raises(AssertionError): - idx[H(1)] = hashindex.MAX_VALUE + 1, 0 - assert H(1) not in idx - idx[H(2)] = hashindex.MAX_VALUE, 0 - assert H(2) in idx +class NSIndexTestCase(BaseTestCase): + def test_nsindex_segment_limit(self): + idx = NSIndex() + with self.assert_raises(AssertionError): + idx[H(1)] = hashindex.MAX_VALUE + 1, 0 + assert H(1) not in idx + idx[H(2)] = hashindex.MAX_VALUE, 0 + assert H(2) in idx From fcd323f22ac8f1c8facf5f0306fd7026f2399726 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sat, 23 Apr 2016 11:39:42 +0200 Subject: [PATCH 05/28] Bump API version of crypto.pyx (overlooked in c5bd7f2) --- borg/crypto.pyx | 2 +- borg/helpers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/crypto.pyx b/borg/crypto.pyx index 
8bee39fe4..16f1cda82 100644 --- a/borg/crypto.pyx +++ b/borg/crypto.pyx @@ -5,7 +5,7 @@ This could be replaced by PyCrypto maybe? from libc.stdlib cimport malloc, free from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release -API_VERSION = 2 +API_VERSION = 3 cdef extern from "openssl/rand.h": diff --git a/borg/helpers.py b/borg/helpers.py index 9259c8f81..fb61c49e5 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -79,7 +79,7 @@ def check_extension_modules(): raise ExtensionModuleError if chunker.API_VERSION != 2: raise ExtensionModuleError - if crypto.API_VERSION != 2: + if crypto.API_VERSION != 3: raise ExtensionModuleError if platform.API_VERSION != 2: raise ExtensionModuleError From 4b73ebc613b10f2a5ed6f15dd5d99ba89d6c2f20 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 29 Apr 2016 03:19:27 +0200 Subject: [PATCH 06/28] update readthedocs urls, fixes #991 --- README.rst | 2 +- docs/resources.rst | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index c0eed00ce..2417b24d5 100644 --- a/README.rst +++ b/README.rst @@ -107,7 +107,7 @@ Now doing another backup, just to show off the great deduplication:: ----------------------------------------------------------------------------- -For a graphical frontend refer to our complementary project `BorgWeb `_. +For a graphical frontend refer to our complementary project `BorgWeb `_. 
Links ===== diff --git a/docs/resources.rst b/docs/resources.rst index 4113c11d4..59fa0310a 100644 --- a/docs/resources.rst +++ b/docs/resources.rst @@ -36,6 +36,6 @@ Some of them refer to attic, but you can do the same stuff (and more) with borgb Software -------- -- `BorgWeb - a very simple web UI for BorgBackup `_ +- `BorgWeb - a very simple web UI for BorgBackup `_ - some other stuff found at the `BorgBackup Github organisation `_ - `atticmatic `_ (includes borgmatic) diff --git a/setup.py b/setup.py index 23d8b7cda..ff9899da5 100644 --- a/setup.py +++ b/setup.py @@ -236,7 +236,7 @@ setup( }, author='The Borg Collective (see AUTHORS file)', author_email='borgbackup@python.org', - url='https://borgbackup.readthedocs.org/', + url='https://borgbackup.readthedocs.io/', description='Deduplicated, encrypted, authenticated and compressed backups', long_description=long_description, license='BSD', From 700e715b23eda3ef57a60bc651cabbb82f46cbf5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 30 Apr 2016 00:32:25 +0200 Subject: [PATCH 07/28] add missing docs for borg break-lock, fixes #992 --- docs/usage.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/usage.rst b/docs/usage.rst index a9e5bda75..e14f5dfff 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -540,6 +540,9 @@ Examples no key file found for repository +.. 
include:: usage/break-lock.rst.inc + + Miscellaneous Help ------------------ From 240e5696b715277927842788801ff1e05bcd1c2b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 1 May 2016 20:18:17 +0200 Subject: [PATCH 08/28] fix vagrant ssh syntax, fixes #999 --- docs/development.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/development.rst b/docs/development.rst index 524957e01..7d6323dce 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -122,7 +122,9 @@ Usage:: # To create and provision the VM: vagrant up OS # To create an ssh session to the VM: - vagrant ssh OS command + vagrant ssh OS + # To execute a command via ssh in the VM: + vagrant ssh OS -c "command args" # To shut down the VM: vagrant halt OS # To shut down and destroy the VM: From a5ba0abe42aed8e00d5092d430ead0eee90bc849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Sipma?= Date: Fri, 22 Apr 2016 19:48:26 +0200 Subject: [PATCH 09/28] support new env var 'BORG_KEY_FILE' --- borg/key.py | 32 ++++++++++++++++++++++++++++---- docs/usage.rst | 2 ++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/borg/key.py b/borg/key.py index 0786a49da..81bf69756 100644 --- a/borg/key.py +++ b/borg/key.py @@ -35,6 +35,14 @@ class KeyfileNotFoundError(Error): """No key file for repository {} found in {}.""" +class KeyfileInvalidError(Error): + """Invalid key file for repository {} found in {}.""" + + +class KeyfileMismatchError(Error): + """Mismatch between repository {} and key file {}.""" + + class RepoKeyNotFoundError(Error): """No key entry found in the config of repository {}.""" @@ -404,17 +412,33 @@ class KeyfileKey(KeyfileKeyBase): TYPE = 0x00 FILE_ID = 'BORG_KEY' + def sanity_check(self, filename, id): + with open(filename, 'r') as fd: + line = fd.readline().strip() + if not line.startswith(self.FILE_ID): + raise KeyfileInvalidError(self.repository._location.canonical_path(), filename) + if line[len(self.FILE_ID) + 1:] != id: + raise 
KeyfileMismatchError(self.repository._location.canonical_path(), filename) + return filename + def find_key(self): + id = self.repository.id_str + keyfile = os.environ.get('BORG_KEY_FILE') + if keyfile: + return self.sanity_check(keyfile, id) keys_dir = get_keys_dir() for name in os.listdir(keys_dir): filename = os.path.join(keys_dir, name) - with open(filename, 'r') as fd: - line = fd.readline().strip() - if line.startswith(self.FILE_ID) and line[len(self.FILE_ID) + 1:] == self.repository.id_str: - return filename + try: + return self.sanity_check(filename, id) + except (KeyfileInvalidError, KeyfileMismatchError): + pass raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir()) def get_new_target(self, args): + keyfile = os.environ.get('BORG_KEY_FILE') + if keyfile: + return keyfile filename = args.location.to_key_filename() path = filename i = 1 diff --git a/docs/usage.rst b/docs/usage.rst index 64ee75076..9cbb4503d 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -77,6 +77,8 @@ General: When set, use the value to answer the passphrase question for encrypted repositories. BORG_DISPLAY_PASSPHRASE When set, use the value to answer the "display the passphrase for verification" question when defining a new passphrase for encrypted repositories. + BORG_KEY_FILE + When set, use the given filename as repository key file. BORG_LOGGING_CONF When set, use the given filename as INI_-style logging configuration. 
BORG_RSH From 0bdc757bbf845cf4d057b083ada13bec4c07565f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 1 May 2016 22:10:29 +0200 Subject: [PATCH 10/28] add tests for BORG_KEY_FILE env var --- borg/testsuite/key.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py index 9e01103ad..11eb35061 100644 --- a/borg/testsuite/key.py +++ b/borg/testsuite/key.py @@ -7,7 +7,7 @@ from binascii import hexlify, unhexlify from ..crypto import bytes_to_long, num_aes_blocks from ..key import PlaintextKey, PassphraseKey, KeyfileKey from ..helpers import Location, Chunk, bin_to_hex -from . import BaseTestCase +from . import BaseTestCase, environment_variable class KeyTestCase(BaseTestCase): @@ -34,9 +34,11 @@ class KeyTestCase(BaseTestCase): def setUp(self): self.tmppath = tempfile.mkdtemp() os.environ['BORG_KEYS_DIR'] = self.tmppath + self.tmppath2 = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tmppath) + shutil.rmtree(self.tmppath2) class MockRepository: class _Location: @@ -71,6 +73,20 @@ class KeyTestCase(BaseTestCase): chunk = Chunk(b'foo') self.assert_equal(chunk, key2.decrypt(key.id_hash(chunk.data), key.encrypt(chunk))) + def test_keyfile_kfenv(self): + keyfile = os.path.join(self.tmppath2, 'keyfile') + with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='testkf'): + assert not os.path.exists(keyfile) + key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) + assert os.path.exists(keyfile) + chunk = Chunk(b'XXX') + chunk_id = key.id_hash(chunk.data) + chunk_cdata = key.encrypt(chunk) + key = KeyfileKey.detect(self.MockRepository(), chunk_cdata) + self.assert_equal(chunk, key.decrypt(chunk_id, chunk_cdata)) + os.unlink(keyfile) + self.assert_raises(FileNotFoundError, KeyfileKey.detect, self.MockRepository(), chunk_cdata) + def test_keyfile2(self): with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd: 
fd.write(self.keyfile2_key_file) @@ -78,6 +94,14 @@ class KeyTestCase(BaseTestCase): key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload') + def test_keyfile2_kfenv(self): + keyfile = os.path.join(self.tmppath2, 'keyfile') + with open(keyfile, 'w') as fd: + fd.write(self.keyfile2_key_file) + with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='passphrase'): + key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) + self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload') + def test_passphrase(self): os.environ['BORG_PASSPHRASE'] = 'test' key = PassphraseKey.create(self.MockRepository(), None) From e7ca74fbef19162e02bcfe8fb26b3587ccb1dae0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 2 May 2016 00:38:16 +0200 Subject: [PATCH 11/28] move BORG_KEY_FILE closer to BORG_KEYS_DIR --- docs/usage.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 9cbb4503d..13d02aa93 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -77,8 +77,6 @@ General: When set, use the value to answer the passphrase question for encrypted repositories. BORG_DISPLAY_PASSPHRASE When set, use the value to answer the "display the passphrase for verification" question when defining a new passphrase for encrypted repositories. - BORG_KEY_FILE - When set, use the given filename as repository key file. BORG_LOGGING_CONF When set, use the given filename as INI_-style logging configuration. BORG_RSH @@ -103,9 +101,11 @@ Some automatic "answerers" (if set, they automatically answer confirmation quest answer or ask you interactively, depending on whether retries are allowed (they by default are allowed). So please test your scripts interactively before making them a non-interactive script. -Directories: +Directories and files: BORG_KEYS_DIR Default to '~/.config/borg/keys'. 
This directory contains keys for encrypted repositories. + BORG_KEY_FILE + When set, use the given filename as repository key file. BORG_CACHE_DIR Default to '~/.cache/borg'. This directory contains the local cache and might need a lot of space for dealing with big repositories). From b743fd09ab0c56484c1400d1bbdfc08a4836e157 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 2 May 2016 01:12:15 +0200 Subject: [PATCH 12/28] borg prune: ignore checkpoints, fixes #997 also: - add a test for this - add some words to borg create help about the archive name --- borg/archiver.py | 8 ++++++++ borg/testsuite/archiver.py | 9 ++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index 1b2d6875f..995278b75 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -7,6 +7,7 @@ import functools import inspect import io import os +import re import shlex import signal import stat @@ -574,6 +575,10 @@ class Archiver: archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list if args.prefix: archives = [archive for archive in archives if archive.name.startswith(args.prefix)] + # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup) + # that is newer than a successfully completed backup - and killing the successful backup. + is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search + archives = [archive for archive in archives if not is_checkpoint(archive.name)] keep = [] if args.within: keep += prune_within(archives, args.within) @@ -988,6 +993,9 @@ class Archiver: traversing all paths specified. The archive will consume almost no disk space for files or parts of files that have already been stored in other archives. + The archive name needs to be unique. It must not end in '.checkpoint' or + '.checkpoint.N' (with N being a number), because these names are used for + checkpoints and treated in special ways. 
To speed up pulling backups over sshfs and similar network file systems which do not provide correct inode information the --ignore-inode flag can be used. This diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 7ee13a77f..6a89213e1 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -863,15 +863,22 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('init', self.repository_location) self.cmd('create', self.repository_location + '::test1', src_dir) self.cmd('create', self.repository_location + '::test2', src_dir) + # these are not really a checkpoints, but they look like some: + self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir) + self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir) output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2') - self.assert_in('Keeping archive: test2', output) self.assert_in('Would prune: test1', output) + # must keep the latest non-checkpoint archive: + self.assert_in('Keeping archive: test2', output) output = self.cmd('list', self.repository_location) self.assert_in('test1', output) self.assert_in('test2', output) + self.assert_in('test3.checkpoint', output) + self.assert_in('test3.checkpoint.1', output) self.cmd('prune', self.repository_location, '--keep-daily=2') output = self.cmd('list', self.repository_location) self.assert_not_in('test1', output) + # the latest non-checkpoint archive must be still there: self.assert_in('test2', output) def test_prune_repository_save_space(self): From 962c2e9d5458618d8cf3018575d81ac72608d45f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 28 Apr 2016 01:28:43 +0200 Subject: [PATCH 13/28] borg with-lock REPO CMD ARGS --- borg/archiver.py | 42 ++++++++++++++++++++++++++++++++++++ borg/testsuite/archiver.py | 6 ++++++ docs/usage.rst | 3 +++ docs/usage/with-lock.rst.inc | 32 +++++++++++++++++++++++++++ 4 files changed, 83 insertions(+) create 
mode 100644 docs/usage/with-lock.rst.inc diff --git a/borg/archiver.py b/borg/archiver.py index 5729f51f5..949d8fbf7 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -12,6 +12,7 @@ import os import shlex import signal import stat +import subprocess import sys import textwrap import traceback @@ -895,6 +896,21 @@ class Archiver: cache.commit() return self.exit_code + @with_repository(manifest=False) + def do_with_lock(self, args, repository): + """run a user specified command with the repository lock held""" + # re-write manifest to start a repository transaction - this causes a + # lock upgrade to exclusive for remote (and also for local) repositories. + # by using manifest=False in the decorator, we avoid having to require + # the encryption key (and can operate just with encrypted data). + data = repository.get(Manifest.MANIFEST_ID) + repository.put(Manifest.MANIFEST_ID, data) + try: + # we exit with the return code we get from the subprocess + return subprocess.call([args.command] + args.args) + finally: + repository.rollback() + @with_repository() def do_debug_dump_archive_items(self, args, repository, manifest, key): """dump (decrypted, decompressed) archive items metadata (not: data)""" @@ -1831,6 +1847,32 @@ class Archiver: subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, help='paths to recreate; patterns are supported') + with_lock_epilog = textwrap.dedent(""" + This command runs a user-specified command while the repository lock is held. + + It will first try to acquire the lock (make sure that no other operation is + running in the repo), then execute the given command as a subprocess and wait + for its termination, release the lock and return the user command's return + code as borg's return code. + + Note: if you copy a repository with the lock held, the lock will be present in + the copy, obviously. Thus, before using borg on the copy, you need to + use "borg break-lock" on it. 
+ """) + subparser = subparsers.add_parser('with-lock', parents=[common_parser], add_help=False, + description=self.do_with_lock.__doc__, + epilog=with_lock_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter, + help='run user command with lock held') + subparser.set_defaults(func=self.do_with_lock) + subparser.add_argument('location', metavar='REPOSITORY', + type=location_validator(archive=False), + help='repository to lock') + subparser.add_argument('command', metavar='COMMAND', + help='command to run') + subparser.add_argument('args', metavar='ARGS', nargs=argparse.REMAINDER, + help='command arguments') + subparser = subparsers.add_parser('help', parents=[common_parser], add_help=False, description='Extra help') subparser.add_argument('--epilog-only', dest='epilog_only', diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index bbefea3f2..73632ee95 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -1399,6 +1399,12 @@ class ArchiverTestCase(ArchiverTestCaseBase): info_after = self.cmd('info', self.repository_location + '::test') assert info_before == info_after # includes archive ID + def test_with_lock(self): + self.cmd('init', self.repository_location) + lock_path = os.path.join(self.repository_path, 'lock.exclusive') + cmd = 'python3', '-c', 'import os, sys; sys.exit(42 if os.path.exists("%s") else 23)' % lock_path + self.cmd('with-lock', self.repository_location, *cmd, fork=True, exit_code=42) + @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available') class ArchiverTestCaseBinary(ArchiverTestCase): diff --git a/docs/usage.rst b/docs/usage.rst index 13d02aa93..d1cb8a934 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -647,6 +647,9 @@ Examples ... +.. include:: usage/with-lock.rst.inc + + .. 
include:: usage/break-lock.rst.inc diff --git a/docs/usage/with-lock.rst.inc b/docs/usage/with-lock.rst.inc new file mode 100644 index 000000000..3037ee809 --- /dev/null +++ b/docs/usage/with-lock.rst.inc @@ -0,0 +1,32 @@ +.. _borg_with-lock: + +borg with-lock +-------------- +:: + + borg with-lock REPOSITORY COMMAND ARGS + +positional arguments + REPOSITORY + repository to lock + COMMAND + command to run + ARGS + command arguments + +`Common options`_ + | + +Description +~~~~~~~~~~~ + +This command runs a user-specified command while the repository lock is held. + +It will first try to acquire the lock (make sure that no other operation is +running in the repo), then execute the given command as a subprocess and wait +for its termination, release the lock and return the user command's return +code as borg's return code. + +Note: if you copy a repository with the lock held, the lock will be present in + the copy, obviously. Thus, before using borg on the copy, you need to + use "borg break-lock" on it. 
From 75b3e786ed1b28662095e8b6a96fd9b198532f6f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 2 May 2016 21:50:59 +0200 Subject: [PATCH 14/28] implement compression heuristics based on lz4-compressibility, fixes #1006 also: add some tests that invoke all supported compression algorithms --- borg/archiver.py | 4 +++ borg/helpers.py | 27 +++++++++++++++++- borg/testsuite/archiver.py | 58 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index 949d8fbf7..e99f8e8d6 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -1359,6 +1359,8 @@ class Archiver: type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION', help='select compression algorithm (and level):\n' 'none == no compression (default),\n' + 'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n' + ' being any valid compression algorithm (and optional level),\n' 'lz4 == lz4,\n' 'zlib == zlib (default level 6),\n' 'zlib,0 .. zlib,9 == zlib (with level 0..9),\n' @@ -1828,6 +1830,8 @@ class Archiver: type=CompressionSpec, default=None, metavar='COMPRESSION', help='select compression algorithm (and level):\n' 'none == no compression (default),\n' + 'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n' + ' being any valid compression algorithm (and optional level),\n' 'lz4 == lz4,\n' 'zlib == zlib (default level 6),\n' 'zlib,0 .. zlib,9 == zlib (with level 0..9),\n' diff --git a/borg/helpers.py b/borg/helpers.py index 395e078e0..999da93c5 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -31,7 +31,7 @@ from . import hashindex from . import chunker from .constants import * # NOQA from . import crypto -from .compress import COMPR_BUFFER +from .compress import COMPR_BUFFER, get_compressor from . 
import shellpattern import msgpack import msgpack.fallback @@ -530,6 +530,12 @@ def CompressionSpec(s): else: raise ValueError return dict(name=name, level=level) + if name == 'auto': + if 2 <= count <= 3: + compression = ','.join(values[1:]) + else: + raise ValueError + return dict(name=name, spec=CompressionSpec(compression)) raise ValueError @@ -1497,4 +1503,23 @@ class CompressionDecider2: compr_spec = chunk.meta.get('compress', self.compression) compr_args = dict(buffer=COMPR_BUFFER) compr_args.update(compr_spec) + if compr_args['name'] == 'auto': + # we did not decide yet, use heuristic: + compr_args, chunk = self.heuristic_lz4(compr_args, chunk) return compr_args, chunk + + def heuristic_lz4(self, compr_args, chunk): + meta, data = chunk + lz4 = get_compressor('lz4', buffer=compr_args['buffer']) + cdata = lz4.compress(data) + data_len = len(data) + cdata_len = len(cdata) + if cdata_len < data_len: + compr_spec = compr_args['spec'] + else: + # uncompressible - we could have a special "uncompressible compressor" + # that marks such data as uncompressible via compression-type metadata. 
+ compr_spec = CompressionSpec('none') + compr_args.update(compr_spec) + logger.debug("len(data) == %d, len(lz4(data)) == %d, choosing %s", data_len, cdata_len, compr_spec) + return compr_args, Chunk(data, **meta) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 73632ee95..f5d0d0305 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -1089,6 +1089,64 @@ class ArchiverTestCase(ArchiverTestCaseBase): size, csize, path = output.split("\n")[1].split(" ") assert int(csize) < int(size) + def _get_sizes(self, compression, compressible, size=10000): + if compressible: + contents = b'X' * size + else: + contents = os.urandom(size) + self.create_regular_file('file', contents=contents) + self.cmd('init', '--encryption=none', self.repository_location) + archive = self.repository_location + '::test' + self.cmd('create', '-C', compression, archive, 'input') + output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', archive) + size, csize, path = output.split("\n")[1].split(" ") + return int(size), int(csize) + + def test_compression_none_compressible(self): + size, csize = self._get_sizes('none', compressible=True) + assert csize >= size + assert csize == size + 3 + + def test_compression_none_uncompressible(self): + size, csize = self._get_sizes('none', compressible=False) + assert csize >= size + assert csize == size + 3 + + def test_compression_zlib_compressible(self): + size, csize = self._get_sizes('zlib', compressible=True) + assert csize < size * 0.1 + assert csize == 35 + + def test_compression_zlib_uncompressible(self): + size, csize = self._get_sizes('zlib', compressible=False) + assert csize >= size + + def test_compression_auto_compressible(self): + size, csize = self._get_sizes('auto,zlib', compressible=True) + assert csize < size * 0.1 + assert csize == 35 # same as compression 'zlib' + + def test_compression_auto_uncompressible(self): + size, csize = self._get_sizes('auto,zlib', compressible=False) + 
assert csize >= size + assert csize == size + 3 # same as compression 'none' + + def test_compression_lz4_compressible(self): + size, csize = self._get_sizes('lz4', compressible=True) + assert csize < size * 0.1 + + def test_compression_lz4_uncompressible(self): + size, csize = self._get_sizes('lz4', compressible=False) + assert csize >= size + + def test_compression_lzma_compressible(self): + size, csize = self._get_sizes('lzma', compressible=True) + assert csize < size * 0.1 + + def test_compression_lzma_uncompressible(self): + size, csize = self._get_sizes('lzma', compressible=False) + assert csize >= size + def test_break_lock(self): self.cmd('init', self.repository_location) self.cmd('break-lock', self.repository_location) From f44b4bb9b7964ac748fa4ddc14a2eec57c856e77 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 3 May 2016 00:11:25 +0200 Subject: [PATCH 15/28] travis / OSX: add xz package for python lzma support --- .travis/install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis/install.sh b/.travis/install.sh index 73e292ddd..6ae408992 100755 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -15,6 +15,7 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then fi brew install lz4 + brew install xz # required for python lzma module brew outdated pyenv || brew upgrade pyenv case "${TOXENV}" in From 9e09786b33babdb358d254a55dae176f20d48c51 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 5 May 2016 13:38:08 +0200 Subject: [PATCH 16/28] borg create help: document format tags, fixes #894 docs: rephrased, more useful examples --- borg/archiver.py | 3 +++ docs/usage.rst | 7 +++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 995278b75..ae2fd4645 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -997,6 +997,9 @@ class Archiver: '.checkpoint.N' (with N being a number), because these names are used for checkpoints and treated in special ways. 
+ In the archive name, you may use the following format tags: + {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid} + To speed up pulling backups over sshfs and similar network file systems which do not provide correct inode information the --ignore-inode flag can be used. This potentially decreases reliability of change detection, while avoiding always reading diff --git a/docs/usage.rst b/docs/usage.rst index e14f5dfff..258f9a1aa 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -301,10 +301,9 @@ Examples # Even slower, even higher compression (N = 0..9) $ borg create --compression lzma,N /path/to/repo::arch ~ - # Format tags available for archive name: - # {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid} - # add short hostname, backup username and current unixtime (seconds from epoch) - $ borg create /path/to/repo::{hostname}-{user}-{now:%s} ~ + # Use short hostname, user name and current time in archive name + $ borg create /path/to/repo::{hostname}-{user}-{now} ~ + $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%d_%H:%M:%S} ~ .. 
include:: usage/extract.rst.inc From 7457c0f5da5bf5696500c7d7bbabef2fd6e37660 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 30 Apr 2016 00:45:34 +0200 Subject: [PATCH 17/28] update CHANGES in 1.0-maint --- docs/changes.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/changes.rst b/docs/changes.rst index 7d32c2eff..7adabb903 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -1,6 +1,24 @@ Changelog ========= +Version 1.0.3 (not released yet) +-------------------------------- + +Bug fixes: + +- prune: ignore checkpoints, #997 +- prune: fix bad validator, #942 +- fix capabilities extraction on Linux (set xattrs last, after chown()) + +Other changes: + +- update readthedocs URLs, #991 +- add missing docs for "borg break-lock", #992 +- borg create help: add some words about the archive name +- borg create help: document format tags, #894 +- Vagrantfile: OS X: update osxfuse / install lzma package, #933 + + Version 1.0.2 ------------- From dabf8163644e86c3401b1e8183281ce2173ee3cf Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 3 May 2016 23:06:26 +0200 Subject: [PATCH 18/28] prune: cleanup checkpoints kill all the checkpoints when pruning, except the latest one which might be useful for continuing an interrupted backup. kill the latest checkpoint also if it is already superseded by a successful backup. note: this only works on checkpoints matched by --prefix, other checkpoints are not touched.
--- borg/archiver.py | 30 +++++++++++++++++++++++------- borg/testsuite/archiver.py | 16 ++++++++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 5f82e200e..42c6e4212 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -783,13 +783,21 @@ class Archiver: '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.') return self.exit_code - archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list + archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list if args.prefix: - archives = [archive for archive in archives if archive.name.startswith(args.prefix)] + archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)] + is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search + checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)] + # keep the latest checkpoint, if there is no later non-checkpoint archive + latest_checkpoint = checkpoints[0] if checkpoints else None + if archives_checkpoints[0] is latest_checkpoint: + keep_checkpoints = [latest_checkpoint, ] + else: + keep_checkpoints = [] + checkpoints = set(checkpoints) # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup) # that is newer than a successfully completed backup - and killing the successful backup. 
- is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search - archives = [archive for archive in archives if not is_checkpoint(archive.name)] + archives = [arch for arch in archives_checkpoints if arch not in checkpoints] keep = [] if args.within: keep += prune_within(archives, args.within) @@ -807,11 +815,10 @@ class Archiver: keep += prune_split(archives, '%Y-%m', args.monthly, keep) if args.yearly: keep += prune_split(archives, '%Y', args.yearly, keep) - - to_delete = set(archives) - set(keep) + to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints)) stats = Statistics() with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache: - for archive in archives: + for archive in archives_checkpoints: if archive in to_delete: if args.dry_run: if args.output_list: @@ -1628,11 +1635,20 @@ class Archiver: any of the specified retention options. This command is normally used by automated backup scripts wanting to keep a certain number of historic backups. + Also, prune automatically removes checkpoint archives (incomplete archives left + behind by interrupted backup runs) except if the checkpoint is the latest + archive (and thus still needed). Checkpoint archives are not considered when + comparing archive counts against the retention limits (--keep-*). + If a prefix is set with -P, then only archives that start with the prefix are considered for deletion and only those archives count towards the totals specified by the rules. Otherwise, *all* archives in the repository are candidates for deletion! + If you have multiple sequences of archives with different data sets (e.g. + from different machines) in one shared repository, use one prune call per + data set that matches only the respective archives using the -P option. + The "--keep-within" option takes an argument of the form "", where char is "H", "d", "w", "m", "y". 
For example, "--keep-within 2d" means to keep all archives that were created within the past 48 hours. diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 87d2c71d5..1b89515c5 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -991,20 +991,36 @@ class ArchiverTestCase(ArchiverTestCaseBase): # these are not really a checkpoints, but they look like some: self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir) self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir) + self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir) output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2') self.assert_in('Would prune: test1', output) # must keep the latest non-checkpoint archive: self.assert_in('Keeping archive: test2', output) + # must keep the latest checkpoint archive: + self.assert_in('Keeping archive: test4.checkpoint', output) output = self.cmd('list', self.repository_location) self.assert_in('test1', output) self.assert_in('test2', output) self.assert_in('test3.checkpoint', output) self.assert_in('test3.checkpoint.1', output) + self.assert_in('test4.checkpoint', output) self.cmd('prune', self.repository_location, '--keep-daily=2') output = self.cmd('list', self.repository_location) self.assert_not_in('test1', output) # the latest non-checkpoint archive must be still there: self.assert_in('test2', output) + # only the latest checkpoint archive must still be there: + self.assert_not_in('test3.checkpoint', output) + self.assert_not_in('test3.checkpoint.1', output) + self.assert_in('test4.checkpoint', output) + # now we supercede the latest checkpoint by a successful backup: + self.cmd('create', self.repository_location + '::test5', src_dir) + self.cmd('prune', self.repository_location, '--keep-daily=2') + output = self.cmd('list', self.repository_location) + # all checkpoints should be gone now: + 
self.assert_not_in('checkpoint', output) + # the latest archive must be still there + self.assert_in('test5', output) def test_prune_repository_save_space(self): self.cmd('init', self.repository_location) From 8304b8a59189f808fb47f20e4384c5db3d7bc9d7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 5 May 2016 17:52:31 +0200 Subject: [PATCH 19/28] FAQ: add entries about compromised machines and other troubles also: fix append-only mode description - "multiple machines" and "central server" are over-specific and not required for this scenario. --- docs/faq.rst | 44 ++++++++++++++++++++++++++++++++++++++++++++ docs/usage.rst | 11 +++++++---- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index 0051a48c0..6e7cd73a2 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -133,6 +133,50 @@ into the repository. Yes, as an attacker with access to the remote server could delete (or otherwise make unavailable) all your backups. +How can I protect against a hacked backup client? +------------------------------------------------- + +Assume you backup your backup client machine C to the backup server S and +C gets hacked. In a simple push setup, the attacker could then use borg on +C to delete all backups residing on S. + +These are your options to protect against that: + +- Do not allow to permanently delete data from the repo, see :ref:`append-only-mode`. +- Use a pull-mode setup using ``ssh -R``, see :issue:`900`. +- Mount C's filesystem on another machine and then create a backup of it. +- Do not give C filesystem-level access to S. + +How can I protect against a hacked backup server? +------------------------------------------------- + +Just in case you got the impression that pull-mode backups are way more safe +than push-mode, you also need to consider the case that your backup server S +gets hacked. 
In case S has access to a lot of clients C, that might bring you +into even bigger trouble than a hacked backup client in the previous FAQ entry. + +These are your options to protect against that: + +- Use the standard push-mode setup (see also previous FAQ entry). +- Mount (the repo part of) S's filesystem on C. +- Do not give S filesystem-level access to C. +- Have your backup server at a well protected place (maybe not reachable from + the internet), configure it safely, apply security updates, monitor it, ... + +How can I protect against theft, sabotage, lightning, fire, ...? +---------------------------------------------------------------- + +In general: if your only backup medium is near the backed-up machine and +always connected, you can easily get into trouble: they likely share the same +fate if something goes really wrong. + +Thus: + +- have multiple backup media +- have media disconnected from network, power, computer +- have media at another place +- have a relatively recent backup on your media + Why do I get "connection closed by remote" after a while? --------------------------------------------------------- diff --git a/docs/usage.rst b/docs/usage.rst index 34048ee81..f4b0b52ae 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -821,13 +821,16 @@ Now, let's see how to restore some LVs from such a backup. :: $ borg extract --stdout /path/to/repo::arch dev/vg0/home-snapshot > /dev/vg0/home +.. _append-only-mode: + Append-only mode ~~~~~~~~~~~~~~~~ A repository can be made "append-only", which means that Borg will never overwrite or -delete committed data. This is useful for scenarios where multiple machines back up to -a central backup server using ``borg serve``, since a hacked machine cannot delete -backups permanently. +delete committed data (append-only refers to the segment files, but borg will also +refuse to delete the repository completely).
This is useful for scenarios where a +backup client machine backs up remotely to a backup server using ``borg serve``, since +a hacked client machine cannot delete backups on the server permanently. To activate append-only mode, edit the repository ``config`` file and add a line ``append_only=1`` to the ``[repository]`` section (or edit the line if it exists). @@ -888,6 +891,6 @@ repository. Make sure that backup client machines only get to access the reposit Ensure that no remote access is possible if the repository is temporarily set to normal mode for e.g. regular pruning. -Further protections can be implemented, but are outside of Borgs scope. For example, +Further protections can be implemented, but are outside of Borg's scope. For example, file system snapshots or wrapping ``borg serve`` to set special permissions or ACLs on new data files. From 619654b8023a51fb6d03ed3dd93a42facf365614 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 5 May 2016 18:16:32 +0200 Subject: [PATCH 20/28] faq: fix issue links --- docs/faq.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index 0051a48c0..d79fbb54d 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -140,8 +140,7 @@ When doing a backup to a remote server (using a ssh: repo URL), it sometimes stops after a while (some minutes, hours, ... - not immediately) with "connection closed by remote" error message. Why? -That's a good question and we are trying to find a good answer in -`ticket 636 `_. +That's a good question and we are trying to find a good answer in :issue:`636`. The borg cache eats way too much disk space, what can I do? ----------------------------------------------------------- @@ -217,7 +216,7 @@ control which we do not have (and also can't get, even if we wanted). So, if you need that, consider RAID or a filesystem that offers redundant storage or just make backups to different locations / different hardware. -See also `ticket 225 `_.
+See also :issue:`225`. Can |project_name| verify data integrity of a backup archive? ------------------------------------------------------------- From 4c52fcffcc626f5f3f5ce4d8bb61c54f13292f5b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 5 May 2016 18:59:03 +0200 Subject: [PATCH 21/28] faq: be more detailed about how checkpoints work, fixes #994 --- docs/faq.rst | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index d20eaa5b3..23dab5382 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -223,12 +223,25 @@ Yes, |project_name| supports resuming backups. During a backup a special checkpoint archive named ``<archive-name>.checkpoint`` is saved every checkpoint interval (the default value for this is 5 -minutes) containing all the data backed-up until that point. This means +minutes) containing all the data backed-up until that point. This checkpoint +archive is a valid archive, but it is only a partial backup. Having it +in the repo until a successful, full backup is completed is useful because it +references all the transmitted chunks up to the checkpoint time. This means that at most <checkpoint interval> worth of data needs to be retransmitted -if a backup needs to be restarted. +if you restart the backup. + +If a backup was interrupted, you do not need to do anything special, +just invoke ``borg create`` as you always do. You may use the same archive name +as in the previous attempt or a different one (e.g. if you always include the current +datetime), it does not matter. +|project_name| always does full single-pass backups, so it will start again +from the beginning - but it will be much faster, because some of the data was +already stored into the repo (and is still referenced by the checkpoint +archive), so it does not need to get transmitted and stored again. Once your backup has finished successfully, you can delete all ``<archive-name>.checkpoint`` archives.
If you run ``borg prune``, it will +also take care of deleting unneeded checkpoints. If it crashes with a UnicodeError, what can I do? ------------------------------------------------- From 0fa663febe59bc3a8857300bdea6beb139a737df Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 6 May 2016 14:55:27 +0200 Subject: [PATCH 22/28] seek /opt/pkg (pkgsrc OS X) for openssl and lz4 Add /opt/pkg (used by pkgsrc on Mac OS X) to possible openssl and lz4 header path. from 2015Q2 onwards the prefix has changed to /opt/pkg in order to be compatible with El Capitan's "System Integrity Protection" feature. -- pkgsrc.joyent.com thanks to @weakish for finding this! --- setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 7223a0eb5..741070cc5 100644 --- a/setup.py +++ b/setup.py @@ -106,7 +106,8 @@ def detect_lz4(prefixes): include_dirs = [] library_dirs = [] -possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/borg', '/opt/local'] +possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', + '/usr/local/borg', '/opt/local', '/opt/pkg', ] if os.environ.get('BORG_OPENSSL_PREFIX'): possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX')) ssl_prefix = detect_openssl(possible_openssl_prefixes) @@ -116,7 +117,8 @@ include_dirs.append(os.path.join(ssl_prefix, 'include')) library_dirs.append(os.path.join(ssl_prefix, 'lib')) -possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4', '/usr/local/borg', '/opt/local'] +possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4', + '/usr/local/borg', '/opt/local', '/opt/pkg', ] if os.environ.get('BORG_LZ4_PREFIX'): possible_lz4_prefixes.insert(0, os.environ.get('BORG_LZ4_PREFIX')) lz4_prefix = detect_lz4(possible_lz4_prefixes) From ee8f7539463803604e5d4121739bc2e757d48a15 Mon Sep 17
00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 May 2016 17:02:09 +0200 Subject: [PATCH 23/28] load_excludes: reuse existing clean_lines function --- borg/helpers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index 999da93c5..7d8905942 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -282,8 +282,7 @@ def load_excludes(fh): """Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on both line ends are ignored. """ - patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#')) - return [parse_pattern(pattern) for pattern in patterns if pattern] + return [parse_pattern(pattern) for pattern in clean_lines(fh)] def update_excludes(args): From 61a73f5f917d991ed7fa3ec4937527bb2a891d07 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 May 2016 18:18:40 +0200 Subject: [PATCH 24/28] _chunker.c: remove tab characters always use 4 spaces for indenting. --- borg/_chunker.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 0f9494e79..7f772ca4b 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -184,9 +184,9 @@ chunker_fill(Chunker *c) length = c->bytes_read - offset; #if ( ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) && defined(POSIX_FADV_DONTNEED) ) - // Only do it once per run. - if (pagemask == 0) - pagemask = getpagesize() - 1; + // Only do it once per run. + if (pagemask == 0) + pagemask = getpagesize() - 1; // We tell the OS that we do not need the data that we just have read any // more (that it maybe has in the cache). This avoids that we spoil the @@ -207,7 +207,7 @@ chunker_fill(Chunker *c) // fadvise. This will cancel the final page and is not part // of the above workaround. 
overshoot = 0; - } + } posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED); #endif From d2988444c0682cfc5e05fd134ee247c42819402b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 May 2016 18:53:58 +0200 Subject: [PATCH 25/28] better help / docs for borg extract --dry-run, fixes #1022 --- borg/archiver.py | 4 ++++ docs/usage.rst | 3 +++ 2 files changed, 7 insertions(+) diff --git a/borg/archiver.py b/borg/archiver.py index 42c6e4212..42cb0ba08 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -1401,6 +1401,10 @@ class Archiver: be restricted by using the ``--exclude`` option. See the output of the "borg help patterns" command for more help on exclude patterns. + + By using ``--dry-run``, you can do all extraction steps except actually writing the + output data: reading metadata and data chunks from the repo, checking the hash/hmac, + decrypting, decompressing. """) subparser = subparsers.add_parser('extract', parents=[common_parser], add_help=False, description=self.do_extract.__doc__, diff --git a/docs/usage.rst b/docs/usage.rst index f4b0b52ae..600af4fea 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -327,6 +327,9 @@ Examples # Extract entire archive and list files while processing $ borg extract -v --list /path/to/repo::my-files + # Verify whether an archive could be successfully extracted, but do not write files to disk + $ borg extract --dry-run /path/to/repo::my-files + # Extract the "src" directory $ borg extract /path/to/repo::my-files home/USERNAME/src From 06caee2ed0f4816c22feea28d8e422bfd7fbec7b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 May 2016 19:22:48 +0200 Subject: [PATCH 26/28] improve backup example script, fixes #1020 --- docs/quickstart.rst | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 1d15f5d23..c8456b85d 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -105,23 +105,27 
@@ server. The script also uses the :ref:`borg_prune` subcommand to maintain a certain number of old archives:: #!/bin/sh - REPOSITORY=username@remoteserver.com:backup - # Backup all of /home and /var/www except a few - # excluded directories - borg create -v --stats \ - $REPOSITORY::`hostname`-`date +%Y-%m-%d` \ - /home \ - /var/www \ - --exclude '/home/*/.cache' \ - --exclude /home/Ben/Music/Justin\ Bieber \ + # setting this, so the repo does not need to be given on the commandline: + export BORG_REPO=username@remoteserver.com:backup + + # setting this, so you won't be asked for your passphrase - make sure the + # script has appropriate owner/group and mode, e.g. root.root 600: + export BORG_PASSPHRASE=mysecret + + # Backup most important stuff: + borg create -v --stats -C lz4 ::`hostname`-`date +%Y-%m-%d` \ + /etc \ + /home \ + /var \ + --exclude '/home/*/.cache' \ --exclude '*.pyc' # Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly - # archives of THIS machine. --prefix `hostname`- is very important to + # archives of THIS machine. Using --prefix is very important to # limit prune's operation to this machine's archives and not apply to # other machine's archives also. - borg prune -v $REPOSITORY --prefix `hostname`- \ + borg prune -v --prefix `hostname`- \ --keep-daily=7 --keep-weekly=4 --keep-monthly=6 .. backup_compression: From a44e131661525c9f17eab7db046129073defb61c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 May 2016 21:03:31 +0200 Subject: [PATCH 27/28] prune: fix IndexError if giving --prefix resulted in an empty archives_checkpoints list, it crashed with an IndexError. 
--- borg/archiver.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 42c6e4212..448187e04 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -789,9 +789,8 @@ class Archiver: is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)] # keep the latest checkpoint, if there is no later non-checkpoint archive - latest_checkpoint = checkpoints[0] if checkpoints else None - if archives_checkpoints[0] is latest_checkpoint: - keep_checkpoints = [latest_checkpoint, ] + if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]: + keep_checkpoints = checkpoints[:1] else: keep_checkpoints = [] checkpoints = set(checkpoints) From 595e7c1dcd0f5a399c83a428554338a1342febdd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 9 May 2016 04:14:50 +0200 Subject: [PATCH 28/28] remove openssl RAND_bytes from crypto.pyx was forgotten there and not used any more since a while. also removed the hint about PyCrypto being an alternative to crypto.pyx, it was not updated since 2014, looks pretty stale. --- borg/crypto.pyx | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/borg/crypto.pyx b/borg/crypto.pyx index 16f1cda82..dd0fa14ac 100644 --- a/borg/crypto.pyx +++ b/borg/crypto.pyx @@ -1,17 +1,11 @@ -"""A thin OpenSSL wrapper +"""A thin OpenSSL wrapper""" -This could be replaced by PyCrypto maybe? -""" from libc.stdlib cimport malloc, free from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release API_VERSION = 3 -cdef extern from "openssl/rand.h": - int RAND_bytes(unsigned char *buf, int num) - - cdef extern from "openssl/evp.h": ctypedef struct EVP_MD: pass