From f7210c749f8ed6db1c46a37e1ca4b7f4bf418bf3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 23:34:58 +0200 Subject: [PATCH 001/151] remove cpu intensive compression methods for the chunks.archive also remove the comment about how good xz compresses - while that was true for smaller index files, it seems to be less effective with bigger ones. maybe just an issue with compression dict size. --- borg/cache.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 2391be275..207fb58a6 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -213,9 +213,6 @@ class Cache: so it has complete and current information about all backup archives. Finally, it builds the master chunks index by merging all indices from the tar. - - Note: compression (esp. xz) is very effective in keeping the tar - relatively small compared to the files it contains. """ in_archive_path = os.path.join(self.path, 'chunks.archive') out_archive_path = os.path.join(self.path, 'chunks.archive.tmp') @@ -234,8 +231,10 @@ class Cache: return tf def open_out_archive(): - for compression in ('xz', 'bz2', 'gz'): - # xz needs py 3.3, bz2 and gz also work on 3.2 + for compression in ('gz', ): + # 'xz' needs py 3.3 and is expensive on the cpu + # 'bz2' also works on 3.2 and is expensive on the cpu + # 'gz' also works on 3.2 and is less expensive on the cpu try: tf = tarfile.open(out_archive_path, 'w:'+compression, format=tarfile.PAX_FORMAT) break From 22dd925986f46615d4bb8b09830c493ae65ec896 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 30 Aug 2015 03:03:48 +0200 Subject: [PATCH 002/151] chunks index archive: remove all tar and compression related stuff and just use separate files in a directory the compression was quite cpu intensive and didn't work that great anyway. now the disk space usage is a bit higher, but it is much faster and less hard on the cpu. 
disk space needs grow linearly with the amount and size of the archives, this is a problem esp. if one has many and/or big archives (but this problem existed before also because compression was not as effective as I believed). the tar archive always needed a complete rebuild (and thus: decompression and recompression) because deleting outdated archive indexes was not possible in the tar file. now we just have a directory chunks.archive.d and keep archive index files there for all archives we already know. if an archive does not exist any more in the repo, we just delete its index file. if an archive is unknown still, we fetch the infos and build a new index file. when merging, we avoid growing the hash table from zero, but just start with the first archive's index as basis for merging. --- borg/cache.py | 185 ++++++++++++++++++++++---------------------------- 1 file changed, 82 insertions(+), 103 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 207fb58a6..65e64af5b 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -96,8 +96,7 @@ class Cache: with open(os.path.join(self.path, 'config'), 'w') as fd: config.write(fd) ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) - with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd: - pass # empty file + os.makedirs(os.path.join(self.path, 'chunks.archive.d')) with open(os.path.join(self.path, 'files'), 'wb') as fd: pass # empty file @@ -153,7 +152,6 @@ class Cache: os.mkdir(txn_dir) shutil.copy(os.path.join(self.path, 'config'), txn_dir) shutil.copy(os.path.join(self.path, 'chunks'), txn_dir) - shutil.copy(os.path.join(self.path, 'chunks.archive'), txn_dir) shutil.copy(os.path.join(self.path, 'files'), txn_dir) os.rename(os.path.join(self.path, 'txn.tmp'), os.path.join(self.path, 'txn.active')) @@ -195,7 +193,6 @@ class Cache: if os.path.exists(txn_dir): shutil.copy(os.path.join(txn_dir, 'config'), self.path) shutil.copy(os.path.join(txn_dir, 'chunks'), self.path) - 
shutil.copy(os.path.join(txn_dir, 'chunks.archive'), self.path) shutil.copy(os.path.join(txn_dir, 'files'), self.path) os.rename(txn_dir, os.path.join(self.path, 'txn.tmp')) if os.path.exists(os.path.join(self.path, 'txn.tmp')): @@ -206,53 +203,14 @@ class Cache: def sync(self): """Re-synchronize chunks cache with repository. - If present, uses a compressed tar archive of known backup archive - indices, so it only needs to fetch infos from repo and build a chunk - index once per backup archive. - If out of sync, the tar gets rebuilt from known + fetched chunk infos, - so it has complete and current information about all backup archives. - Finally, it builds the master chunks index by merging all indices from - the tar. + Maintains a directory with known backup archive indexes, so it only + needs to fetch infos from repo and build a chunk index once per backup + archive. + If out of sync, missing archive indexes get added, outdated indexes + get removed and a new master chunks index is built by merging all + archive indexes. """ - in_archive_path = os.path.join(self.path, 'chunks.archive') - out_archive_path = os.path.join(self.path, 'chunks.archive.tmp') - - def open_in_archive(): - try: - tf = tarfile.open(in_archive_path, 'r') - except OSError as e: - if e.errno != errno.ENOENT: - raise - # file not found - tf = None - except tarfile.ReadError: - # empty file? 
- tf = None - return tf - - def open_out_archive(): - for compression in ('gz', ): - # 'xz' needs py 3.3 and is expensive on the cpu - # 'bz2' also works on 3.2 and is expensive on the cpu - # 'gz' also works on 3.2 and is less expensive on the cpu - try: - tf = tarfile.open(out_archive_path, 'w:'+compression, format=tarfile.PAX_FORMAT) - break - except tarfile.CompressionError: - continue - else: # shouldn't happen - tf = None - return tf - - def close_archive(tf): - if tf: - tf.close() - - def delete_in_archive(): - os.unlink(in_archive_path) - - def rename_out_archive(): - os.rename(out_archive_path, in_archive_path) + archive_path = os.path.join(self.path, 'chunks.archive.d') def add(chunk_idx, id, size, csize, incr=1): try: @@ -261,16 +219,21 @@ class Cache: except KeyError: chunk_idx[id] = incr, size, csize - def transfer_known_idx(archive_id, tf_in, tf_out): - archive_id_hex = hexlify(archive_id).decode('ascii') - tarinfo = tf_in.getmember(archive_id_hex) - archive_name = tarinfo.pax_headers['archive_name'] - print('Already known archive:', archive_name) - f_in = tf_in.extractfile(archive_id_hex) - tf_out.addfile(tarinfo, f_in) - return archive_name + def mkpath(id, suffix=''): + path = os.path.join(archive_path, id + suffix) + return path.encode('utf-8') - def fetch_and_build_idx(archive_id, repository, key, tmp_dir, tf_out): + def list_archives(): + fns = os.listdir(archive_path) + # only return filenames that are 64 hex digits (256bit) + return [fn for fn in fns if len(fn) == 64] + + def cleanup_outdated(ids): + for id in ids: + id_hex = hexlify(id).decode('ascii') + os.unlink(mkpath(id_hex)) + + def fetch_and_build_idx(archive_id, repository, key): chunk_idx = ChunkIndex() cdata = repository.get(archive_id) data = key.decrypt(archive_id, cdata) @@ -293,55 +256,71 @@ class Cache: for chunk_id, size, csize in item[b'chunks']: add(chunk_idx, chunk_id, size, csize) archive_id_hex = hexlify(archive_id).decode('ascii') - file_tmp = os.path.join(tmp_dir, 
archive_id_hex).encode('utf-8') - chunk_idx.write(file_tmp) - tarinfo = tf_out.gettarinfo(file_tmp, archive_id_hex) - tarinfo.pax_headers['archive_name'] = archive[b'name'] - with open(file_tmp, 'rb') as f: - tf_out.addfile(tarinfo, f) - os.unlink(file_tmp) + fn = mkpath(archive_id_hex) + fn_tmp = mkpath(archive_id_hex, suffix='.tmp') + try: + chunk_idx.write(fn_tmp) + except Exception: + os.unlink(fn_tmp) + else: + os.rename(fn_tmp, fn) - def create_master_idx(chunk_idx, tf_in, tmp_dir): + def create_master_idx(chunk_idx): + # deallocates old hashindex, creates empty hashindex: chunk_idx.clear() - for tarinfo in tf_in: - archive_id_hex = tarinfo.name - archive_name = tarinfo.pax_headers['archive_name'] - print("- extracting archive %s ..." % archive_name) - tf_in.extract(archive_id_hex, tmp_dir) - chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') - print("- reading archive ...") - archive_chunk_idx = ChunkIndex.read(chunk_idx_path) - print("- merging archive ...") - chunk_idx.merge(archive_chunk_idx) - os.unlink(chunk_idx_path) + archives = list_archives() + if archives: + chunk_idx = None + for fn in archives: + archive_id_hex = fn + archive_id = unhexlify(archive_id_hex) + for name, info in self.manifest.archives.items(): + if info[b'id'] == archive_id: + archive_name = name + break + archive_chunk_idx_path = mkpath(archive_id_hex) + print("- reading archive %s ..." % archive_name) + archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path) + print("- merging archive ...") + if chunk_idx is None: + # we just use the first archive's idx as starting point, + # to avoid growing the hash table from 0 size and also + # to save 1 merge call. 
+ chunk_idx = archive_chunk_idx + else: + chunk_idx.merge(archive_chunk_idx) + return chunk_idx + + def legacy_support(): + try: + # get rid of the compressed tar file, if present + os.unlink(os.path.join(self.path, 'chunks.archive')) + except: + pass + try: + # create the directory for the archive index files we use now + os.mkdir(archive_path) + except: + pass + self.begin_txn() print('Synchronizing chunks cache...') - # XXX we have to do stuff on disk due to lacking ChunkIndex api - with tempfile.TemporaryDirectory(prefix='borg-tmp') as tmp_dir: - repository = cache_if_remote(self.repository) - out_archive = open_out_archive() - in_archive = open_in_archive() - if in_archive: - known_ids = set(unhexlify(hexid) for hexid in in_archive.getnames()) - else: - known_ids = set() - archive_ids = set(info[b'id'] for info in self.manifest.archives.values()) - print('Rebuilding archive collection. Known: %d Repo: %d Unknown: %d' % ( - len(known_ids), len(archive_ids), len(archive_ids - known_ids), )) - for archive_id in archive_ids & known_ids: - transfer_known_idx(archive_id, in_archive, out_archive) - close_archive(in_archive) - delete_in_archive() # free disk space - for archive_id in archive_ids - known_ids: - fetch_and_build_idx(archive_id, repository, self.key, tmp_dir, out_archive) - close_archive(out_archive) - rename_out_archive() - print('Merging collection into master chunks cache...') - in_archive = open_in_archive() - create_master_idx(self.chunks, in_archive, tmp_dir) - close_archive(in_archive) - print('Done.') + repository = cache_if_remote(self.repository) + legacy_support() + known_ids = set(unhexlify(hexid) for hexid in list_archives()) + archive_ids = set(info[b'id'] for info in self.manifest.archives.values()) + print('Rebuilding archive collection. 
Repo: %d Known: %d Outdated: %d Unknown: %d' % ( + len(archive_ids), len(known_ids), + len(known_ids - archive_ids), len(archive_ids - known_ids), )) + cleanup_outdated(known_ids - archive_ids) + for archive_id in archive_ids - known_ids: + fetch_and_build_idx(archive_id, repository, self.key) + known_ids = set(unhexlify(hexid) for hexid in list_archives()) + assert known_ids == archive_ids + print('Merging collection into master chunks cache...') + self.chunks = create_master_idx(self.chunks) + print('Done.') def add_chunk(self, id, data, stats): if not self.txn_active: From 54ccbc5ae26c0b11c804a635955f2fa5953462f5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 30 Aug 2015 15:15:15 +0200 Subject: [PATCH 003/151] chunks index resync: do all in one pass if we do not have a cached archive index: fetch and build and merge it if we have one: merge it --- borg/cache.py | 100 +++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 65e64af5b..13f80f325 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -212,6 +212,23 @@ class Cache: """ archive_path = os.path.join(self.path, 'chunks.archive.d') + def mkpath(id, suffix=''): + id_hex = hexlify(id).decode('ascii') + path = os.path.join(archive_path, id_hex + suffix) + return path.encode('utf-8') + + def cached_archives(): + fns = os.listdir(archive_path) + # filenames with 64 hex digits == 256bit + return set(unhexlify(fn) for fn in fns if len(fn) == 64) + + def repo_archives(): + return set(info[b'id'] for info in self.manifest.archives.values()) + + def cleanup_outdated(ids): + for id in ids: + os.unlink(mkpath(id)) + def add(chunk_idx, id, size, csize, incr=1): try: count, size, csize = chunk_idx[id] @@ -219,20 +236,6 @@ class Cache: except KeyError: chunk_idx[id] = incr, size, csize - def mkpath(id, suffix=''): - path = os.path.join(archive_path, id + suffix) - return path.encode('utf-8') - - def list_archives(): - fns 
= os.listdir(archive_path) - # only return filenames that are 64 hex digits (256bit) - return [fn for fn in fns if len(fn) == 64] - - def cleanup_outdated(ids): - for id in ids: - id_hex = hexlify(id).decode('ascii') - os.unlink(mkpath(id_hex)) - def fetch_and_build_idx(archive_id, repository, key): chunk_idx = ChunkIndex() cdata = repository.get(archive_id) @@ -242,7 +245,6 @@ class Cache: if archive[b'version'] != 1: raise Exception('Unknown archive metadata version') decode_dict(archive, (b'name',)) - print('Analyzing new archive:', archive[b'name']) unpacker = msgpack.Unpacker() for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])): data = key.decrypt(item_id, chunk) @@ -255,33 +257,43 @@ class Cache: if b'chunks' in item: for chunk_id, size, csize in item[b'chunks']: add(chunk_idx, chunk_id, size, csize) - archive_id_hex = hexlify(archive_id).decode('ascii') - fn = mkpath(archive_id_hex) - fn_tmp = mkpath(archive_id_hex, suffix='.tmp') + fn = mkpath(archive_id) + fn_tmp = mkpath(archive_id, suffix='.tmp') try: chunk_idx.write(fn_tmp) except Exception: os.unlink(fn_tmp) else: os.rename(fn_tmp, fn) + return chunk_idx + + def lookup_name(archive_id): + for name, info in self.manifest.archives.items(): + if info[b'id'] == archive_id: + return name def create_master_idx(chunk_idx): + print('Synchronizing chunks cache...') + cached_ids = cached_archives() + archive_ids = repo_archives() + print('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' 
% ( + len(archive_ids), len(cached_ids), + len(cached_ids - archive_ids), len(archive_ids - cached_ids), )) # deallocates old hashindex, creates empty hashindex: chunk_idx.clear() - archives = list_archives() - if archives: + cleanup_outdated(cached_ids - archive_ids) + if archive_ids: chunk_idx = None - for fn in archives: - archive_id_hex = fn - archive_id = unhexlify(archive_id_hex) - for name, info in self.manifest.archives.items(): - if info[b'id'] == archive_id: - archive_name = name - break - archive_chunk_idx_path = mkpath(archive_id_hex) - print("- reading archive %s ..." % archive_name) - archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path) - print("- merging archive ...") + for archive_id in archive_ids: + archive_name = lookup_name(archive_id) + if archive_id in cached_ids: + archive_chunk_idx_path = mkpath(archive_id) + print("Reading cached archive chunk index for %s ..." % archive_name) + archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path) + else: + print('Fetching and building archive index for %s ...' 
% archive_name) + archive_chunk_idx = fetch_and_build_idx(archive_id, repository, self.key) + print("Merging into master chunks index ...") if chunk_idx is None: # we just use the first archive's idx as starting point, # to avoid growing the hash table from 0 size and also @@ -289,38 +301,28 @@ class Cache: chunk_idx = archive_chunk_idx else: chunk_idx.merge(archive_chunk_idx) + print('Done.') return chunk_idx - def legacy_support(): + def legacy_cleanup(): + """bring old cache dirs into the desired state (cleanup and adapt)""" try: - # get rid of the compressed tar file, if present os.unlink(os.path.join(self.path, 'chunks.archive')) except: pass try: - # create the directory for the archive index files we use now + os.unlink(os.path.join(self.path, 'chunks.archive.tmp')) + except: + pass + try: os.mkdir(archive_path) except: pass - self.begin_txn() - print('Synchronizing chunks cache...') repository = cache_if_remote(self.repository) - legacy_support() - known_ids = set(unhexlify(hexid) for hexid in list_archives()) - archive_ids = set(info[b'id'] for info in self.manifest.archives.values()) - print('Rebuilding archive collection. 
Repo: %d Known: %d Outdated: %d Unknown: %d' % ( - len(archive_ids), len(known_ids), - len(known_ids - archive_ids), len(archive_ids - known_ids), )) - cleanup_outdated(known_ids - archive_ids) - for archive_id in archive_ids - known_ids: - fetch_and_build_idx(archive_id, repository, self.key) - known_ids = set(unhexlify(hexid) for hexid in list_archives()) - assert known_ids == archive_ids - print('Merging collection into master chunks cache...') + legacy_cleanup() self.chunks = create_master_idx(self.chunks) - print('Done.') def add_chunk(self, id, data, stats): if not self.txn_active: From d9fb1d2b03b58bccc1908c185b62346ee2677f79 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Tue, 8 Sep 2015 23:33:34 -0400 Subject: [PATCH 004/151] Normalize paths before pattern matching on OS X The OS X file system HFS+ stores path names as Unicode, and converts them to a variant of Unicode NFD for storage. Because path names will always be in this canonical form, it's not friendly to require users to match this form exactly. Convert paths from the repository and patterns from the command line to NFD before comparing them. Unix (and Windows, I think) file systems don't convert path names into a canonical form, so users will continue to have to exactly match the path name they want, because there could be two paths with the same character visually that are actually composed of different byte sequences. 
--- borg/helpers.py | 43 +++++++++++++++--- borg/testsuite/helpers.py | 96 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 7 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index aa5bead0b..ecf138125 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -7,6 +7,8 @@ import pwd import re import sys import time +import unicodedata + from datetime import datetime, timezone, timedelta from fnmatch import translate from operator import attrgetter @@ -220,6 +222,10 @@ def exclude_path(path, patterns): # unify the two cases, we add a path separator to the end of # the path before matching. +##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +##### For discussion only, don't merge this code! +##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + class IncludePattern: """Literal files or directories listed on the command line for some operations (e.g. extract, but not create). @@ -227,10 +233,22 @@ class IncludePattern: path match as well. A trailing slash makes no difference. 
""" def __init__(self, pattern): - self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep + def match(path): + return (path+os.path.sep).startswith(self.pattern) - def match(self, path): - return (path+os.path.sep).startswith(self.pattern) + # HFS+ converts paths to a canonical form, so users shouldn't be + # required to enter an exact match + if sys.platform in ('darwin',): + # repository paths will be mostly in NFD, as the OSX exception list + # to NFD is small, so normalize to that form for best performance + pattern = unicodedata.normalize("NFD", pattern) + self.match = lambda p: match(unicodedata.normalize("NFD", p)) + # Windows and Unix filesystems allow different forms, so users + # always have to enter an exact match + else: + self.match = match + + self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep def __repr__(self): return '%s(%s)' % (type(self), self.pattern) @@ -241,17 +259,30 @@ class ExcludePattern(IncludePattern): exclude the contents of a directory, but not the directory itself. """ def __init__(self, pattern): + def match(path): + return self.regex.match(path+os.path.sep) is not None + if pattern.endswith(os.path.sep): self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep else: self.pattern = os.path.normpath(pattern)+os.path.sep+'*' + + # HFS+ converts paths to a canonical form, so users shouldn't be + # required to enter an exact match + if sys.platform in ('darwin',): + # repository paths will be mostly in NFD, as the OSX exception list + # to NFD is small, so normalize to that form for best performance + self.pattern = unicodedata.normalize("NFD", self.pattern) + self.match = lambda p: match(unicodedata.normalize("NFD", p)) + # Windows and Unix filesystems allow different forms, so users + # always have to enter an exact match + else: + self.match = match + # fnmatch and re.match both cache compiled regular expressions. # Nevertheless, this is about 10 times faster. 
self.regex = re.compile(translate(self.pattern)) - def match(self, path): - return self.regex.match(path+os.path.sep) is not None - def __repr__(self): return '%s(%s)' % (type(self), self.pattern) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 95531df83..002033f57 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -3,9 +3,10 @@ from time import mktime, strptime from datetime import datetime, timezone, timedelta import pytest +import sys import msgpack -from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \ +from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \ prune_within, prune_split, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams from . import BaseTestCase @@ -178,6 +179,99 @@ class PatternTestCase(BaseTestCase): ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) +@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') +class IncludePatternNonAsciiTestCase(BaseTestCase): + def testComposedUnicode(self): + pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' + i = IncludePattern(pattern) + + assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testDecomposedUnicode(self): + pattern = 'ba\N{COMBINING ACUTE ACCENT}' + i = IncludePattern(pattern) + + assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testInvalidUnicode(self): + pattern = str(b'ba\x80', 'latin1') + i = IncludePattern(pattern) + + assert not i.match("ba/foo") + assert i.match(str(b"ba\x80/foo", 'latin1')) + + +@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') +class ExcludePatternNonAsciiTestCase(BaseTestCase): + def testComposedUnicode(self): + pattern = 
'b\N{LATIN SMALL LETTER A WITH ACUTE}' + e = ExcludePattern(pattern) + + assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testDecomposedUnicode(self): + pattern = 'ba\N{COMBINING ACUTE ACCENT}' + e = ExcludePattern(pattern) + + assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testInvalidUnicode(self): + pattern = str(b'ba\x80', 'latin1') + e = ExcludePattern(pattern) + + assert not e.match("ba/foo") + assert e.match(str(b"ba\x80/foo", 'latin1')) + +#@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='OS X only test') +class OSXPatternNormalizationTestCase(BaseTestCase): + # monkey patch sys.platform to allow testing on non-OSX during development + # remove and uncomment OSX-only decorator before push + def setUp(self): + self.oldplatform = sys.platform + sys.platform = 'darwin' + pass + + # monkey patch sys.platform to allow testing on non-OSX during development + # remove and uncomment OSX-only decorator before push + def tearDown(self): + sys.platform = self.oldplatform + pass + + def testComposedUnicode(self): + pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' + i = IncludePattern(pattern) + e = ExcludePattern(pattern) + + assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testDecomposedUnicode(self): + pattern = 'ba\N{COMBINING ACUTE ACCENT}' + i = IncludePattern(pattern) + e = ExcludePattern(pattern) + + assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testInvalidUnicode(self): + pattern = str(b'ba\x80', 'latin1') + i = 
IncludePattern(pattern) + e = ExcludePattern(pattern) + + assert not i.match("ba/foo") + assert i.match(str(b"ba\x80/foo", 'latin1')) + assert not e.match("ba/foo") + assert e.match(str(b"ba\x80/foo", 'latin1')) + + def test_compression_specs(): with pytest.raises(ValueError): CompressionSpec('') From d510ff7c63a4ad64f2c6a84e2af74092366136fa Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Wed, 9 Sep 2015 13:41:34 -0400 Subject: [PATCH 005/151] Merge non-ascii Include and ExcludePattern tests to parallel the OSX non-ascii tests --- borg/testsuite/helpers.py | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 002033f57..360695ba8 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -180,52 +180,38 @@ class PatternTestCase(BaseTestCase): @pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') -class IncludePatternNonAsciiTestCase(BaseTestCase): +class PatternNonAsciiTestCase(BaseTestCase): def testComposedUnicode(self): pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' i = IncludePattern(pattern) + e = ExcludePattern(pattern) assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - def testDecomposedUnicode(self): - pattern = 'ba\N{COMBINING ACUTE ACCENT}' - i = IncludePattern(pattern) - - assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - def testInvalidUnicode(self): - pattern = str(b'ba\x80', 'latin1') - i = IncludePattern(pattern) - - assert not i.match("ba/foo") - assert i.match(str(b"ba\x80/foo", 'latin1')) - - -@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') -class ExcludePatternNonAsciiTestCase(BaseTestCase): - def testComposedUnicode(self): - pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' - e = ExcludePattern(pattern) - assert 
e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo") def testDecomposedUnicode(self): pattern = 'ba\N{COMBINING ACUTE ACCENT}' + i = IncludePattern(pattern) e = ExcludePattern(pattern) + assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") def testInvalidUnicode(self): pattern = str(b'ba\x80', 'latin1') + i = IncludePattern(pattern) e = ExcludePattern(pattern) + assert not i.match("ba/foo") + assert i.match(str(b"ba\x80/foo", 'latin1')) assert not e.match("ba/foo") assert e.match(str(b"ba\x80/foo", 'latin1')) + #@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='OS X only test') class OSXPatternNormalizationTestCase(BaseTestCase): # monkey patch sys.platform to allow testing on non-OSX during development From cc13f3db979300ab1ebc982106e1ad8074133bb7 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Wed, 9 Sep 2015 13:48:46 -0400 Subject: [PATCH 006/151] Express non-ascii pattern platform skips better including correcting thinko in the commented-out OSX-only test --- borg/testsuite/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 360695ba8..077c171b2 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -179,7 +179,7 @@ class PatternTestCase(BaseTestCase): ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) -@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') +@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test') class PatternNonAsciiTestCase(BaseTestCase): def testComposedUnicode(self): pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' @@ -212,7 +212,7 @@ class PatternNonAsciiTestCase(BaseTestCase): assert e.match(str(b"ba\x80/foo", 'latin1')) 
-#@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='OS X only test') +#@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') class OSXPatternNormalizationTestCase(BaseTestCase): # monkey patch sys.platform to allow testing on non-OSX during development # remove and uncomment OSX-only decorator before push From 13ddfdf4a3b64b109dde3a7ba5333a32e14be758 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Wed, 9 Sep 2015 15:00:58 -0400 Subject: [PATCH 007/151] Move pattern normalization decision into decorator Using a decorator moves the duplicate code in the init methods into a single decorator method, while still retaining the same runtime overhead (zero for the non-OSX path, one extra function call plus the call to unicodedata.normalize for OSX). The pattern classes are much visually cleaner, and duplicate code limited to two lines normalizing the pattern on OSX. Because the decoration happens at class init time (vs instance init time for the previous approach), the OSX and non-OSX test cases can no longer be called in the same run, so I also removed the OSX test case monkey patching and uncommented the platform skipif decorator. --- borg/helpers.py | 52 +++++++++++++++++++-------------------- borg/testsuite/helpers.py | 15 +---------- 2 files changed, 26 insertions(+), 41 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index ecf138125..0da9918f8 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1,6 +1,7 @@ import argparse import binascii from collections import namedtuple +from functools import wraps import grp import os import pwd @@ -222,9 +223,22 @@ def exclude_path(path, patterns): # unify the two cases, we add a path separator to the end of # the path before matching. -##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -##### For discussion only, don't merge this code! -##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+def normalized(func): + """ Decorator for the Pattern match methods, returning a wrapper that + normalizes OSX paths to match the normalized pattern on OSX, and + returning the original method on other platforms""" + @wraps(func) + def normalize_wrapper(self, path): + return func(self, unicodedata.normalize("NFD", path)) + + if sys.platform in ('darwin',): + # HFS+ converts paths to a canonical form, so users shouldn't be + # required to enter an exact match + return normalize_wrapper + else: + # Windows and Unix filesystems allow different forms, so users + # always have to enter an exact match + return func class IncludePattern: """Literal files or directories listed on the command line @@ -233,23 +247,15 @@ class IncludePattern: path match as well. A trailing slash makes no difference. """ def __init__(self, pattern): - def match(path): - return (path+os.path.sep).startswith(self.pattern) - - # HFS+ converts paths to a canonical form, so users shouldn't be - # required to enter an exact match if sys.platform in ('darwin',): - # repository paths will be mostly in NFD, as the OSX exception list - # to NFD is small, so normalize to that form for best performance pattern = unicodedata.normalize("NFD", pattern) - self.match = lambda p: match(unicodedata.normalize("NFD", p)) - # Windows and Unix filesystems allow different forms, so users - # always have to enter an exact match - else: - self.match = match self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep + @normalized + def match(self, path): + return (path+os.path.sep).startswith(self.pattern) + def __repr__(self): return '%s(%s)' % (type(self), self.pattern) @@ -259,30 +265,22 @@ class ExcludePattern(IncludePattern): exclude the contents of a directory, but not the directory itself. 
""" def __init__(self, pattern): - def match(path): - return self.regex.match(path+os.path.sep) is not None - if pattern.endswith(os.path.sep): self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep else: self.pattern = os.path.normpath(pattern)+os.path.sep+'*' - # HFS+ converts paths to a canonical form, so users shouldn't be - # required to enter an exact match if sys.platform in ('darwin',): - # repository paths will be mostly in NFD, as the OSX exception list - # to NFD is small, so normalize to that form for best performance self.pattern = unicodedata.normalize("NFD", self.pattern) - self.match = lambda p: match(unicodedata.normalize("NFD", p)) - # Windows and Unix filesystems allow different forms, so users - # always have to enter an exact match - else: - self.match = match # fnmatch and re.match both cache compiled regular expressions. # Nevertheless, this is about 10 times faster. self.regex = re.compile(translate(self.pattern)) + @normalized + def match(self, path): + return self.regex.match(path+os.path.sep) is not None + def __repr__(self): return '%s(%s)' % (type(self), self.pattern) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 077c171b2..f755df22a 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -212,21 +212,8 @@ class PatternNonAsciiTestCase(BaseTestCase): assert e.match(str(b"ba\x80/foo", 'latin1')) -#@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') +@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') class OSXPatternNormalizationTestCase(BaseTestCase): - # monkey patch sys.platform to allow testing on non-OSX during development - # remove and uncomment OSX-only decorator before push - def setUp(self): - self.oldplatform = sys.platform - sys.platform = 'darwin' - pass - - # monkey patch sys.platform to allow testing on non-OSX during development - # remove and uncomment OSX-only decorator before push - def 
tearDown(self): - sys.platform = self.oldplatform - pass - def testComposedUnicode(self): pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' i = IncludePattern(pattern) From 1eecb020e88b635adbc7c2213430eed91b49bc5f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 10 Sep 2015 23:12:12 +0200 Subject: [PATCH 008/151] cython code: add some int types to get rid of unspecific python add / subtract operations they somehow pull in some floating point error code that led to an undefined symbol FPE_... when using the borgbackup wheel on some non-ubuntu/debian linux platform. --- borg/chunker.pyx | 2 +- borg/crypto.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/chunker.pyx b/borg/chunker.pyx index 1d4897db1..0faa06f38 100644 --- a/borg/chunker.pyx +++ b/borg/chunker.pyx @@ -20,7 +20,7 @@ cdef extern from "_chunker.c": cdef class Chunker: cdef _Chunker *chunker - def __cinit__(self, seed, chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size): + def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size): min_size = 1 << chunk_min_exp max_size = 1 << chunk_max_exp hash_mask = (1 << hash_mask_bits) - 1 diff --git a/borg/crypto.pyx b/borg/crypto.pyx index 61dbc42d5..d8143bdbc 100644 --- a/borg/crypto.pyx +++ b/borg/crypto.pyx @@ -52,7 +52,7 @@ bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0] long_to_bytes = lambda x: _long.pack(x) -def num_aes_blocks(length): +def num_aes_blocks(int length): """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data. Note: this is only correct for modes without padding, like AES-CTR.
""" From 1fa00c2a84a3084a3d5b1723096079c80a0a44bb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 19:13:17 +0200 Subject: [PATCH 009/151] use vagrant to do easy cross-platform testing --- Vagrantfile | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 Vagrantfile diff --git a/Vagrantfile b/Vagrantfile new file mode 100644 index 000000000..cc8f9111a --- /dev/null +++ b/Vagrantfile @@ -0,0 +1,148 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : + +# TODO +# add pkg-config to sphinx docs, needed for fuse +# reduce lzma compression level to << 9 in unit tests, needs more memory than vagrant box has +# /usr/local/include/lz4.h for freebsd - use same code as for finding the openssl headers +# llfuse <0.41 >0.41.1 broken install due to UnicodeError + +def packages_prepare_wheezy + return <<-EOF + # debian 7 wheezy does not have lz4, but it is available from wheezy-backports: + echo "deb http://http.debian.net/debian wheezy-backports main" > /etc/apt/sources.list.d/wheezy-backports.list + EOF +end + +def packages_prepare_precise + return <<-EOF + # ubuntu 12.04 precise does not have lz4, but it is available from a ppa: + add-apt-repository -y ppa:gezakovacs/lz4 + EOF +end + +def packages_debianoid + return <<-EOF + apt-get update + apt-get install -y python3-dev python3-setuptools + apt-get install -y libssl-dev libacl1-dev liblz4-dev + apt-get install -y libfuse-dev fuse pkg-config + apt-get install -y fakeroot build-essential git + apt-get install -y curl + # this way it works on older dists (like ubuntu 12.04) also: + easy_install3 pip + pip3 install virtualenv + EOF +end + +def packages_freebsd + return <<-EOF + pkg install -y python34 py34-setuptools34 + ln -s /usr/local/bin/python3.4 /usr/local/bin/python3 + pkg install -y openssl liblz4 + pkg install -y fusefs-libs pkgconf + pkg install -y fakeroot git + pkg install -y curl + easy_install-3.4 pip + pip3 install virtualenv + # make FUSE 
work + echo 'fuse_load="YES"' >> /boot/loader.conf + echo 'vfs.usermount=1' >> /etc/sysctl.conf + kldload fuse + sysctl vfs.usermount=1 + pw groupmod operator -M vagrant + EOF +end + +def packages_darwin + return <<-EOF + ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" + brew update || brew update + brew outdated openssl || brew upgrade openssl + brew outdated pyenv || brew upgrade pyenv + brew install lz4 + brew install osxfuse + pyenv install 3.4.3 + pyenv global 3.4.3 + pyenv rehash + python -m pip install --user virtualenv + EOF +end + +def prepare_user(boxname) + return <<-EOF + echo export 'PATH=/usr/local/bin:$PATH' >> ~/.profile + . ~/.profile + + cd /vagrant/borg + virtualenv --python=python3 borg-env + . borg-env/bin/activate + + cd borg + pip install -U pip setuptools + pip install 'llfuse<0.41' # 0.41 does not install due to UnicodeDecodeError + pip install -r requirements.d/development.txt + pip install -e . + + echo + echo "Run:" + echo " vagrant rsync #{boxname}" + echo " vagrant ssh #{boxname} -c 'cd project/path; ...'" + EOF +end + +def fix_perms + return <<-EOF + chown -R vagrant /vagrant/borg + EOF +end + +Vagrant.configure(2) do |config| + # use rsync to copy content to the folder + config.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync" + config.vm.synced_folder ".", "/vagrant", disabled: true + + # fix permissions on synced folder + config.vm.provision "fix perms", :type => :shell, :inline => fix_perms + + config.vm.define "trusty64" do |b| + b.vm.box = "ubuntu/trusty64" + b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid + b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("trusty64") + end + + config.vm.define "precise32" do |b| + b.vm.box = "ubuntu/precise32" + b.vm.provision "packages prepare precise", :type => :shell, :inline => packages_prepare_precise + b.vm.provision "packages debianoid", :type => :shell, 
:inline => packages_debianoid + b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("precise32") + end + + config.vm.define "jessie64" do |b| + b.vm.box = "debian/jessie64" + b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid + b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("jessie64") + end + + config.vm.define "wheezy32" do |b| + b.vm.box = "puppetlabs/debian-7.8-32-nocm" + b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy + b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid + b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("wheezy32") + end + + # BSD + config.vm.define "freebsd" do |b| + b.vm.box = "geoffgarside/freebsd-10.2" + #b.vm.base_mac = "11:22:33:44:56:67" + b.vm.provision "packages freebsd", :type => :shell, :inline => packages_freebsd + b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("freebsd") + end + + # OS X + config.vm.define "darwin" do |b| + b.vm.box = "jhcook/yosemite-clitools" + b.vm.provision "packages darwin", :type => :shell, :privileged => false, :inline => packages_darwin + b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("darwin") + end +end From bc021d4ed7c5b6245413c180a5215d8c1dbbddf5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 19:16:45 +0200 Subject: [PATCH 010/151] do not test lzma level 9 compression got a MemoryError in a vagrant VM, level 9 needs a lot of memory... 
--- borg/testsuite/compress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/compress.py b/borg/testsuite/compress.py index 8019925b2..ce46c9d30 100644 --- a/borg/testsuite/compress.py +++ b/borg/testsuite/compress.py @@ -93,7 +93,7 @@ def test_compressor(): params_list += [ dict(name='lzma', level=0, buffer=buffer), dict(name='lzma', level=6, buffer=buffer), - dict(name='lzma', level=9, buffer=buffer), + # we do not test lzma on level 9 because of the huge memory needs ] for params in params_list: c = Compressor(**params) From e8f4fe0b88b63102cd04b92d526c7e9276cd776c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 19:19:52 +0200 Subject: [PATCH 011/151] pkg-config is needed for llfuse installation --- docs/installation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 6bc38a0aa..4bc60569d 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -127,7 +127,7 @@ Debian Jessie / Ubuntu 14.04 preparations (git/pypi) # in case you get complaints about permission denied on /etc/fuse.conf: # on ubuntu this means your user is not in the "fuse" group. just add # yourself there, log out and log in again. 
- apt-get install libfuse-dev fuse + apt-get install libfuse-dev fuse pkg-config # optional: for unit testing apt-get install fakeroot @@ -151,7 +151,7 @@ Korora / Fedora 21 preparations (git/pypi) sudo dnf install lz4-devel # optional: FUSE support - to mount backup archives - sudo dnf install fuse-devel fuse + sudo dnf install fuse-devel fuse pkgconfig # optional: for unit testing sudo dnf install fakeroot From d74da7c031cc25da0b59ec420e8c815f9b6614b0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 19:26:46 +0200 Subject: [PATCH 012/151] llfuse 0.41 install troubles on some platforms, require < 0.41 UnicodeDecodeError exception due to non-ascii llfuse setup.py --- docs/installation.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 4bc60569d..4d025c822 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -201,7 +201,8 @@ This uses the latest (source package) release from PyPi. source borg-env/bin/activate # always before using! # install borg + dependencies into virtualenv - pip install llfuse # optional, for FUSE support + pip install 'llfuse<0.41' # optional, for FUSE support + # 0.41 and 0.41.1 have unicode issues at install time pip install borgbackup Note: we install into a virtual environment here, but this is not a requirement. @@ -223,7 +224,8 @@ While we try not to break master, there are no guarantees on anything. # install borg + dependencies into virtualenv pip install sphinx # optional, to build the docs - pip install llfuse # optional, for FUSE support + pip install 'llfuse<0.41' # optional, for FUSE support + # 0.41 and 0.41.1 have unicode issues at install time cd borg pip install -r requirements.d/development.txt pip install -e . 
# in-place editable mode From cff7dffc955cd5e1b5184dff2e8123f3c5925400 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 19:38:38 +0200 Subject: [PATCH 013/151] detect lz4.h header file location use similar code as for openssl headers --- docs/usage.rst | 2 ++ setup.py | 28 +++++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 0ce547b93..da6d93f11 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -69,6 +69,8 @@ Directories: Building: BORG_OPENSSL_PREFIX Adds given OpenSSL header file directory to the default locations (setup.py). + BORG_LZ4_PREFIX + Adds given LZ4 header file directory to the default locations (setup.py). Please note: diff --git a/setup.py b/setup.py index 667ba4ee2..3c1880421 100644 --- a/setup.py +++ b/setup.py @@ -71,14 +71,36 @@ def detect_openssl(prefixes): return prefix +def detect_lz4(prefixes): + for prefix in prefixes: + filename = os.path.join(prefix, 'include', 'lz4.h') + if os.path.exists(filename): + with open(filename, 'r') as fd: + if 'LZ4_decompress_safe' in fd.read(): + return prefix + + +include_dirs = [] +library_dirs = [] + possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/borg', '/opt/local'] if os.environ.get('BORG_OPENSSL_PREFIX'): possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX')) ssl_prefix = detect_openssl(possible_openssl_prefixes) if not ssl_prefix: raise Exception('Unable to find OpenSSL >= 1.0 headers. 
(Looked here: {})'.format(', '.join(possible_openssl_prefixes))) -include_dirs = [os.path.join(ssl_prefix, 'include')] -library_dirs = [os.path.join(ssl_prefix, 'lib')] +include_dirs.append(os.path.join(ssl_prefix, 'include')) +library_dirs.append(os.path.join(ssl_prefix, 'lib')) + + +possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/borg', '/opt/local'] +if os.environ.get('BORG_LZ4_PREFIX'): + possible_openssl_prefixes.insert(0, os.environ.get('BORG_LZ4_PREFIX')) +lz4_prefix = detect_lz4(possible_lz4_prefixes) +if not lz4_prefix: + raise Exception('Unable to find LZ4 headers. (Looked here: {})'.format(', '.join(possible_lz4_prefixes))) +include_dirs.append(os.path.join(lz4_prefix, 'include')) +library_dirs.append(os.path.join(lz4_prefix, 'lib')) with open('README.rst', 'r') as fd: @@ -87,7 +109,7 @@ with open('README.rst', 'r') as fd: cmdclass = {'build_ext': build_ext, 'sdist': Sdist} ext_modules = [ - Extension('borg.compress', [compress_source], libraries=['lz4']), + Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs), Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs), Extension('borg.chunker', [chunker_source]), Extension('borg.hashindex', [hashindex_source]) From 6c619000e3b6714e991d62aeaf316f9a53776235 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 22:44:23 +0200 Subject: [PATCH 014/151] pull fixed argparse from pypi in case we have a buggy python see argparse 1.4.0 changelog for details --- setup.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 3c1880421..f59c734d9 100644 --- a/setup.py +++ b/setup.py @@ -4,10 +4,22 @@ import sys from glob import glob min_python = (3, 2) -if sys.version_info < min_python: +my_python = sys.version_info + +if my_python < min_python: print("Borg requires Python %d.%d or later" % min_python) 
sys.exit(1) +# msgpack pure python data corruption was fixed in 0.4.6. +# Also, we might use some rather recent API features. +install_requires=['msgpack-python>=0.4.6', ] + +if (my_python < (3, 2, 4) or + (3, 3, 0) <= my_python < (3, 3, 1)): + # argparse in stdlib does not work there due to a bug, + # pull a fixed argparse from pypi + install_requires.append("argparse>=1.4.0") + from setuptools import setup, Extension from setuptools.command.sdist import sdist @@ -158,7 +170,5 @@ setup( cmdclass=cmdclass, ext_modules=ext_modules, setup_requires=['setuptools_scm>=1.7'], - # msgpack pure python data corruption was fixed in 0.4.6. - # Also, we might use some rather recent API features. - install_requires=['msgpack-python>=0.4.6'], + install_requires=install_requires, ) From 03579ddb5a63d072f86b020b5ef219aaf1003cca Mon Sep 17 00:00:00 2001 From: Thomas Harold Date: Sat, 12 Sep 2015 17:21:49 -0400 Subject: [PATCH 015/151] Obtaining 'char *' from temporary Python value Old code causes a compile error on Mint 17.2 --- borg/hashindex.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index 6652e057f..0b4dc2605 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -37,7 +37,8 @@ cdef class IndexBase: def __cinit__(self, capacity=0, path=None, key_size=32): self.key_size = key_size if path: - self.index = hashindex_read(os.fsencode(path)) + path = os.fsencode(path) + self.index = hashindex_read(path) if not self.index: raise Exception('hashindex_read failed') else: @@ -54,7 +55,8 @@ cdef class IndexBase: return cls(path=path) def write(self, path): - if not hashindex_write(self.index, os.fsencode(path)): + path = os.fsencode(path) + if not hashindex_write(self.index, path): raise Exception('hashindex_write failed') def clear(self): From 7774d4f82ce620abdcc2389ecb9f77a8fbc3070a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 13 Sep 2015 00:36:17 +0200 Subject: [PATCH 016/151] ext3 seems to need 
a bit more space for a sparse file but it is still sparse, just needed some adjustment --- borg/testsuite/archiver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 95df90a0a..d001b5ca3 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -264,7 +264,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): st = os.stat(filename) self.assert_equal(st.st_size, total_len) if sparse_support and hasattr(st, 'st_blocks'): - self.assert_true(st.st_blocks * 512 < total_len / 10) # is input sparse? + self.assert_true(st.st_blocks * 512 < total_len / 9) # is input sparse? self.cmd('init', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') with changedir('output'): @@ -279,7 +279,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): st = os.stat(filename) self.assert_equal(st.st_size, total_len) if sparse_support and hasattr(st, 'st_blocks'): - self.assert_true(st.st_blocks * 512 < total_len / 10) # is output sparse? + self.assert_true(st.st_blocks * 512 < total_len / 9) # is output sparse? def test_unusual_filenames(self): filenames = ['normal', 'with some blanks', '(with_parens)', ] From 2b311846e08f5b42ced5c9fddecda9723d52d4fb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 13 Sep 2015 00:58:57 +0200 Subject: [PATCH 017/151] add a argparse.py (from py 3.2.6) that is not broken also: remove previous attempt to fix this, installing pypi argparse into virtualenv does not work.
--- borg/archiver.py | 4 +- borg/support/__init__.py | 16 + borg/support/argparse.py | 2383 ++++++++++++++++++++++++++++++++++++++ setup.py | 8 +- 4 files changed, 2403 insertions(+), 8 deletions(-) create mode 100644 borg/support/__init__.py create mode 100644 borg/support/argparse.py diff --git a/borg/archiver.py b/borg/archiver.py index fd6422781..465fcc85d 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -1,4 +1,6 @@ -import argparse +from .support import argparse # see support/__init__.py docstring + # DEPRECATED - remove after requiring py 3.4 + from binascii import hexlify from datetime import datetime from operator import attrgetter diff --git a/borg/support/__init__.py b/borg/support/__init__.py new file mode 100644 index 000000000..449fcebfc --- /dev/null +++ b/borg/support/__init__.py @@ -0,0 +1,16 @@ +""" +3rd party stuff that needed fixing + +Note: linux package maintainers feel free to remove any of these hacks + IF your python version is not affected. + +argparse is broken with default args (double conversion): +affects: 3.2.0 <= python < 3.2.4 +affects: 3.3.0 <= python < 3.3.1 + +as we still support 3.2 and 3.3 there is no other way than to bundle +a fixed version (I just took argparse.py from 3.2.6) and import it from +here (see import in archiver.py). +DEPRECATED - remove support.argparse after requiring python 3.4. +""" + diff --git a/borg/support/argparse.py b/borg/support/argparse.py new file mode 100644 index 000000000..da73bc5fc --- /dev/null +++ b/borg/support/argparse.py @@ -0,0 +1,2383 @@ +# Author: Steven J. Bethard . 
+ +"""Command-line parsing library + +This module is an optparse-inspired command-line parsing library that: + + - handles both optional and positional arguments + - produces highly informative usage messages + - supports parsers that dispatch to sub-parsers + +The following is a simple usage example that sums integers from the +command-line and writes the result to a file:: + + parser = argparse.ArgumentParser( + description='sum the integers at the command line') + parser.add_argument( + 'integers', metavar='int', nargs='+', type=int, + help='an integer to be summed') + parser.add_argument( + '--log', default=sys.stdout, type=argparse.FileType('w'), + help='the file where the sum should be written') + args = parser.parse_args() + args.log.write('%s' % sum(args.integers)) + args.log.close() + +The module contains the following public classes: + + - ArgumentParser -- The main entry point for command-line parsing. As the + example above shows, the add_argument() method is used to populate + the parser with actions for optional and positional arguments. Then + the parse_args() method is invoked to convert the args at the + command-line into an object with attributes. + + - ArgumentError -- The exception raised by ArgumentParser objects when + there are errors with the parser's actions. Errors raised while + parsing the command-line are caught by ArgumentParser and emitted + as command-line messages. + + - FileType -- A factory for defining types of files to be created. As the + example above shows, instances of FileType are typically passed as + the type= argument of add_argument() calls. + + - Action -- The base class for parser actions. Typically actions are + selected by passing strings like 'store_true' or 'append_const' to + the action= argument of add_argument(). However, for greater + customization of ArgumentParser actions, subclasses of Action may + be defined and passed as the action= argument. 
+ + - HelpFormatter, RawDescriptionHelpFormatter, RawTextHelpFormatter, + ArgumentDefaultsHelpFormatter -- Formatter classes which + may be passed as the formatter_class= argument to the + ArgumentParser constructor. HelpFormatter is the default, + RawDescriptionHelpFormatter and RawTextHelpFormatter tell the parser + not to change the formatting for help text, and + ArgumentDefaultsHelpFormatter adds information about argument defaults + to the help. + +All other classes in this module are considered implementation details. +(Also note that HelpFormatter and RawDescriptionHelpFormatter are only +considered public as object names -- the API of the formatter objects is +still considered an implementation detail.) +""" + +__version__ = '1.1' +__all__ = [ + 'ArgumentParser', + 'ArgumentError', + 'ArgumentTypeError', + 'FileType', + 'HelpFormatter', + 'ArgumentDefaultsHelpFormatter', + 'RawDescriptionHelpFormatter', + 'RawTextHelpFormatter', + 'Namespace', + 'Action', + 'ONE_OR_MORE', + 'OPTIONAL', + 'PARSER', + 'REMAINDER', + 'SUPPRESS', + 'ZERO_OR_MORE', +] + + +import collections as _collections +import copy as _copy +import os as _os +import re as _re +import sys as _sys +import textwrap as _textwrap + +try: + from gettext import gettext, ngettext +except ImportError: + def gettext(message): + return message + def ngettext(msg1, msg2, n): + return msg1 if n == 1 else msg2 +_ = gettext + + +SUPPRESS = '==SUPPRESS==' + +OPTIONAL = '?' +ZERO_OR_MORE = '*' +ONE_OR_MORE = '+' +PARSER = 'A...' +REMAINDER = '...' +_UNRECOGNIZED_ARGS_ATTR = '_unrecognized_args' + +# ============================= +# Utility functions and classes +# ============================= + +class _AttributeHolder(object): + """Abstract base class that provides __repr__. + + The __repr__ method returns a string in the format:: + ClassName(attr=name, attr=name, ...) + The attributes are determined either by a class-level attribute, + '_kwarg_names', or by inspecting the instance __dict__. 
+ """ + + def __repr__(self): + type_name = type(self).__name__ + arg_strings = [] + for arg in self._get_args(): + arg_strings.append(repr(arg)) + for name, value in self._get_kwargs(): + arg_strings.append('%s=%r' % (name, value)) + return '%s(%s)' % (type_name, ', '.join(arg_strings)) + + def _get_kwargs(self): + return sorted(self.__dict__.items()) + + def _get_args(self): + return [] + + +def _ensure_value(namespace, name, value): + if getattr(namespace, name, None) is None: + setattr(namespace, name, value) + return getattr(namespace, name) + + +# =============== +# Formatting Help +# =============== + +class HelpFormatter(object): + """Formatter for generating usage messages and argument help strings. + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def __init__(self, + prog, + indent_increment=2, + max_help_position=24, + width=None): + + # default setting for width + if width is None: + try: + width = int(_os.environ['COLUMNS']) + except (KeyError, ValueError): + width = 80 + width -= 2 + + self._prog = prog + self._indent_increment = indent_increment + self._max_help_position = max_help_position + self._width = width + + self._current_indent = 0 + self._level = 0 + self._action_max_length = 0 + + self._root_section = self._Section(self, None) + self._current_section = self._root_section + + self._whitespace_matcher = _re.compile(r'\s+') + self._long_break_matcher = _re.compile(r'\n\n\n+') + + # =============================== + # Section and indentation methods + # =============================== + def _indent(self): + self._current_indent += self._indent_increment + self._level += 1 + + def _dedent(self): + self._current_indent -= self._indent_increment + assert self._current_indent >= 0, 'Indent decreased below 0.' 
+ self._level -= 1 + + class _Section(object): + + def __init__(self, formatter, parent, heading=None): + self.formatter = formatter + self.parent = parent + self.heading = heading + self.items = [] + + def format_help(self): + # format the indented section + if self.parent is not None: + self.formatter._indent() + join = self.formatter._join_parts + for func, args in self.items: + func(*args) + item_help = join([func(*args) for func, args in self.items]) + if self.parent is not None: + self.formatter._dedent() + + # return nothing if the section was empty + if not item_help: + return '' + + # add the heading if the section was non-empty + if self.heading is not SUPPRESS and self.heading is not None: + current_indent = self.formatter._current_indent + heading = '%*s%s:\n' % (current_indent, '', self.heading) + else: + heading = '' + + # join the section-initial newline, the heading and the help + return join(['\n', heading, item_help, '\n']) + + def _add_item(self, func, args): + self._current_section.items.append((func, args)) + + # ======================== + # Message building methods + # ======================== + def start_section(self, heading): + self._indent() + section = self._Section(self, self._current_section, heading) + self._add_item(section.format_help, []) + self._current_section = section + + def end_section(self): + self._current_section = self._current_section.parent + self._dedent() + + def add_text(self, text): + if text is not SUPPRESS and text is not None: + self._add_item(self._format_text, [text]) + + def add_usage(self, usage, actions, groups, prefix=None): + if usage is not SUPPRESS: + args = usage, actions, groups, prefix + self._add_item(self._format_usage, args) + + def add_argument(self, action): + if action.help is not SUPPRESS: + + # find all invocations + get_invocation = self._format_action_invocation + invocations = [get_invocation(action)] + for subaction in self._iter_indented_subactions(action): + 
invocations.append(get_invocation(subaction)) + + # update the maximum item length + invocation_length = max([len(s) for s in invocations]) + action_length = invocation_length + self._current_indent + self._action_max_length = max(self._action_max_length, + action_length) + + # add the item to the list + self._add_item(self._format_action, [action]) + + def add_arguments(self, actions): + for action in actions: + self.add_argument(action) + + # ======================= + # Help-formatting methods + # ======================= + def format_help(self): + help = self._root_section.format_help() + if help: + help = self._long_break_matcher.sub('\n\n', help) + help = help.strip('\n') + '\n' + return help + + def _join_parts(self, part_strings): + return ''.join([part + for part in part_strings + if part and part is not SUPPRESS]) + + def _format_usage(self, usage, actions, groups, prefix): + if prefix is None: + prefix = _('usage: ') + + # if usage is specified, use that + if usage is not None: + usage = usage % dict(prog=self._prog) + + # if no optionals or positionals are available, usage is just prog + elif usage is None and not actions: + usage = '%(prog)s' % dict(prog=self._prog) + + # if optionals and positionals are available, calculate usage + elif usage is None: + prog = '%(prog)s' % dict(prog=self._prog) + + # split optionals from positionals + optionals = [] + positionals = [] + for action in actions: + if action.option_strings: + optionals.append(action) + else: + positionals.append(action) + + # build full usage string + format = self._format_actions_usage + action_usage = format(optionals + positionals, groups) + usage = ' '.join([s for s in [prog, action_usage] if s]) + + # wrap the usage parts if it's too long + text_width = self._width - self._current_indent + if len(prefix) + len(usage) > text_width: + + # break usage into wrappable parts + part_regexp = r'\(.*?\)+|\[.*?\]+|\S+' + opt_usage = format(optionals, groups) + pos_usage = format(positionals, 
groups) + opt_parts = _re.findall(part_regexp, opt_usage) + pos_parts = _re.findall(part_regexp, pos_usage) + assert ' '.join(opt_parts) == opt_usage + assert ' '.join(pos_parts) == pos_usage + + # helper for wrapping lines + def get_lines(parts, indent, prefix=None): + lines = [] + line = [] + if prefix is not None: + line_len = len(prefix) - 1 + else: + line_len = len(indent) - 1 + for part in parts: + if line_len + 1 + len(part) > text_width: + lines.append(indent + ' '.join(line)) + line = [] + line_len = len(indent) - 1 + line.append(part) + line_len += len(part) + 1 + if line: + lines.append(indent + ' '.join(line)) + if prefix is not None: + lines[0] = lines[0][len(indent):] + return lines + + # if prog is short, follow it with optionals or positionals + if len(prefix) + len(prog) <= 0.75 * text_width: + indent = ' ' * (len(prefix) + len(prog) + 1) + if opt_parts: + lines = get_lines([prog] + opt_parts, indent, prefix) + lines.extend(get_lines(pos_parts, indent)) + elif pos_parts: + lines = get_lines([prog] + pos_parts, indent, prefix) + else: + lines = [prog] + + # if prog is long, put it on its own line + else: + indent = ' ' * len(prefix) + parts = opt_parts + pos_parts + lines = get_lines(parts, indent) + if len(lines) > 1: + lines = [] + lines.extend(get_lines(opt_parts, indent)) + lines.extend(get_lines(pos_parts, indent)) + lines = [prog] + lines + + # join lines into usage + usage = '\n'.join(lines) + + # prefix with 'usage:' + return '%s%s\n\n' % (prefix, usage) + + def _format_actions_usage(self, actions, groups): + # find group indices and identify actions in groups + group_actions = set() + inserts = {} + for group in groups: + try: + start = actions.index(group._group_actions[0]) + except ValueError: + continue + else: + end = start + len(group._group_actions) + if actions[start:end] == group._group_actions: + for action in group._group_actions: + group_actions.add(action) + if not group.required: + if start in inserts: + inserts[start] += ' [' 
+ else: + inserts[start] = '[' + inserts[end] = ']' + else: + if start in inserts: + inserts[start] += ' (' + else: + inserts[start] = '(' + inserts[end] = ')' + for i in range(start + 1, end): + inserts[i] = '|' + + # collect all actions format strings + parts = [] + for i, action in enumerate(actions): + + # suppressed arguments are marked with None + # remove | separators for suppressed arguments + if action.help is SUPPRESS: + parts.append(None) + if inserts.get(i) == '|': + inserts.pop(i) + elif inserts.get(i + 1) == '|': + inserts.pop(i + 1) + + # produce all arg strings + elif not action.option_strings: + part = self._format_args(action, action.dest) + + # if it's in a group, strip the outer [] + if action in group_actions: + if part[0] == '[' and part[-1] == ']': + part = part[1:-1] + + # add the action string to the list + parts.append(part) + + # produce the first way to invoke the option in brackets + else: + option_string = action.option_strings[0] + + # if the Optional doesn't take a value, format is: + # -s or --long + if action.nargs == 0: + part = '%s' % option_string + + # if the Optional takes a value, format is: + # -s ARGS or --long ARGS + else: + default = action.dest.upper() + args_string = self._format_args(action, default) + part = '%s %s' % (option_string, args_string) + + # make it look optional if it's not required or in a group + if not action.required and action not in group_actions: + part = '[%s]' % part + + # add the action string to the list + parts.append(part) + + # insert things at the necessary indices + for i in sorted(inserts, reverse=True): + parts[i:i] = [inserts[i]] + + # join all the action items with spaces + text = ' '.join([item for item in parts if item is not None]) + + # clean up separators for mutually exclusive groups + open = r'[\[(]' + close = r'[\])]' + text = _re.sub(r'(%s) ' % open, r'\1', text) + text = _re.sub(r' (%s)' % close, r'\1', text) + text = _re.sub(r'%s *%s' % (open, close), r'', text) + text = 
_re.sub(r'\(([^|]*)\)', r'\1', text) + text = text.strip() + + # return the text + return text + + def _format_text(self, text): + if '%(prog)' in text: + text = text % dict(prog=self._prog) + text_width = self._width - self._current_indent + indent = ' ' * self._current_indent + return self._fill_text(text, text_width, indent) + '\n\n' + + def _format_action(self, action): + # determine the required width and the entry label + help_position = min(self._action_max_length + 2, + self._max_help_position) + help_width = self._width - help_position + action_width = help_position - self._current_indent - 2 + action_header = self._format_action_invocation(action) + + # no help; start on same line and add a final newline + if not action.help: + tup = self._current_indent, '', action_header + action_header = '%*s%s\n' % tup + + # short action name; start on the same line and pad two spaces + elif len(action_header) <= action_width: + tup = self._current_indent, '', action_width, action_header + action_header = '%*s%-*s ' % tup + indent_first = 0 + + # long action name; start on the next line + else: + tup = self._current_indent, '', action_header + action_header = '%*s%s\n' % tup + indent_first = help_position + + # collect the pieces of the action help + parts = [action_header] + + # if there was help for the action, add lines of help text + if action.help: + help_text = self._expand_help(action) + help_lines = self._split_lines(help_text, help_width) + parts.append('%*s%s\n' % (indent_first, '', help_lines[0])) + for line in help_lines[1:]: + parts.append('%*s%s\n' % (help_position, '', line)) + + # or add a newline if the description doesn't end with one + elif not action_header.endswith('\n'): + parts.append('\n') + + # if there are any sub-actions, add their help as well + for subaction in self._iter_indented_subactions(action): + parts.append(self._format_action(subaction)) + + # return a single string + return self._join_parts(parts) + + def
_format_action_invocation(self, action): + if not action.option_strings: + metavar, = self._metavar_formatter(action, action.dest)(1) + return metavar + + else: + parts = [] + + # if the Optional doesn't take a value, format is: + # -s, --long + if action.nargs == 0: + parts.extend(action.option_strings) + + # if the Optional takes a value, format is: + # -s ARGS, --long ARGS + else: + default = action.dest.upper() + args_string = self._format_args(action, default) + for option_string in action.option_strings: + parts.append('%s %s' % (option_string, args_string)) + + return ', '.join(parts) + + def _metavar_formatter(self, action, default_metavar): + if action.metavar is not None: + result = action.metavar + elif action.choices is not None: + choice_strs = [str(choice) for choice in action.choices] + result = '{%s}' % ','.join(choice_strs) + else: + result = default_metavar + + def format(tuple_size): + if isinstance(result, tuple): + return result + else: + return (result, ) * tuple_size + return format + + def _format_args(self, action, default_metavar): + get_metavar = self._metavar_formatter(action, default_metavar) + if action.nargs is None: + result = '%s' % get_metavar(1) + elif action.nargs == OPTIONAL: + result = '[%s]' % get_metavar(1) + elif action.nargs == ZERO_OR_MORE: + result = '[%s [%s ...]]' % get_metavar(2) + elif action.nargs == ONE_OR_MORE: + result = '%s [%s ...]' % get_metavar(2) + elif action.nargs == REMAINDER: + result = '...' + elif action.nargs == PARSER: + result = '%s ...' 
% get_metavar(1) + else: + formats = ['%s' for _ in range(action.nargs)] + result = ' '.join(formats) % get_metavar(action.nargs) + return result + + def _expand_help(self, action): + params = dict(vars(action), prog=self._prog) + for name in list(params): + if params[name] is SUPPRESS: + del params[name] + for name in list(params): + if hasattr(params[name], '__name__'): + params[name] = params[name].__name__ + if params.get('choices') is not None: + choices_str = ', '.join([str(c) for c in params['choices']]) + params['choices'] = choices_str + return self._get_help_string(action) % params + + def _iter_indented_subactions(self, action): + try: + get_subactions = action._get_subactions + except AttributeError: + pass + else: + self._indent() + for subaction in get_subactions(): + yield subaction + self._dedent() + + def _split_lines(self, text, width): + text = self._whitespace_matcher.sub(' ', text).strip() + return _textwrap.wrap(text, width) + + def _fill_text(self, text, width, indent): + text = self._whitespace_matcher.sub(' ', text).strip() + return _textwrap.fill(text, width, initial_indent=indent, + subsequent_indent=indent) + + def _get_help_string(self, action): + return action.help + + +class RawDescriptionHelpFormatter(HelpFormatter): + """Help message formatter which retains any formatting in descriptions. + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def _fill_text(self, text, width, indent): + return ''.join([indent + line for line in text.splitlines(True)]) + + +class RawTextHelpFormatter(RawDescriptionHelpFormatter): + """Help message formatter which retains formatting of all help text. + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. 
+ """ + + def _split_lines(self, text, width): + return text.splitlines() + + +class ArgumentDefaultsHelpFormatter(HelpFormatter): + """Help message formatter which adds default values to argument help. + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def _get_help_string(self, action): + help = action.help + if '%(default)' not in action.help: + if action.default is not SUPPRESS: + defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] + if action.option_strings or action.nargs in defaulting_nargs: + help += ' (default: %(default)s)' + return help + + +# ===================== +# Options and Arguments +# ===================== + +def _get_action_name(argument): + if argument is None: + return None + elif argument.option_strings: + return '/'.join(argument.option_strings) + elif argument.metavar not in (None, SUPPRESS): + return argument.metavar + elif argument.dest not in (None, SUPPRESS): + return argument.dest + else: + return None + + +class ArgumentError(Exception): + """An error from creating or using an argument (optional or positional). + + The string value of this exception is the message, augmented with + information about the argument that caused it. + """ + + def __init__(self, argument, message): + self.argument_name = _get_action_name(argument) + self.message = message + + def __str__(self): + if self.argument_name is None: + format = '%(message)s' + else: + format = 'argument %(argument_name)s: %(message)s' + return format % dict(message=self.message, + argument_name=self.argument_name) + + +class ArgumentTypeError(Exception): + """An error from trying to convert a command line string to a type.""" + pass + + +# ============== +# Action classes +# ============== + +class Action(_AttributeHolder): + """Information about how to convert command line strings to Python objects. 
+ + Action objects are used by an ArgumentParser to represent the information + needed to parse a single argument from one or more strings from the + command line. The keyword arguments to the Action constructor are also + all attributes of Action instances. + + Keyword Arguments: + + - option_strings -- A list of command-line option strings which + should be associated with this action. + + - dest -- The name of the attribute to hold the created object(s) + + - nargs -- The number of command-line arguments that should be + consumed. By default, one argument will be consumed and a single + value will be produced. Other values include: + - N (an integer) consumes N arguments (and produces a list) + - '?' consumes zero or one arguments + - '*' consumes zero or more arguments (and produces a list) + - '+' consumes one or more arguments (and produces a list) + Note that the difference between the default and nargs=1 is that + with the default, a single value will be produced, while with + nargs=1, a list containing a single value will be produced. + + - const -- The value to be produced if the option is specified and the + option uses an action that takes no values. + + - default -- The value to be produced if the option is not specified. + + - type -- A callable that accepts a single string argument, and + returns the converted value. The standard Python types str, int, + float, and complex are useful examples of such callables. If None, + str is used. + + - choices -- A container of values that should be allowed. If not None, + after a command-line argument has been converted to the appropriate + type, an exception will be raised if it is not a member of this + collection. + + - required -- True if the action must always be specified at the + command line. This is only meaningful for optional command-line + arguments. + + - help -- The help string describing the argument. + + - metavar -- The name to be used for the option's argument with the + help string. 
If None, the 'dest' value will be used as the name. + """ + + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + self.option_strings = option_strings + self.dest = dest + self.nargs = nargs + self.const = const + self.default = default + self.type = type + self.choices = choices + self.required = required + self.help = help + self.metavar = metavar + + def _get_kwargs(self): + names = [ + 'option_strings', + 'dest', + 'nargs', + 'const', + 'default', + 'type', + 'choices', + 'help', + 'metavar', + ] + return [(name, getattr(self, name)) for name in names] + + def __call__(self, parser, namespace, values, option_string=None): + raise NotImplementedError(_('.__call__() not defined')) + + +class _StoreAction(Action): + + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + if nargs == 0: + raise ValueError('nargs for store actions must be > 0; if you ' + 'have nothing to store, actions such as store ' + 'true or store const may be more appropriate') + if const is not None and nargs != OPTIONAL: + raise ValueError('nargs must be %r to supply const' % OPTIONAL) + super(_StoreAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=nargs, + const=const, + default=default, + type=type, + choices=choices, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, values) + + +class _StoreConstAction(Action): + + def __init__(self, + option_strings, + dest, + const, + default=None, + required=False, + help=None, + metavar=None): + super(_StoreConstAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + const=const, + default=default, + required=required, + help=help) + + def __call__(self, parser, 
namespace, values, option_string=None): + setattr(namespace, self.dest, self.const) + + +class _StoreTrueAction(_StoreConstAction): + + def __init__(self, + option_strings, + dest, + default=False, + required=False, + help=None): + super(_StoreTrueAction, self).__init__( + option_strings=option_strings, + dest=dest, + const=True, + default=default, + required=required, + help=help) + + +class _StoreFalseAction(_StoreConstAction): + + def __init__(self, + option_strings, + dest, + default=True, + required=False, + help=None): + super(_StoreFalseAction, self).__init__( + option_strings=option_strings, + dest=dest, + const=False, + default=default, + required=required, + help=help) + + +class _AppendAction(Action): + + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + if nargs == 0: + raise ValueError('nargs for append actions must be > 0; if arg ' + 'strings are not supplying the value to append, ' + 'the append const action may be more appropriate') + if const is not None and nargs != OPTIONAL: + raise ValueError('nargs must be %r to supply const' % OPTIONAL) + super(_AppendAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=nargs, + const=const, + default=default, + type=type, + choices=choices, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + items = _copy.copy(_ensure_value(namespace, self.dest, [])) + items.append(values) + setattr(namespace, self.dest, items) + + +class _AppendConstAction(Action): + + def __init__(self, + option_strings, + dest, + const, + default=None, + required=False, + help=None, + metavar=None): + super(_AppendConstAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + const=const, + default=default, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, 
values, option_string=None): + items = _copy.copy(_ensure_value(namespace, self.dest, [])) + items.append(self.const) + setattr(namespace, self.dest, items) + + +class _CountAction(Action): + + def __init__(self, + option_strings, + dest, + default=None, + required=False, + help=None): + super(_CountAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + default=default, + required=required, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + new_count = _ensure_value(namespace, self.dest, 0) + 1 + setattr(namespace, self.dest, new_count) + + +class _HelpAction(Action): + + def __init__(self, + option_strings, + dest=SUPPRESS, + default=SUPPRESS, + help=None): + super(_HelpAction, self).__init__( + option_strings=option_strings, + dest=dest, + default=default, + nargs=0, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + parser.print_help() + parser.exit() + + +class _VersionAction(Action): + + def __init__(self, + option_strings, + version=None, + dest=SUPPRESS, + default=SUPPRESS, + help="show program's version number and exit"): + super(_VersionAction, self).__init__( + option_strings=option_strings, + dest=dest, + default=default, + nargs=0, + help=help) + self.version = version + + def __call__(self, parser, namespace, values, option_string=None): + version = self.version + if version is None: + version = parser.version + formatter = parser._get_formatter() + formatter.add_text(version) + parser.exit(message=formatter.format_help()) + + +class _SubParsersAction(Action): + + class _ChoicesPseudoAction(Action): + + def __init__(self, name, aliases, help): + metavar = dest = name + if aliases: + metavar += ' (%s)' % ', '.join(aliases) + sup = super(_SubParsersAction._ChoicesPseudoAction, self) + sup.__init__(option_strings=[], dest=dest, help=help, + metavar=metavar) + + def __init__(self, + option_strings, + prog, + parser_class, + dest=SUPPRESS, + help=None, + 
metavar=None): + + self._prog_prefix = prog + self._parser_class = parser_class + self._name_parser_map = _collections.OrderedDict() + self._choices_actions = [] + + super(_SubParsersAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=PARSER, + choices=self._name_parser_map, + help=help, + metavar=metavar) + + def add_parser(self, name, **kwargs): + # set prog from the existing prefix + if kwargs.get('prog') is None: + kwargs['prog'] = '%s %s' % (self._prog_prefix, name) + + aliases = kwargs.pop('aliases', ()) + + # create a pseudo-action to hold the choice help + if 'help' in kwargs: + help = kwargs.pop('help') + choice_action = self._ChoicesPseudoAction(name, aliases, help) + self._choices_actions.append(choice_action) + + # create the parser and add it to the map + parser = self._parser_class(**kwargs) + self._name_parser_map[name] = parser + + # make parser available under aliases also + for alias in aliases: + self._name_parser_map[alias] = parser + + return parser + + def _get_subactions(self): + return self._choices_actions + + def __call__(self, parser, namespace, values, option_string=None): + parser_name = values[0] + arg_strings = values[1:] + + # set the parser name if requested + if self.dest is not SUPPRESS: + setattr(namespace, self.dest, parser_name) + + # select the parser + try: + parser = self._name_parser_map[parser_name] + except KeyError: + args = {'parser_name': parser_name, + 'choices': ', '.join(self._name_parser_map)} + msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args + raise ArgumentError(self, msg) + + # parse all the remaining options into the namespace + # store any unrecognized options on the object, so that the top + # level parser can decide what to do with them + namespace, arg_strings = parser.parse_known_args(arg_strings, namespace) + if arg_strings: + vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, []) + getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings) + + +# 
============== +# Type classes +# ============== + +class FileType(object): + """Factory for creating file object types + + Instances of FileType are typically passed as type= arguments to the + ArgumentParser add_argument() method. + + Keyword Arguments: + - mode -- A string indicating how the file is to be opened. Accepts the + same values as the builtin open() function. + - bufsize -- The file's desired buffer size. Accepts the same values as + the builtin open() function. + """ + + def __init__(self, mode='r', bufsize=-1): + self._mode = mode + self._bufsize = bufsize + + def __call__(self, string): + # the special argument "-" means sys.std{in,out} + if string == '-': + if 'r' in self._mode: + return _sys.stdin + elif 'w' in self._mode: + return _sys.stdout + else: + msg = _('argument "-" with mode %r') % self._mode + raise ValueError(msg) + + # all other arguments are used as file names + try: + return open(string, self._mode, self._bufsize) + except IOError as e: + message = _("can't open '%s': %s") + raise ArgumentTypeError(message % (string, e)) + + def __repr__(self): + args = self._mode, self._bufsize + args_str = ', '.join(repr(arg) for arg in args if arg != -1) + return '%s(%s)' % (type(self).__name__, args_str) + +# =========================== +# Optional and Positional Parsing +# =========================== + +class Namespace(_AttributeHolder): + """Simple object for storing attributes. + + Implements equality by attribute names and values, and provides a simple + string representation. 
+ """ + + def __init__(self, **kwargs): + for name in kwargs: + setattr(self, name, kwargs[name]) + + def __eq__(self, other): + return vars(self) == vars(other) + + def __ne__(self, other): + return not (self == other) + + def __contains__(self, key): + return key in self.__dict__ + + +class _ActionsContainer(object): + + def __init__(self, + description, + prefix_chars, + argument_default, + conflict_handler): + super(_ActionsContainer, self).__init__() + + self.description = description + self.argument_default = argument_default + self.prefix_chars = prefix_chars + self.conflict_handler = conflict_handler + + # set up registries + self._registries = {} + + # register actions + self.register('action', None, _StoreAction) + self.register('action', 'store', _StoreAction) + self.register('action', 'store_const', _StoreConstAction) + self.register('action', 'store_true', _StoreTrueAction) + self.register('action', 'store_false', _StoreFalseAction) + self.register('action', 'append', _AppendAction) + self.register('action', 'append_const', _AppendConstAction) + self.register('action', 'count', _CountAction) + self.register('action', 'help', _HelpAction) + self.register('action', 'version', _VersionAction) + self.register('action', 'parsers', _SubParsersAction) + + # raise an exception if the conflict handler is invalid + self._get_handler() + + # action storage + self._actions = [] + self._option_string_actions = {} + + # groups + self._action_groups = [] + self._mutually_exclusive_groups = [] + + # defaults storage + self._defaults = {} + + # determines whether an "option" looks like a negative number + self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') + + # whether or not there are any optionals that look like negative + # numbers -- uses a list so it can be shared and edited + self._has_negative_number_optionals = [] + + # ==================== + # Registration methods + # ==================== + def register(self, registry_name, value, object): + 
registry = self._registries.setdefault(registry_name, {}) + registry[value] = object + + def _registry_get(self, registry_name, value, default=None): + return self._registries[registry_name].get(value, default) + + # ================================== + # Namespace default accessor methods + # ================================== + def set_defaults(self, **kwargs): + self._defaults.update(kwargs) + + # if these defaults match any existing arguments, replace + # the previous default on the object with the new one + for action in self._actions: + if action.dest in kwargs: + action.default = kwargs[action.dest] + + def get_default(self, dest): + for action in self._actions: + if action.dest == dest and action.default is not None: + return action.default + return self._defaults.get(dest, None) + + + # ======================= + # Adding argument actions + # ======================= + def add_argument(self, *args, **kwargs): + """ + add_argument(dest, ..., name=value, ...) + add_argument(option_string, option_string, ..., name=value, ...) 
+ """ + + # if no positional args are supplied or only one is supplied and + # it doesn't look like an option string, parse a positional + # argument + chars = self.prefix_chars + if not args or len(args) == 1 and args[0][0] not in chars: + if args and 'dest' in kwargs: + raise ValueError('dest supplied twice for positional argument') + kwargs = self._get_positional_kwargs(*args, **kwargs) + + # otherwise, we're adding an optional argument + else: + kwargs = self._get_optional_kwargs(*args, **kwargs) + + # if no default was supplied, use the parser-level default + if 'default' not in kwargs: + dest = kwargs['dest'] + if dest in self._defaults: + kwargs['default'] = self._defaults[dest] + elif self.argument_default is not None: + kwargs['default'] = self.argument_default + + # create the action object, and add it to the parser + action_class = self._pop_action_class(kwargs) + if not callable(action_class): + raise ValueError('unknown action "%s"' % (action_class,)) + action = action_class(**kwargs) + + # raise an error if the action type is not callable + type_func = self._registry_get('type', action.type, action.type) + if not callable(type_func): + raise ValueError('%r is not callable' % (type_func,)) + + # raise an error if the metavar does not match the type + if hasattr(self, "_get_formatter"): + try: + self._get_formatter()._format_args(action, None) + except TypeError: + raise ValueError("length of metavar tuple does not match nargs") + + return self._add_action(action) + + def add_argument_group(self, *args, **kwargs): + group = _ArgumentGroup(self, *args, **kwargs) + self._action_groups.append(group) + return group + + def add_mutually_exclusive_group(self, **kwargs): + group = _MutuallyExclusiveGroup(self, **kwargs) + self._mutually_exclusive_groups.append(group) + return group + + def _add_action(self, action): + # resolve any conflicts + self._check_conflict(action) + + # add to actions list + self._actions.append(action) + action.container = self + + # 
index the action by any option strings it has + for option_string in action.option_strings: + self._option_string_actions[option_string] = action + + # set the flag if any option strings look like negative numbers + for option_string in action.option_strings: + if self._negative_number_matcher.match(option_string): + if not self._has_negative_number_optionals: + self._has_negative_number_optionals.append(True) + + # return the created action + return action + + def _remove_action(self, action): + self._actions.remove(action) + + def _add_container_actions(self, container): + # collect groups by titles + title_group_map = {} + for group in self._action_groups: + if group.title in title_group_map: + msg = _('cannot merge actions - two groups are named %r') + raise ValueError(msg % (group.title)) + title_group_map[group.title] = group + + # map each action to its group + group_map = {} + for group in container._action_groups: + + # if a group with the title exists, use that, otherwise + # create a new group matching the container's group + if group.title not in title_group_map: + title_group_map[group.title] = self.add_argument_group( + title=group.title, + description=group.description, + conflict_handler=group.conflict_handler) + + # map the actions to their new group + for action in group._group_actions: + group_map[action] = title_group_map[group.title] + + # add container's mutually exclusive groups + # NOTE: if add_mutually_exclusive_group ever gains title= and + # description= then this code will need to be expanded as above + for group in container._mutually_exclusive_groups: + mutex_group = self.add_mutually_exclusive_group( + required=group.required) + + # map the actions to their new mutex group + for action in group._group_actions: + group_map[action] = mutex_group + + # add all actions to this container or their group + for action in container._actions: + group_map.get(action, self)._add_action(action) + + def _get_positional_kwargs(self, dest, **kwargs): 
+ # make sure required is not specified + if 'required' in kwargs: + msg = _("'required' is an invalid argument for positionals") + raise TypeError(msg) + + # mark positional arguments as required if at least one is + # always required + if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: + kwargs['required'] = True + if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: + kwargs['required'] = True + + # return the keyword arguments with no option strings + return dict(kwargs, dest=dest, option_strings=[]) + + def _get_optional_kwargs(self, *args, **kwargs): + # determine short and long option strings + option_strings = [] + long_option_strings = [] + for option_string in args: + # error on strings that don't start with an appropriate prefix + if not option_string[0] in self.prefix_chars: + args = {'option': option_string, + 'prefix_chars': self.prefix_chars} + msg = _('invalid option string %(option)r: ' + 'must start with a character %(prefix_chars)r') + raise ValueError(msg % args) + + # strings starting with two prefix characters are long options + option_strings.append(option_string) + if option_string[0] in self.prefix_chars: + if len(option_string) > 1: + if option_string[1] in self.prefix_chars: + long_option_strings.append(option_string) + + # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' + dest = kwargs.pop('dest', None) + if dest is None: + if long_option_strings: + dest_option_string = long_option_strings[0] + else: + dest_option_string = option_strings[0] + dest = dest_option_string.lstrip(self.prefix_chars) + if not dest: + msg = _('dest= is required for options like %r') + raise ValueError(msg % option_string) + dest = dest.replace('-', '_') + + # return the updated keyword arguments + return dict(kwargs, dest=dest, option_strings=option_strings) + + def _pop_action_class(self, kwargs, default=None): + action = kwargs.pop('action', default) + return self._registry_get('action', action, action) + + def 
_get_handler(self): + # determine function from conflict handler string + handler_func_name = '_handle_conflict_%s' % self.conflict_handler + try: + return getattr(self, handler_func_name) + except AttributeError: + msg = _('invalid conflict_resolution value: %r') + raise ValueError(msg % self.conflict_handler) + + def _check_conflict(self, action): + + # find all options that conflict with this option + confl_optionals = [] + for option_string in action.option_strings: + if option_string in self._option_string_actions: + confl_optional = self._option_string_actions[option_string] + confl_optionals.append((option_string, confl_optional)) + + # resolve any conflicts + if confl_optionals: + conflict_handler = self._get_handler() + conflict_handler(action, confl_optionals) + + def _handle_conflict_error(self, action, conflicting_actions): + message = ngettext('conflicting option string: %s', + 'conflicting option strings: %s', + len(conflicting_actions)) + conflict_string = ', '.join([option_string + for option_string, action + in conflicting_actions]) + raise ArgumentError(action, message % conflict_string) + + def _handle_conflict_resolve(self, action, conflicting_actions): + + # remove all conflicting options + for option_string, action in conflicting_actions: + + # remove the conflicting option + action.option_strings.remove(option_string) + self._option_string_actions.pop(option_string, None) + + # if the option now has no option string, remove it from the + # container holding it + if not action.option_strings: + action.container._remove_action(action) + + +class _ArgumentGroup(_ActionsContainer): + + def __init__(self, container, title=None, description=None, **kwargs): + # add any missing keyword arguments by checking the container + update = kwargs.setdefault + update('conflict_handler', container.conflict_handler) + update('prefix_chars', container.prefix_chars) + update('argument_default', container.argument_default) + super_init = super(_ArgumentGroup, 
self).__init__ + super_init(description=description, **kwargs) + + # group attributes + self.title = title + self._group_actions = [] + + # share most attributes with the container + self._registries = container._registries + self._actions = container._actions + self._option_string_actions = container._option_string_actions + self._defaults = container._defaults + self._has_negative_number_optionals = \ + container._has_negative_number_optionals + self._mutually_exclusive_groups = container._mutually_exclusive_groups + + def _add_action(self, action): + action = super(_ArgumentGroup, self)._add_action(action) + self._group_actions.append(action) + return action + + def _remove_action(self, action): + super(_ArgumentGroup, self)._remove_action(action) + self._group_actions.remove(action) + + +class _MutuallyExclusiveGroup(_ArgumentGroup): + + def __init__(self, container, required=False): + super(_MutuallyExclusiveGroup, self).__init__(container) + self.required = required + self._container = container + + def _add_action(self, action): + if action.required: + msg = _('mutually exclusive arguments must be optional') + raise ValueError(msg) + action = self._container._add_action(action) + self._group_actions.append(action) + return action + + def _remove_action(self, action): + self._container._remove_action(action) + self._group_actions.remove(action) + + +class ArgumentParser(_AttributeHolder, _ActionsContainer): + """Object for parsing command line strings into Python objects. 
+ + Keyword Arguments: + - prog -- The name of the program (default: sys.argv[0]) + - usage -- A usage message (default: auto-generated from arguments) + - description -- A description of what the program does + - epilog -- Text following the argument descriptions + - parents -- Parsers whose arguments should be copied into this one + - formatter_class -- HelpFormatter class for printing help messages + - prefix_chars -- Characters that prefix optional arguments + - fromfile_prefix_chars -- Characters that prefix files containing + additional arguments + - argument_default -- The default value for all arguments + - conflict_handler -- String indicating how to handle conflicts + - add_help -- Add a -h/-help option + """ + + def __init__(self, + prog=None, + usage=None, + description=None, + epilog=None, + version=None, + parents=[], + formatter_class=HelpFormatter, + prefix_chars='-', + fromfile_prefix_chars=None, + argument_default=None, + conflict_handler='error', + add_help=True): + + if version is not None: + import warnings + warnings.warn( + """The "version" argument to ArgumentParser is deprecated. 
""" + """Please use """ + """"add_argument(..., action='version', version="N", ...)" """ + """instead""", DeprecationWarning) + + superinit = super(ArgumentParser, self).__init__ + superinit(description=description, + prefix_chars=prefix_chars, + argument_default=argument_default, + conflict_handler=conflict_handler) + + # default setting for prog + if prog is None: + prog = _os.path.basename(_sys.argv[0]) + + self.prog = prog + self.usage = usage + self.epilog = epilog + self.version = version + self.formatter_class = formatter_class + self.fromfile_prefix_chars = fromfile_prefix_chars + self.add_help = add_help + + add_group = self.add_argument_group + self._positionals = add_group(_('positional arguments')) + self._optionals = add_group(_('optional arguments')) + self._subparsers = None + + # register types + def identity(string): + return string + self.register('type', None, identity) + + # add help and version arguments if necessary + # (using explicit default to override global argument_default) + default_prefix = '-' if '-' in prefix_chars else prefix_chars[0] + if self.add_help: + self.add_argument( + default_prefix+'h', default_prefix*2+'help', + action='help', default=SUPPRESS, + help=_('show this help message and exit')) + if self.version: + self.add_argument( + default_prefix+'v', default_prefix*2+'version', + action='version', default=SUPPRESS, + version=self.version, + help=_("show program's version number and exit")) + + # add parent arguments and defaults + for parent in parents: + self._add_container_actions(parent) + try: + defaults = parent._defaults + except AttributeError: + pass + else: + self._defaults.update(defaults) + + # ======================= + # Pretty __repr__ methods + # ======================= + def _get_kwargs(self): + names = [ + 'prog', + 'usage', + 'description', + 'version', + 'formatter_class', + 'conflict_handler', + 'add_help', + ] + return [(name, getattr(self, name)) for name in names] + + # 
================================== + # Optional/Positional adding methods + # ================================== + def add_subparsers(self, **kwargs): + if self._subparsers is not None: + self.error(_('cannot have multiple subparser arguments')) + + # add the parser class to the arguments if it's not present + kwargs.setdefault('parser_class', type(self)) + + if 'title' in kwargs or 'description' in kwargs: + title = _(kwargs.pop('title', 'subcommands')) + description = _(kwargs.pop('description', None)) + self._subparsers = self.add_argument_group(title, description) + else: + self._subparsers = self._positionals + + # prog defaults to the usage message of this parser, skipping + # optional arguments and with no "usage:" prefix + if kwargs.get('prog') is None: + formatter = self._get_formatter() + positionals = self._get_positional_actions() + groups = self._mutually_exclusive_groups + formatter.add_usage(self.usage, positionals, groups, '') + kwargs['prog'] = formatter.format_help().strip() + + # create the parsers action and add it to the positionals list + parsers_class = self._pop_action_class(kwargs, 'parsers') + action = parsers_class(option_strings=[], **kwargs) + self._subparsers._add_action(action) + + # return the created parsers action + return action + + def _add_action(self, action): + if action.option_strings: + self._optionals._add_action(action) + else: + self._positionals._add_action(action) + return action + + def _get_optional_actions(self): + return [action + for action in self._actions + if action.option_strings] + + def _get_positional_actions(self): + return [action + for action in self._actions + if not action.option_strings] + + # ===================================== + # Command line argument parsing methods + # ===================================== + def parse_args(self, args=None, namespace=None): + args, argv = self.parse_known_args(args, namespace) + if argv: + msg = _('unrecognized arguments: %s') + self.error(msg % ' '.join(argv)) + 
return args + + def parse_known_args(self, args=None, namespace=None): + if args is None: + # args default to the system args + args = _sys.argv[1:] + else: + # make sure that args are mutable + args = list(args) + + # default Namespace built from parser defaults + if namespace is None: + namespace = Namespace() + + # add any action defaults that aren't present + for action in self._actions: + if action.dest is not SUPPRESS: + if not hasattr(namespace, action.dest): + if action.default is not SUPPRESS: + setattr(namespace, action.dest, action.default) + + # add any parser defaults that aren't present + for dest in self._defaults: + if not hasattr(namespace, dest): + setattr(namespace, dest, self._defaults[dest]) + + # parse the arguments and exit if there are any errors + try: + namespace, args = self._parse_known_args(args, namespace) + if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): + args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) + delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) + return namespace, args + except ArgumentError: + err = _sys.exc_info()[1] + self.error(str(err)) + + def _parse_known_args(self, arg_strings, namespace): + # replace arg strings that are file references + if self.fromfile_prefix_chars is not None: + arg_strings = self._read_args_from_files(arg_strings) + + # map all mutually exclusive arguments to the other arguments + # they can't occur with + action_conflicts = {} + for mutex_group in self._mutually_exclusive_groups: + group_actions = mutex_group._group_actions + for i, mutex_action in enumerate(mutex_group._group_actions): + conflicts = action_conflicts.setdefault(mutex_action, []) + conflicts.extend(group_actions[:i]) + conflicts.extend(group_actions[i + 1:]) + + # find all option indices, and determine the arg_string_pattern + # which has an 'O' if there is an option at an index, + # an 'A' if there is an argument, or a '-' if there is a '--' + option_string_indices = {} + arg_string_pattern_parts = [] + arg_strings_iter = 
iter(arg_strings) + for i, arg_string in enumerate(arg_strings_iter): + + # all args after -- are non-options + if arg_string == '--': + arg_string_pattern_parts.append('-') + for arg_string in arg_strings_iter: + arg_string_pattern_parts.append('A') + + # otherwise, add the arg to the arg strings + # and note the index if it was an option + else: + option_tuple = self._parse_optional(arg_string) + if option_tuple is None: + pattern = 'A' + else: + option_string_indices[i] = option_tuple + pattern = 'O' + arg_string_pattern_parts.append(pattern) + + # join the pieces together to form the pattern + arg_strings_pattern = ''.join(arg_string_pattern_parts) + + # converts arg strings to the appropriate and then takes the action + seen_actions = set() + seen_non_default_actions = set() + + def take_action(action, argument_strings, option_string=None): + seen_actions.add(action) + argument_values = self._get_values(action, argument_strings) + + # error if this argument is not allowed with other previously + # seen arguments, assuming that actions that use the default + # value don't really count as "present" + if argument_values is not action.default: + seen_non_default_actions.add(action) + for conflict_action in action_conflicts.get(action, []): + if conflict_action in seen_non_default_actions: + msg = _('not allowed with argument %s') + action_name = _get_action_name(conflict_action) + raise ArgumentError(action, msg % action_name) + + # take the action if we didn't receive a SUPPRESS value + # (e.g. from a default) + if argument_values is not SUPPRESS: + action(self, namespace, argument_values, option_string) + + # function to convert arg_strings into an optional action + def consume_optional(start_index): + + # get the optional identified at this index + option_tuple = option_string_indices[start_index] + action, option_string, explicit_arg = option_tuple + + # identify additional optionals in the same arg string + # (e.g. 
-xyz is the same as -x -y -z if no args are required) + match_argument = self._match_argument + action_tuples = [] + while True: + + # if we found no optional action, skip it + if action is None: + extras.append(arg_strings[start_index]) + return start_index + 1 + + # if there is an explicit argument, try to match the + # optional's string arguments to only this + if explicit_arg is not None: + arg_count = match_argument(action, 'A') + + # if the action is a single-dash option and takes no + # arguments, try to parse more single-dash options out + # of the tail of the option string + chars = self.prefix_chars + if arg_count == 0 and option_string[1] not in chars: + action_tuples.append((action, [], option_string)) + char = option_string[0] + option_string = char + explicit_arg[0] + new_explicit_arg = explicit_arg[1:] or None + optionals_map = self._option_string_actions + if option_string in optionals_map: + action = optionals_map[option_string] + explicit_arg = new_explicit_arg + else: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) + + # if the action expect exactly one argument, we've + # successfully matched the option; exit the loop + elif arg_count == 1: + stop = start_index + 1 + args = [explicit_arg] + action_tuples.append((action, args, option_string)) + break + + # error if a double-dash option did not use the + # explicit argument + else: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) + + # if there is no explicit argument, try to match the + # optional's string arguments with the following strings + # if successful, exit the loop + else: + start = start_index + 1 + selected_patterns = arg_strings_pattern[start:] + arg_count = match_argument(action, selected_patterns) + stop = start + arg_count + args = arg_strings[start:stop] + action_tuples.append((action, args, option_string)) + break + + # add the Optional to the list and return the index at which + # the 
Optional's string args stopped + assert action_tuples + for action, args, option_string in action_tuples: + take_action(action, args, option_string) + return stop + + # the list of Positionals left to be parsed; this is modified + # by consume_positionals() + positionals = self._get_positional_actions() + + # function to convert arg_strings into positional actions + def consume_positionals(start_index): + # match as many Positionals as possible + match_partial = self._match_arguments_partial + selected_pattern = arg_strings_pattern[start_index:] + arg_counts = match_partial(positionals, selected_pattern) + + # slice off the appropriate arg strings for each Positional + # and add the Positional and its args to the list + for action, arg_count in zip(positionals, arg_counts): + args = arg_strings[start_index: start_index + arg_count] + start_index += arg_count + take_action(action, args) + + # slice off the Positionals that we just parsed and return the + # index at which the Positionals' string args stopped + positionals[:] = positionals[len(arg_counts):] + return start_index + + # consume Positionals and Optionals alternately, until we have + # passed the last option string + extras = [] + start_index = 0 + if option_string_indices: + max_option_string_index = max(option_string_indices) + else: + max_option_string_index = -1 + while start_index <= max_option_string_index: + + # consume any Positionals preceding the next option + next_option_string_index = min([ + index + for index in option_string_indices + if index >= start_index]) + if start_index != next_option_string_index: + positionals_end_index = consume_positionals(start_index) + + # only try to parse the next optional if we didn't consume + # the option string during the positionals parsing + if positionals_end_index > start_index: + start_index = positionals_end_index + continue + else: + start_index = positionals_end_index + + # if we consumed all the positionals we could and we're not + # at the index 
of an option string, there were extra arguments + if start_index not in option_string_indices: + strings = arg_strings[start_index:next_option_string_index] + extras.extend(strings) + start_index = next_option_string_index + + # consume the next optional and any arguments for it + start_index = consume_optional(start_index) + + # consume any positionals following the last Optional + stop_index = consume_positionals(start_index) + + # if we didn't consume all the argument strings, there were extras + extras.extend(arg_strings[stop_index:]) + + # if we didn't use all the Positional objects, there were too few + # arg strings supplied. + if positionals: + self.error(_('too few arguments')) + + # make sure all required actions were present, and convert defaults. + for action in self._actions: + if action not in seen_actions: + if action.required: + name = _get_action_name(action) + self.error(_('argument %s is required') % name) + else: + # Convert action default now instead of doing it before + # parsing arguments to avoid calling convert functions + # twice (which may fail) if the argument was given, but + # only if it was defined already in the namespace + if (action.default is not None and + isinstance(action.default, str) and + hasattr(namespace, action.dest) and + action.default is getattr(namespace, action.dest)): + setattr(namespace, action.dest, + self._get_value(action, action.default)) + + # make sure all required groups had one option present + for group in self._mutually_exclusive_groups: + if group.required: + for action in group._group_actions: + if action in seen_non_default_actions: + break + + # if no actions were used, report the error + else: + names = [_get_action_name(action) + for action in group._group_actions + if action.help is not SUPPRESS] + msg = _('one of the arguments %s is required') + self.error(msg % ' '.join(names)) + + # return the updated namespace and the extra arguments + return namespace, extras + + def 
_read_args_from_files(self, arg_strings): + # expand arguments referencing files + new_arg_strings = [] + for arg_string in arg_strings: + + # for regular arguments, just add them back into the list + if not arg_string or arg_string[0] not in self.fromfile_prefix_chars: + new_arg_strings.append(arg_string) + + # replace arguments referencing files with the file content + else: + try: + args_file = open(arg_string[1:]) + try: + arg_strings = [] + for arg_line in args_file.read().splitlines(): + for arg in self.convert_arg_line_to_args(arg_line): + arg_strings.append(arg) + arg_strings = self._read_args_from_files(arg_strings) + new_arg_strings.extend(arg_strings) + finally: + args_file.close() + except IOError: + err = _sys.exc_info()[1] + self.error(str(err)) + + # return the modified argument list + return new_arg_strings + + def convert_arg_line_to_args(self, arg_line): + return [arg_line] + + def _match_argument(self, action, arg_strings_pattern): + # match the pattern for this action to the arg strings + nargs_pattern = self._get_nargs_pattern(action) + match = _re.match(nargs_pattern, arg_strings_pattern) + + # raise an exception if we weren't able to find a match + if match is None: + nargs_errors = { + None: _('expected one argument'), + OPTIONAL: _('expected at most one argument'), + ONE_OR_MORE: _('expected at least one argument'), + } + default = ngettext('expected %s argument', + 'expected %s arguments', + action.nargs) % action.nargs + msg = nargs_errors.get(action.nargs, default) + raise ArgumentError(action, msg) + + # return the number of arguments matched + return len(match.group(1)) + + def _match_arguments_partial(self, actions, arg_strings_pattern): + # progressively shorten the actions list by slicing off the + # final actions until we find a match + result = [] + for i in range(len(actions), 0, -1): + actions_slice = actions[:i] + pattern = ''.join([self._get_nargs_pattern(action) + for action in actions_slice]) + match = _re.match(pattern, 
arg_strings_pattern) + if match is not None: + result.extend([len(string) for string in match.groups()]) + break + + # return the list of arg string counts + return result + + def _parse_optional(self, arg_string): + # if it's an empty string, it was meant to be a positional + if not arg_string: + return None + + # if it doesn't start with a prefix, it was meant to be positional + if not arg_string[0] in self.prefix_chars: + return None + + # if the option string is present in the parser, return the action + if arg_string in self._option_string_actions: + action = self._option_string_actions[arg_string] + return action, arg_string, None + + # if it's just a single character, it was meant to be positional + if len(arg_string) == 1: + return None + + # if the option string before the "=" is present, return the action + if '=' in arg_string: + option_string, explicit_arg = arg_string.split('=', 1) + if option_string in self._option_string_actions: + action = self._option_string_actions[option_string] + return action, option_string, explicit_arg + + # search through all possible prefixes of the option string + # and all actions in the parser for possible interpretations + option_tuples = self._get_option_tuples(arg_string) + + # if multiple actions match, the option string was ambiguous + if len(option_tuples) > 1: + options = ', '.join([option_string + for action, option_string, explicit_arg in option_tuples]) + args = {'option': arg_string, 'matches': options} + msg = _('ambiguous option: %(option)s could match %(matches)s') + self.error(msg % args) + + # if exactly one action matched, this segmentation is good, + # so return the parsed action + elif len(option_tuples) == 1: + option_tuple, = option_tuples + return option_tuple + + # if it was not found as an option, but it looks like a negative + # number, it was meant to be positional + # unless there are negative-number-like options + if self._negative_number_matcher.match(arg_string): + if not 
self._has_negative_number_optionals: + return None + + # if it contains a space, it was meant to be a positional + if ' ' in arg_string: + return None + + # it was meant to be an optional but there is no such option + # in this parser (though it might be a valid option in a subparser) + return None, arg_string, None + + def _get_option_tuples(self, option_string): + result = [] + + # option strings starting with two prefix characters are only + # split at the '=' + chars = self.prefix_chars + if option_string[0] in chars and option_string[1] in chars: + if '=' in option_string: + option_prefix, explicit_arg = option_string.split('=', 1) + else: + option_prefix = option_string + explicit_arg = None + for option_string in self._option_string_actions: + if option_string.startswith(option_prefix): + action = self._option_string_actions[option_string] + tup = action, option_string, explicit_arg + result.append(tup) + + # single character options can be concatenated with their arguments + # but multiple character options always have to have their argument + # separate + elif option_string[0] in chars and option_string[1] not in chars: + option_prefix = option_string + explicit_arg = None + short_option_prefix = option_string[:2] + short_explicit_arg = option_string[2:] + + for option_string in self._option_string_actions: + if option_string == short_option_prefix: + action = self._option_string_actions[option_string] + tup = action, option_string, short_explicit_arg + result.append(tup) + elif option_string.startswith(option_prefix): + action = self._option_string_actions[option_string] + tup = action, option_string, explicit_arg + result.append(tup) + + # shouldn't ever get here + else: + self.error(_('unexpected option string: %s') % option_string) + + # return the collected option tuples + return result + + def _get_nargs_pattern(self, action): + # in all examples below, we have to allow for '--' args + # which are represented as '-' in the pattern + nargs = 
action.nargs + + # the default (None) is assumed to be a single argument + if nargs is None: + nargs_pattern = '(-*A-*)' + + # allow zero or one arguments + elif nargs == OPTIONAL: + nargs_pattern = '(-*A?-*)' + + # allow zero or more arguments + elif nargs == ZERO_OR_MORE: + nargs_pattern = '(-*[A-]*)' + + # allow one or more arguments + elif nargs == ONE_OR_MORE: + nargs_pattern = '(-*A[A-]*)' + + # allow any number of options or arguments + elif nargs == REMAINDER: + nargs_pattern = '([-AO]*)' + + # allow one argument followed by any number of options or arguments + elif nargs == PARSER: + nargs_pattern = '(-*A[-AO]*)' + + # all others should be integers + else: + nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) + + # if this is an optional action, -- is not allowed + if action.option_strings: + nargs_pattern = nargs_pattern.replace('-*', '') + nargs_pattern = nargs_pattern.replace('-', '') + + # return the pattern + return nargs_pattern + + # ======================== + # Value conversion methods + # ======================== + def _get_values(self, action, arg_strings): + # for everything but PARSER, REMAINDER args, strip out first '--' + if action.nargs not in [PARSER, REMAINDER]: + try: + arg_strings.remove('--') + except ValueError: + pass + + # optional argument produces a default when not present + if not arg_strings and action.nargs == OPTIONAL: + if action.option_strings: + value = action.const + else: + value = action.default + if isinstance(value, str): + value = self._get_value(action, value) + self._check_value(action, value) + + # when nargs='*' on a positional, if there were no command-line + # args, use the default if it is anything other than None + elif (not arg_strings and action.nargs == ZERO_OR_MORE and + not action.option_strings): + if action.default is not None: + value = action.default + else: + value = arg_strings + self._check_value(action, value) + + # single argument or optional argument produces a single value + elif 
len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: + arg_string, = arg_strings + value = self._get_value(action, arg_string) + self._check_value(action, value) + + # REMAINDER arguments convert all values, checking none + elif action.nargs == REMAINDER: + value = [self._get_value(action, v) for v in arg_strings] + + # PARSER arguments convert all values, but check only the first + elif action.nargs == PARSER: + value = [self._get_value(action, v) for v in arg_strings] + self._check_value(action, value[0]) + + # all other types of nargs produce a list + else: + value = [self._get_value(action, v) for v in arg_strings] + for v in value: + self._check_value(action, v) + + # return the converted value + return value + + def _get_value(self, action, arg_string): + type_func = self._registry_get('type', action.type, action.type) + if not callable(type_func): + msg = _('%r is not callable') + raise ArgumentError(action, msg % type_func) + + # convert the value to the appropriate type + try: + result = type_func(arg_string) + + # ArgumentTypeErrors indicate errors + except ArgumentTypeError: + name = getattr(action.type, '__name__', repr(action.type)) + msg = str(_sys.exc_info()[1]) + raise ArgumentError(action, msg) + + # TypeErrors or ValueErrors also indicate errors + except (TypeError, ValueError): + name = getattr(action.type, '__name__', repr(action.type)) + args = {'type': name, 'value': arg_string} + msg = _('invalid %(type)s value: %(value)r') + raise ArgumentError(action, msg % args) + + # return the converted value + return result + + def _check_value(self, action, value): + # converted value must be one of the choices (if specified) + if action.choices is not None and value not in action.choices: + args = {'value': value, + 'choices': ', '.join(map(repr, action.choices))} + msg = _('invalid choice: %(value)r (choose from %(choices)s)') + raise ArgumentError(action, msg % args) + + # ======================= + # Help-formatting methods + # 
======================= + def format_usage(self): + formatter = self._get_formatter() + formatter.add_usage(self.usage, self._actions, + self._mutually_exclusive_groups) + return formatter.format_help() + + def format_help(self): + formatter = self._get_formatter() + + # usage + formatter.add_usage(self.usage, self._actions, + self._mutually_exclusive_groups) + + # description + formatter.add_text(self.description) + + # positionals, optionals and user-defined groups + for action_group in self._action_groups: + formatter.start_section(action_group.title) + formatter.add_text(action_group.description) + formatter.add_arguments(action_group._group_actions) + formatter.end_section() + + # epilog + formatter.add_text(self.epilog) + + # determine help from format above + return formatter.format_help() + + def format_version(self): + import warnings + warnings.warn( + 'The format_version method is deprecated -- the "version" ' + 'argument to ArgumentParser is no longer supported.', + DeprecationWarning) + formatter = self._get_formatter() + formatter.add_text(self.version) + return formatter.format_help() + + def _get_formatter(self): + return self.formatter_class(prog=self.prog) + + # ===================== + # Help-printing methods + # ===================== + def print_usage(self, file=None): + if file is None: + file = _sys.stdout + self._print_message(self.format_usage(), file) + + def print_help(self, file=None): + if file is None: + file = _sys.stdout + self._print_message(self.format_help(), file) + + def print_version(self, file=None): + import warnings + warnings.warn( + 'The print_version method is deprecated -- the "version" ' + 'argument to ArgumentParser is no longer supported.', + DeprecationWarning) + self._print_message(self.format_version(), file) + + def _print_message(self, message, file=None): + if message: + if file is None: + file = _sys.stderr + file.write(message) + + # =============== + # Exiting methods + # =============== + def exit(self, 
status=0, message=None): + if message: + self._print_message(message, _sys.stderr) + _sys.exit(status) + + def error(self, message): + """error(message: string) + + Prints a usage message incorporating the message to stderr and + exits. + + If you override this in a subclass, it should not return -- it + should either exit or raise an exception. + """ + self.print_usage(_sys.stderr) + args = {'prog': self.prog, 'message': message} + self.exit(2, _('%(prog)s: error: %(message)s\n') % args) diff --git a/setup.py b/setup.py index f59c734d9..c68b9651b 100644 --- a/setup.py +++ b/setup.py @@ -14,12 +14,6 @@ if my_python < min_python: # Also, we might use some rather recent API features. install_requires=['msgpack-python>=0.4.6', ] -if (my_python < (3, 2, 4) or - (3, 3, 0) <= my_python < (3, 3, 1)): - # argparse in stdlib does not work there due to a bug, - # pull a fixed argparse from pypi - install_requires.append("argparse>=1.4.0") - from setuptools import setup, Extension from setuptools.command.sdist import sdist @@ -161,7 +155,7 @@ setup( 'Topic :: Security :: Cryptography', 'Topic :: System :: Archiving :: Backup', ], - packages=['borg', 'borg.testsuite'], + packages=['borg', 'borg.testsuite', 'borg.support', ], entry_points={ 'console_scripts': [ 'borg = borg.archiver:main', From bc2cfdfc595508d0bdd2156f066124a282f89291 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 13 Sep 2015 01:01:48 +0200 Subject: [PATCH 018/151] fix the other argparse import also --- borg/helpers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/borg/helpers.py b/borg/helpers.py index 0da9918f8..45d200a48 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1,4 +1,6 @@ -import argparse +from .support import argparse # see support/__init__.py docstring + # DEPRECATED - remove after requiring py 3.4 + import binascii from collections import namedtuple from functools import wraps From c57841940122e564383c82077a112bb4ff1b69b1 Mon Sep 17 00:00:00 2001 From: 
Thomas Waldmann Date: Sun, 13 Sep 2015 01:21:45 +0200 Subject: [PATCH 019/151] omit support files from coverage metrics --- .coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/.coveragerc b/.coveragerc index 620f29fef..7c4ccf9e1 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,6 +5,7 @@ omit = borg/__init__.py borg/__main__.py borg/_version.py + borg/support/*.py [report] exclude_lines = From 5eb04969f877fa9e6b0b99438133e7f20d30d794 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 13 Sep 2015 18:03:43 +0200 Subject: [PATCH 020/151] setup.py: add the place where we link the lz4 header/lib on the darwin vagrant VM --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c68b9651b..2e5e04b50 100644 --- a/setup.py +++ b/setup.py @@ -99,7 +99,7 @@ include_dirs.append(os.path.join(ssl_prefix, 'include')) library_dirs.append(os.path.join(ssl_prefix, 'lib')) -possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/borg', '/opt/local'] +possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4', '/usr/local/borg', '/opt/local'] if os.environ.get('BORG_LZ4_PREFIX'): possible_openssl_prefixes.insert(0, os.environ.get('BORG_LZ4_PREFIX')) lz4_prefix = detect_lz4(possible_lz4_prefixes) From 98c464f06b4497b1e96a6c94bdbd7b2590114af4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 13 Sep 2015 18:05:03 +0200 Subject: [PATCH 021/151] vagrant: refine darwin, use osxfuse 3.0.x --- Vagrantfile | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 36819b503..03262eb3f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -49,11 +49,26 @@ end def packages_darwin return <<-EOF ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" - brew update || brew update - brew outdated openssl || brew upgrade openssl - brew outdated pyenv || brew upgrade pyenv + brew update + # this 
installs osxfuse 2.8.0 (which is based on libfuse 2.7.3). + # llfuse later complains about needing (libfuse) 2.8.0 at least. + #brew install caskroom/cask/brew-cask + #brew cask install osxfuse # needs cask install because of apple's unsigned kext ban + # get osxfuse 3.0.x pre-release code from github: + curl https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.0.5/osxfuse-3.0.5.dmg -L >osxfuse.dmg + MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \ + && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.0.5.pkg" -target / + brew install openssl brew install lz4 - brew install osxfuse + # looks dirty, is there a better way without root?: + mkdir -p /usr/local/opt/lz4 + ln -s /usr/local/Cellar/lz4/r*/include /usr/local/opt/lz4/ + ln -s /usr/local/Cellar/lz4/r*/lib /usr/local/opt/lz4/ + brew install fakeroot + brew install pyenv + if which pyenv > /dev/null; then + eval "$(pyenv init -)" + fi pyenv install 3.4.3 pyenv global 3.4.3 pyenv rehash @@ -66,8 +81,13 @@ def prepare_user(boxname) echo export 'PATH=/usr/local/bin:$PATH' >> ~/.profile . ~/.profile + # initialize python on darwin + if which pyenv > /dev/null; then + eval "$(pyenv init -)" + fi + cd /vagrant/borg - virtualenv --python=python3 borg-env + python -m virtualenv --python=python3 borg-env . 
borg-env/bin/activate cd borg @@ -99,7 +119,7 @@ Vagrant.configure(2) do |config| config.vm.provision "fix perms", :type => :shell, :inline => fix_perms config.vm.provider :virtualbox do |v| - v.gui = false + #v.gui = true v.cpus = 2 end @@ -136,7 +156,7 @@ Vagrant.configure(2) do |config| b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("freebsd") end - # OS X - TODO: make rsync/ssh work + # OS X config.vm.define "darwin" do |b| b.vm.box = "jhcook/yosemite-clitools" b.vm.provision "packages darwin", :type => :shell, :privileged => false, :inline => packages_darwin From 7bbe17fc773d200c11f12339679fb6e8d35c7a41 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 13 Sep 2015 23:39:34 +0200 Subject: [PATCH 022/151] vagrant: add centos 7 / 64 vm --- Vagrantfile | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Vagrantfile b/Vagrantfile index 03262eb3f..a574a28d8 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -15,6 +15,22 @@ def packages_prepare_precise EOF end +def packages_centos + return <<-EOF + yum install -y epel-release + yum update -y + yum install -y python34 python34-devel + ln -s /usr/bin/python3.4 /usr/bin/python3 + yum install -y openssl-devel openssl + yum install -y libacl-devel libacl + yum install -y lz4-devel + yum install -y fuse-devel fuse pkgconfig + yum install -y fakeroot gcc git + yum install -y python-pip + pip install virtualenv + EOF +end + def packages_debianoid return <<-EOF apt-get update @@ -123,6 +139,12 @@ Vagrant.configure(2) do |config| v.cpus = 2 end + config.vm.define "centos7" do |b| + b.vm.box = "centos/7" + b.vm.provision "packages centos7 64", :type => :shell, :inline => packages_centos + b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("centos7_64") + end + config.vm.define "trusty64" do |b| b.vm.box = "ubuntu/trusty64" b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid From 
13ded3d5e7f2a9fc55938cde74db28608d0b9711 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 14 Sep 2015 01:26:20 +0200 Subject: [PATCH 023/151] xattr tests: ignore security.selinux attribute showing up --- borg/testsuite/xattr.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/borg/testsuite/xattr.py b/borg/testsuite/xattr.py index d73856953..df0130c90 100644 --- a/borg/testsuite/xattr.py +++ b/borg/testsuite/xattr.py @@ -17,17 +17,23 @@ class XattrTestCase(BaseTestCase): def tearDown(self): os.unlink(self.symlink) + def assert_equal_se(self, is_x, want_x): + # check 2 xattr lists for equality, but ignore security.selinux attr + is_x = set(is_x) - {'security.selinux'} + want_x = set(want_x) + self.assert_equal(is_x, want_x) + def test(self): - self.assert_equal(listxattr(self.tmpfile.name), []) - self.assert_equal(listxattr(self.tmpfile.fileno()), []) - self.assert_equal(listxattr(self.symlink), []) + self.assert_equal_se(listxattr(self.tmpfile.name), []) + self.assert_equal_se(listxattr(self.tmpfile.fileno()), []) + self.assert_equal_se(listxattr(self.symlink), []) setxattr(self.tmpfile.name, 'user.foo', b'bar') setxattr(self.tmpfile.fileno(), 'user.bar', b'foo') setxattr(self.tmpfile.name, 'user.empty', None) - self.assert_equal(set(listxattr(self.tmpfile.name)), set(['user.foo', 'user.bar', 'user.empty'])) - self.assert_equal(set(listxattr(self.tmpfile.fileno())), set(['user.foo', 'user.bar', 'user.empty'])) - self.assert_equal(set(listxattr(self.symlink)), set(['user.foo', 'user.bar', 'user.empty'])) - self.assert_equal(listxattr(self.symlink, follow_symlinks=False), []) + self.assert_equal_se(listxattr(self.tmpfile.name), ['user.foo', 'user.bar', 'user.empty']) + self.assert_equal_se(listxattr(self.tmpfile.fileno()), ['user.foo', 'user.bar', 'user.empty']) + self.assert_equal_se(listxattr(self.symlink), ['user.foo', 'user.bar', 'user.empty']) + self.assert_equal_se(listxattr(self.symlink, follow_symlinks=False), 
[]) self.assert_equal(getxattr(self.tmpfile.name, 'user.foo'), b'bar') self.assert_equal(getxattr(self.tmpfile.fileno(), 'user.foo'), b'bar') self.assert_equal(getxattr(self.symlink, 'user.foo'), b'bar') From a9ee79309d012e856a62f7684653cee7218f3415 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 14 Sep 2015 02:44:17 +0200 Subject: [PATCH 024/151] update CHANGES --- CHANGES.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index eb7b93667..243b9703c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,8 +7,11 @@ Version 0.26.0 (not released yet) New features: +- Faster cache sync (do all in one pass, remove tar/compression stuff), #163 - BORG_REPO env var to specify the default repo, #168 - read special files as if they were regular files, #79 +- implement borg create --dry-run, attic issue #267 +- Normalize paths before pattern matching on OS X, #143 Bug fixes: @@ -16,11 +19,30 @@ Bug fixes: - chunker: use off_t to get 64bit on 32bit platform, #178 - initialize chunker fd to -1, so it's not equal to STDIN_FILENO (0) - fix reaction to "no" answer at delete repo prompt, #182 +- setup.py: detect lz4.h header file location +- to support python < 3.2.4, add less buggy argparse lib from 3.2.6 (#194) +- fix for obtaining 'char *' from temporary Python value (old code causes + a compile error on Mint 17.2) +- llfuse 0.41 install troubles on some platforms, require < 0.41 + (UnicodeDecodeError exception due to non-ascii llfuse setup.py) +- cython code: add some int types to get rid of unspecific python add / + subtract operations (avoid undefined symbol FPE_... 
error on some platforms) +- fix verbose mode display of stdin backup Other changes: - detect inconsistency / corruption / hash collision, #170 - replace versioneer with setuptools_scm, #106 +- docs: pkg-config is needed for llfuse installation +- xattr tests: ignore security.selinux attribute showing up +- fix tests on ext3, seems to need a bit more space for a sparse file +- do not test lzma level 9 compression (avoid MemoryError) +- use vagrant to do easy cross-platform testing (#196), currently: + debian 7 wheezy 32bit, debian 8 jessie 64bit + ubuntu 12.04 32bit, ubuntu 14.04 64bit + centos 7 64bit + freebsd 10.2 + darwin (OS X Yosemite) Version 0.25.0 From 568963eca78485586edd10f05bda466d0211daf1 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 14 Sep 2015 17:35:04 +0200 Subject: [PATCH 025/151] vagrant: added openbsd --- Vagrantfile | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/Vagrantfile b/Vagrantfile index a574a28d8..0599bb6de 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -62,6 +62,28 @@ def packages_freebsd EOF end +def packages_openbsd + return <<-EOF + . 
~/.profile + mkdir -p /home/vagrant/borg + rsync -aH /vagrant/borg/ /home/vagrant/borg/ + rm -rf /vagrant/borg + ln -sf /home/vagrant/borg /vagrant/ + pkg_add bash + chsh -s /usr/local/bin/bash vagrant + pkg_add python-3.4.2 + pkg_add py3-setuptools + ln -sf /usr/local/bin/python3.4 /usr/local/bin/python3 + ln -sf /usr/local/bin/python3.4 /usr/local/bin/python + pkg_add openssl + pkg_add lz4 + # pkg_add fuse # does not install, sdl dependency missing + pkg_add git # no fakeroot + easy_install-3.4 pip + pip3 install virtualenv + EOF +end + def packages_darwin return <<-EOF ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" @@ -178,6 +200,12 @@ Vagrant.configure(2) do |config| b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("freebsd") end + config.vm.define "openbsd" do |b| + b.vm.box = "bodgit/openbsd-5.7-amd64" + b.vm.provision "packages openbsd", :type => :shell, :inline => packages_openbsd + b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("openbsd") + end + # OS X config.vm.define "darwin" do |b| b.vm.box = "jhcook/yosemite-clitools" From bc5949a7f4075f6af75be91300d4e32685e572e9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 14 Sep 2015 17:36:04 +0200 Subject: [PATCH 026/151] chunker: add a check whether the POSIX_FADV_DONTNEED constant is defined on openbsd, it isn't. --- borg/_chunker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index b817775b0..5f761c51e 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -156,7 +156,7 @@ chunker_fill(Chunker *c) return 0; } length = c->bytes_read - offset; - #if ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) + #if ( ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) && defined(POSIX_FADV_DONTNEED) ) // We tell the OS that we do not need the data that we just have read any // more (that it maybe has in the cache). 
This avoids that we spoil the // complete cache with data that we only read once and (due to cache From cf9ba87734f51654df7cf26610dd368b509c4493 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 15 Sep 2015 00:41:32 +0200 Subject: [PATCH 027/151] test setup: do not set the sticky bit on a regular file sticky bit only has a function on directories. openbsd does not let one set sticky on files. other systems seem to just ignore it. --- borg/testsuite/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index d001b5ca3..02a6bc756 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -162,7 +162,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): # Directory self.create_regular_file('dir2/file2', size=1024 * 80) # File mode - os.chmod('input/file1', 0o7755) + os.chmod('input/file1', 0o6755) # Hard link os.link(os.path.join(self.input_path, 'file1'), os.path.join(self.input_path, 'hardlink')) From dc0938f6397bbb6e044e4a8b4b8e3fea3a9f6d99 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 15 Sep 2015 14:46:26 +0200 Subject: [PATCH 028/151] vagrant: add netbsd (needs more work) there's a problem with the shell: - either it is no login shell, then e.g. PKG_PATH is not set (sh) - or it is not found (no bash here) - or it is not compatible enough (csh) depending on what shell is configured, one or the other vagrant mode / feature does not work. 
--- Vagrantfile | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/Vagrantfile b/Vagrantfile index 0599bb6de..0b3c4b0cc 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -84,6 +84,27 @@ def packages_openbsd EOF end +def packages_netbsd + # ftp://ftp.netbsd.org/pub/pkgsrc/packages/NetBSD/amd64/6.1.5/All/ + return <<-EOF + #ftp ftp://ftp.NetBSD.org/pub/pkgsrc/current/pkgsrc.tar.gz + #tar xzf pkgsrc.tar.gz + #cd pkgsrc/bootstrap + #./bootstrap + #PATH="/usr/pkg/sbin:$PATH" + PKG_PATH="ftp://ftp.NetBSD.org/pub/pkgsrc/packages/NetBSD/amd64/6.1.5/All/" + export PKG_PATH + pkg_add python34 py34-setuptools + ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python + ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python3 + pkg_add mozilla-rootcerts lz4 git + mozilla-rootcerts install + #pkg_add pkg-config fuse-2.9.3 # llfuse does not support netbsd + easy_install-3.4 pip + pip install virtualenv + EOF +end + def packages_darwin return <<-EOF ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" @@ -206,6 +227,15 @@ Vagrant.configure(2) do |config| b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("openbsd") end + config.vm.define "netbsd" do |b| + #b.vm.box = "Kralian/netbsd_6.1.5_amd64" + b.vm.box = "alex-skimlinks/netbsd-6.1.5-amd64" + b.ssh.shell = "ksh -l" + #b.ssh.shell = "sh" + b.vm.provision "packages netbsd", :type => :shell, :inline => packages_netbsd + b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("netbsd") + end + # OS X config.vm.define "darwin" do |b| b.vm.box = "jhcook/yosemite-clitools" From 56bf8b79cb8df28537250e45025c42f518a80410 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 15 Sep 2015 18:27:16 +0200 Subject: [PATCH 029/151] add .vagrant to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5debd74ed..4f7c67672 100644 --- a/.gitignore +++ b/.gitignore 
@@ -23,3 +23,4 @@ borg.build/ borg.dist/ borg.exe .coverage +.vagrant From e8571c1c85db79cb24a4e5252697b643d1991778 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 15 Sep 2015 23:45:12 +0200 Subject: [PATCH 030/151] vagrant: more docs, some fixes --- Vagrantfile | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 0b3c4b0cc..63e74396d 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -1,6 +1,24 @@ # -*- mode: ruby -*- # vi: set ft=ruby : +# Automated creation of testing environments on misc. platforms +# Usage: +# vagrant up OS +# vagrant ssh OS command +# vagrant halt OS +# +# packages_OS goals: +# - have all dependencies installed +# - have a working "virtualenv" command +# - have a working "python3" command +# +# packages_prepare_OS goals: (for some older OS) +# - adds additional package sources, so packages_OS can find all it needs. +# +# prepare_user goals: +# - have a working "borg-env" virtual env installed, with code from "borg". +# both directories are in /vagrant/borg/. + def packages_prepare_wheezy return <<-EOF # debian 7 wheezy does not have lz4, but it is available from wheezy-backports: @@ -92,12 +110,18 @@ def packages_netbsd #cd pkgsrc/bootstrap #./bootstrap #PATH="/usr/pkg/sbin:$PATH" + hostname netbsd # the box we use has an invalid hostname PKG_PATH="ftp://ftp.NetBSD.org/pub/pkgsrc/packages/NetBSD/amd64/6.1.5/All/" export PKG_PATH pkg_add python34 py34-setuptools ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python3 pkg_add mozilla-rootcerts lz4 git + mkdir -p /usr/local/opt/lz4/include + mkdir -p /usr/local/opt/lz4/lib + ln -s /usr/pkg/include/lz4*.h /usr/local/opt/lz4/include/ + ln -s /usr/pkg/lib/liblz4* /usr/local/opt/lz4/lib/ + touch /etc/openssl/openssl.cnf # avoids a flood of "can't open ..." 
mozilla-rootcerts install #pkg_add pkg-config fuse-2.9.3 # llfuse does not support netbsd easy_install-3.4 pip @@ -131,7 +155,7 @@ def packages_darwin pyenv install 3.4.3 pyenv global 3.4.3 pyenv rehash - python -m pip install --user virtualenv + python -m pip install virtualenv EOF end @@ -146,11 +170,12 @@ def prepare_user(boxname) fi cd /vagrant/borg - python -m virtualenv --python=python3 borg-env + #python -m virtualenv --python=python3 borg-env + virtualenv --python=python3 borg-env . borg-env/bin/activate cd borg - pip install -U pip setuptools + # pip install -U pip setuptools # we fetch a current virtualenv, so these are fresh also pip install 'llfuse<0.41' # 0.41 does not install due to UnicodeDecodeError pip install -r requirements.d/development.txt pip install -e . From 41860ef5f04d704be0ef9c3d0214875e75441e26 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 15 Sep 2015 23:52:17 +0200 Subject: [PATCH 031/151] test setup: stay away from the setgid mode bit for vagrant testing on misc. platforms, we can't know the group / we can't have the same group everywhere. but the OS won't let us set setgid bit if the file does not have our group. on netbsd, the created file somehow happens to have group "wheel", but vagrant is not in group wheel. 
--- borg/testsuite/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 02a6bc756..2ae565bb8 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -162,7 +162,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): # Directory self.create_regular_file('dir2/file2', size=1024 * 80) # File mode - os.chmod('input/file1', 0o6755) + os.chmod('input/file1', 0o4755) # Hard link os.link(os.path.join(self.input_path, 'file1'), os.path.join(self.input_path, 'hardlink')) From f138d06dd5c014b759cc745873d79be0595f5446 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 16 Sep 2015 11:44:53 +0200 Subject: [PATCH 032/151] more platform support, works on py 3.5 xattrs and ACLs on OpenBSD and NetBSD need development or adaptation of the platform-specific code. --- README.rst | 2 +- docs/faq.rst | 2 +- setup.py | 5 ++++- tox.ini | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 8180fd2ab..177ad27f1 100644 --- a/README.rst +++ b/README.rst @@ -65,7 +65,7 @@ Main features **Platforms Borg works on** * Linux - * FreeBSD + * FreeBSD, OpenBSD, NetBSD * Mac OS X * Cygwin (unsupported) diff --git a/docs/faq.rst b/docs/faq.rst index d13fe67f1..a51ddc2c1 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -41,7 +41,7 @@ Which file types, attributes, etc. are preserved? 
* User ID of owner * Group ID of owner * Unix Mode/Permissions (u/g/o permissions, suid, sgid, sticky) - * Extended Attributes (xattrs) + * Extended Attributes (xattrs) on Linux, OS X and FreeBSD * Access Control Lists (ACL_) on Linux, OS X and FreeBSD * BSD flags on OS X and FreeBSD diff --git a/setup.py b/setup.py index 2e5e04b50..e3f66ff9a 100644 --- a/setup.py +++ b/setup.py @@ -138,13 +138,15 @@ setup( description='Deduplicated, encrypted, authenticated and compressed backups', long_description=long_description, license='BSD', - platforms=['Linux', 'MacOS X', 'FreeBSD', ], + platforms=['Linux', 'MacOS X', 'FreeBSD', 'OpenBSD', 'NetBSD', ], classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: Console', 'Intended Audience :: System Administrators', 'License :: OSI Approved :: BSD License', 'Operating System :: POSIX :: BSD :: FreeBSD', + 'Operating System :: POSIX :: BSD :: OpenBSD', + 'Operating System :: POSIX :: BSD :: NetBSD', 'Operating System :: MacOS :: MacOS X', 'Operating System :: POSIX :: Linux', 'Programming Language :: Python', @@ -152,6 +154,7 @@ setup( 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', 'Topic :: Security :: Cryptography', 'Topic :: System :: Archiving :: Backup', ], diff --git a/tox.ini b/tox.ini index a120a237a..d177c121a 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ # fakeroot -u tox --recreate [tox] -envlist = py32, py33, py34 +envlist = py32, py33, py34, py35 [testenv] # Change dir to avoid import problem for cython code. The directory does From ff64b4c424c80406e8f8d61b24dd1a25c3c001ef Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 16 Sep 2015 11:53:53 +0200 Subject: [PATCH 033/151] be more precise about OpenBSD/NetBSD support xattrs/ACLs need platform specific code and we have none (yet) for them.
--- README.rst | 3 ++- docs/faq.rst | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 177ad27f1..b8fb652b4 100644 --- a/README.rst +++ b/README.rst @@ -65,8 +65,9 @@ Main features **Platforms Borg works on** * Linux - * FreeBSD, OpenBSD, NetBSD * Mac OS X + * FreeBSD + * OpenBSD and NetBSD (for both: no xattrs/ACLs support yet) * Cygwin (unsupported) **Free and Open Source Software** diff --git a/docs/faq.rst b/docs/faq.rst index a51ddc2c1..bb569e4f1 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -6,6 +6,7 @@ Frequently asked questions Which platforms are supported? Currently Linux, FreeBSD and MacOS X are supported. + OpenBSD and NetBSD work also, except for xattrs and ACLs. You can try your luck on other POSIX-like systems, like Cygwin, other BSDs, etc. but they are not officially supported. From 48634d4e961d523e88019515f18466c2feec2577 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 17 Sep 2015 22:41:49 +0200 Subject: [PATCH 034/151] tests: ignore st_rdev if file is not a block/char device, fixes #203 --- borg/testsuite/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index 9872edeb6..0ef950a97 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -2,6 +2,7 @@ from contextlib import contextmanager import filecmp import os import posix +import stat import sys import sysconfig import time @@ -72,6 +73,11 @@ class BaseTestCase(unittest.TestCase): attrs.append('st_nlink') d1 = [filename] + [getattr(s1, a) for a in attrs] d2 = [filename] + [getattr(s2, a) for a in attrs] + # ignore st_rdev if file is not a block/char device, fixes #203 + if not stat.S_ISCHR(d1[1]) and not stat.S_ISBLK(d1[1]): + d1[4] = None + if not stat.S_ISCHR(d2[1]) and not stat.S_ISBLK(d2[1]): + d2[4] = None if not os.path.islink(path1) or utime_supports_fd: # Older versions of llfuse do not support ns precision properly if fuse and not 
have_fuse_mtime_ns: From 375717c0954dd99ce4f8484938f56cb62c540951 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 18 Sep 2015 00:02:44 +0200 Subject: [PATCH 035/151] tests: work around strange mtime granularity issue on netbsd, fixes #204 not sure where the problem is: it seems to announce it supports st_mtime_ns, but if one uses it and has a file with ...123ns, it gets restored as ...000ns. Then I tried setting st_mtime_ns_round to -3, but it still failed with +1000ns difference. Maybe rounding is incorrect and it should be truncating? Issue with granularity could be in python, in netbsd (netbsd platform code), in ffs filesystem, ... --- borg/testsuite/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index 0ef950a97..cd790b571 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -28,6 +28,8 @@ elif 'HAVE_UTIMES' in sysconfig.get_config_vars(): else: st_mtime_ns_round = -9 +if sys.platform.startswith('netbsd'): + st_mtime_ns_round = -4 # only >1 microsecond resolution here? has_mtime_ns = sys.version >= '3.3' utime_supports_fd = os.utime in getattr(os, 'supports_fd', {}) From cad0515178f999467d7d9169cf47eb845ebd59f2 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 16:09:20 +0200 Subject: [PATCH 036/151] archive names with slashes are invalid, attic issue #180 for borg mount's FUSE filesystem, we use the archive name as a directory name, thus slashes are not allowed.
--- borg/archiver.py | 2 +- borg/helpers.py | 10 ++++++---- borg/testsuite/helpers.py | 10 ++++++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 465fcc85d..a6b5acdd4 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -718,7 +718,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='do not create a backup archive') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), - help='archive to create') + help='name of archive to create (must be also a valid directory name)') subparser.add_argument('paths', metavar='PATH', nargs='+', type=str, help='paths to archive') diff --git a/borg/helpers.py b/borg/helpers.py index 45d200a48..9dab70aa8 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -490,18 +490,20 @@ class Location: """Object representing a repository / archive location """ proto = user = host = port = path = archive = None + # borg mount's FUSE filesystem creates one level of directories from + # the archive names. Thus, we must not accept "/" in archive names. ssh_re = re.compile(r'(?P<proto>ssh)://(?:(?P<user>[^@]+)@)?' r'(?P<host>[^:/#]+)(?::(?P<port>\d+))?' - r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$') + r'(?P<path>[^:]+)(?:::(?P<archive>[^/]+))?$') file_re = re.compile(r'(?P<proto>file)://' - r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$') + r'(?P<path>[^:]+)(?:::(?P<archive>[^/]+))?$') scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?' - r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$') + r'(?P<path>[^:]+)(?:::(?P<archive>[^/]+))?$') # get the repo from BORG_RE env and the optional archive from param. # if the syntax requires giving REPOSITORY (see "borg mount"), # use "::" to let it use the env var. # if REPOSITORY argument is optional, it'll automatically use the env.
- env_re = re.compile(r'(?:::(?P<archive>.+)?)?$') + env_re = re.compile(r'(?:::(?P<archive>[^/]+)?)?$') def __init__(self, text=''): self.orig = text diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index f755df22a..25ec48c90 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -81,6 +81,11 @@ class TestLocationWithoutEnv: with pytest.raises(ValueError): Location('ssh://localhost:22/path:archive') + def test_no_slashes(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + with pytest.raises(ValueError): + Location('/some/path/to/repo::archive_name_with/slashes/is_invalid') + def test_canonical_path(self, monkeypatch): monkeypatch.delenv('BORG_REPO', raising=False) locations = ['some/path::archive', 'file://some/path::archive', 'host:some/path::archive', @@ -134,6 +139,11 @@ class TestLocationWithEnv: assert repr(Location()) == \ "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)" + def test_no_slashes(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', '/some/absolute/path') + with pytest.raises(ValueError): + Location('::archive_name_with/slashes/is_invalid') + class FormatTimedeltaTestCase(BaseTestCase): From aed6cc944652dd8745a43bb1aa03f029ca6b39a3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 16:58:02 +0200 Subject: [PATCH 037/151] be more clear about pruning, attic issue #132 --- borg/archiver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/borg/archiver.py b/borg/archiver.py index a6b5acdd4..e393fad0d 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -862,6 +862,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") If a prefix is set with -p, then only archives that start with the prefix are considered for deletion and only those archives count towards the totals specified by the rules. + Otherwise, *all* archives in the repository are candidates for deletion!
""") subparser = subparsers.add_parser('prune', parents=[common_parser], description=self.do_prune.__doc__, From 08417b52ec0b6848ecd5617424aedf4c40acf28f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 17:48:41 +0200 Subject: [PATCH 038/151] implement counters for Include/ExcludePatterns --- borg/helpers.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index 9dab70aa8..820efa9ed 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -242,6 +242,7 @@ def normalized(func): # always have to enter an exact match return func + class IncludePattern: """Literal files or directories listed on the command line for some operations (e.g. extract, but not create). @@ -249,6 +250,8 @@ class IncludePattern: path match as well. A trailing slash makes no difference. """ def __init__(self, pattern): + self.match_count = 0 + if sys.platform in ('darwin',): pattern = unicodedata.normalize("NFD", pattern) @@ -256,7 +259,10 @@ class IncludePattern: @normalized def match(self, path): - return (path+os.path.sep).startswith(self.pattern) + matches = (path+os.path.sep).startswith(self.pattern) + if matches: + self.match_count += 1 + return matches def __repr__(self): return '%s(%s)' % (type(self), self.pattern) @@ -267,6 +273,8 @@ class ExcludePattern(IncludePattern): exclude the contents of a directory, but not the directory itself. 
""" def __init__(self, pattern): + self.match_count = 0 + if pattern.endswith(os.path.sep): self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep else: @@ -281,7 +289,10 @@ class ExcludePattern(IncludePattern): @normalized def match(self, path): - return self.regex.match(path+os.path.sep) is not None + matches = self.regex.match(path+os.path.sep) is not None + if matches: + self.match_count += 1 + return matches def __repr__(self): return '%s(%s)' % (type(self), self.pattern) From 15b003e344cfbad046f99eee9686fcb6dd85af5c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 18:03:53 +0200 Subject: [PATCH 039/151] add a string representation for Include/ExcludePattern it just gives the original string that was used. --- borg/helpers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/borg/helpers.py b/borg/helpers.py index 820efa9ed..f9450c1b8 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -250,6 +250,7 @@ class IncludePattern: path match as well. A trailing slash makes no difference. """ def __init__(self, pattern): + self.pattern_orig = pattern self.match_count = 0 if sys.platform in ('darwin',): @@ -267,12 +268,16 @@ class IncludePattern: def __repr__(self): return '%s(%s)' % (type(self), self.pattern) + def __str__(self): + return self.pattern_orig + class ExcludePattern(IncludePattern): """Shell glob patterns to exclude. A trailing slash means to exclude the contents of a directory, but not the directory itself. 
""" def __init__(self, pattern): + self.pattern_orig = pattern self.match_count = 0 if pattern.endswith(os.path.sep): @@ -297,6 +302,9 @@ class ExcludePattern(IncludePattern): def __repr__(self): return '%s(%s)' % (type(self), self.pattern) + def __str__(self): + return self.pattern_orig + def timestamp(s): """Convert a --timestamp=s argument to a datetime object""" From e0a08c5caeafdd920a2e98d3755b7beded2d8d5f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 18:16:47 +0200 Subject: [PATCH 040/151] borg extract: warn if a include pattern never matched, fixes #209 --- borg/archiver.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index e393fad0d..75b6e72b6 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -21,7 +21,7 @@ from .repository import Repository from .cache import Cache from .key import key_creator from .helpers import Error, location_validator, format_time, format_file_size, \ - format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ + format_file_mode, ExcludePattern, IncludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec @@ -286,6 +286,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") if not args.dry_run: while dirs: archive.extract_item(dirs.pop(-1)) + for pattern in patterns: + if isinstance(pattern, IncludePattern) and pattern.match_count == 0: + self.print_error("Warning: Include pattern '%s' never matched.", pattern) return self.exit_code def do_rename(self, args): From ab76176553047d101a14c1ed92eba664044100c1 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 18:38:44 +0200 Subject: [PATCH 041/151] fix: patterns might be None --- 
borg/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index 75b6e72b6..28f1d8a3f 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -286,7 +286,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") if not args.dry_run: while dirs: archive.extract_item(dirs.pop(-1)) - for pattern in patterns: + for pattern in (patterns or []): if isinstance(pattern, IncludePattern) and pattern.match_count == 0: self.print_error("Warning: Include pattern '%s' never matched.", pattern) return self.exit_code From 5de30e9a05d3f0a8cb7f48d993ba91c436b7baba Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 19:02:03 +0200 Subject: [PATCH 042/151] update CHANGES --- CHANGES.rst | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 243b9703c..e1ab529ea 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,6 @@ Borg Changelog ============== - Version 0.26.0 (not released yet) --------------------------------- @@ -12,6 +11,8 @@ New features: - read special files as if they were regular files, #79 - implement borg create --dry-run, attic issue #267 - Normalize paths before pattern matching on OS X, #143 +- support OpenBSD and NetBSD (except xattrs/ACLs) +- support / run tests on Python 3.5 Bug fixes: @@ -28,21 +29,32 @@ Bug fixes: - cython code: add some int types to get rid of unspecific python add / subtract operations (avoid undefined symbol FPE_... error on some platforms) - fix verbose mode display of stdin backup +- extract: warn if a include pattern never matched, fixes #209, + implement counters for Include/ExcludePatterns +- archive names with slashes are invalid, attic issue #180 +- chunker: add a check whether the POSIX_FADV_DONTNEED constant is defined - + fixes building on OpenBSD. 
Other changes: - detect inconsistency / corruption / hash collision, #170 - replace versioneer with setuptools_scm, #106 - docs: pkg-config is needed for llfuse installation +- help/docs: be more clear about pruning, attic issue #132 - xattr tests: ignore security.selinux attribute showing up - fix tests on ext3, seems to need a bit more space for a sparse file - do not test lzma level 9 compression (avoid MemoryError) -- use vagrant to do easy cross-platform testing (#196), currently: - debian 7 wheezy 32bit, debian 8 jessie 64bit - ubuntu 12.04 32bit, ubuntu 14.04 64bit - centos 7 64bit - freebsd 10.2 - darwin (OS X Yosemite) +- tests: work around strange mtime granularity issue on netbsd, fixes #204 +- tests: ignore st_rdev if file is not a block/char device, fixes #203 +- test setup: stay away from the setgid and sticky mode bits +- use Vagrant to do easy cross-platform testing (#196), currently: + Debian 7 "wheezy" 32bit, Debian 8 "jessie" 64bit + Ubuntu 12.04 32bit, Ubuntu 14.04 64bit + Centos 7 64bit + FreeBSD 10.2 64bit + OpenBSD 5.7 64bit + NetBSD 6.1.5 64bit + Darwin (OS X Yosemite) Version 0.25.0 From fb8d1cc6020976ce55862807efaa4f1862eb0d93 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 20:31:26 +0200 Subject: [PATCH 043/151] also mention libattr ACLs are implemented as xattrs, so libattr is pulled in as a dependency of libacl. --- docs/global.rst.inc | 1 + docs/installation.rst | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/global.rst.inc b/docs/global.rst.inc index c8c490498..265ad2658 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -13,6 +13,7 @@ .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list .. _libacl: http://savannah.nongnu.org/projects/acl/ +.. _libattr: http://savannah.nongnu.org/projects/attr/ .. _liblz4: https://github.com/Cyan4973/lz4 .. _OpenSSL: https://www.openssl.org/ .. 
_Python: http://www.python.org/ diff --git a/docs/installation.rst b/docs/installation.rst index 4d025c822..fbf5e7f4c 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -8,7 +8,7 @@ Installation * Python_ >= 3.2 * OpenSSL_ >= 1.0.0 -* libacl_ +* libacl_ (that pulls in libattr_ also) * liblz4_ * some python dependencies, see install_requires in setup.py From 6ef7a9b11b99175dc3e965ad9dafe07ecb8146e6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 20:57:02 +0200 Subject: [PATCH 044/151] update CHANGES for 0.26 release --- CHANGES.rst | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e1ab529ea..0751cf273 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,8 +1,8 @@ Borg Changelog ============== -Version 0.26.0 (not released yet) ---------------------------------- +Version 0.26.0 +-------------- New features: @@ -39,22 +39,27 @@ Other changes: - detect inconsistency / corruption / hash collision, #170 - replace versioneer with setuptools_scm, #106 -- docs: pkg-config is needed for llfuse installation -- help/docs: be more clear about pruning, attic issue #132 -- xattr tests: ignore security.selinux attribute showing up -- fix tests on ext3, seems to need a bit more space for a sparse file -- do not test lzma level 9 compression (avoid MemoryError) -- tests: work around strange mtime granularity issue on netbsd, fixes #204 -- tests: ignore st_rdev if file is not a block/char device, fixes #203 -- test setup: stay away from the setgid and sticky mode bits +- docs: + + - pkg-config is needed for llfuse installation + - be more clear about pruning, attic issue #132 +- unit tests: + + - xattr: ignore security.selinux attribute showing up + - ext3 seems to need a bit more space for a sparse file + - do not test lzma level 9 compression (avoid MemoryError) + - work around strange mtime granularity issue on netbsd, fixes #204 + - ignore st_rdev if file is 
not a block/char device, fixes #203 + - stay away from the setgid and sticky mode bits - use Vagrant to do easy cross-platform testing (#196), currently: - Debian 7 "wheezy" 32bit, Debian 8 "jessie" 64bit - Ubuntu 12.04 32bit, Ubuntu 14.04 64bit - Centos 7 64bit - FreeBSD 10.2 64bit - OpenBSD 5.7 64bit - NetBSD 6.1.5 64bit - Darwin (OS X Yosemite) + + - Debian 7 "wheezy" 32bit, Debian 8 "jessie" 64bit + - Ubuntu 12.04 32bit, Ubuntu 14.04 64bit + - Centos 7 64bit + - FreeBSD 10.2 64bit + - OpenBSD 5.7 64bit + - NetBSD 6.1.5 64bit + - Darwin (OS X Yosemite) Version 0.25.0 From 0f68f1ccd1eb9331c7f6975c037e8b8223df2af2 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 21:35:02 +0200 Subject: [PATCH 045/151] add docs about release process --- docs/development.rst | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/development.rst b/docs/development.rst index be8405c18..54f527e4e 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -51,6 +51,7 @@ Important notes: - When using -- to give options to py.test, you MUST also give borg.testsuite[.module]. + Building the docs with Sphinx ----------------------------- @@ -66,3 +67,32 @@ Now run:: make html Then point a web browser at docs/_build/html/index.html. + + +Creating a new release +---------------------- + +Checklist:: + +- all issues for this milestone closed? +- any low hanging fruit left on the issue tracker? +- run tox on all supported platforms via vagrant, check for test fails. +- is Travis CI happy also? +- update CHANGES.rst (compare to git log). check version number of upcoming release. +- check MANIFEST.in and setup.py - are they complete? +- tag the release:: + + git tag -s -m "tagged release" 0.26.0 + +- create a release on PyPi:: + + python setup.py register sdist upload --identity="Thomas Waldmann" --sign + +- close release milestone. 
+- announce on:: + + - mailing list + - Twitter + - IRC channel (topic) + +- create binary wheels and link them from issue tracker: https://github.com/borgbackup/borg/issues/147 From d4de0fd01987bf6bf8a2d2854d70dd62ad7afbb1 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 21:44:40 +0200 Subject: [PATCH 046/151] remove remainders of versioneer, typo fix. --- MANIFEST.in | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index d74d9e2c4..217dab938 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include README.rst AUTHORS LICENSE CHANGES.rst MANIFEST.in versioneer.py +include README.rst AUTHORS LICENSE CHANGES.rst MANIFEST.in recursive-include borg *.pyx recursive-include docs * recursive-exclude docs *.pyc diff --git a/setup.cfg b/setup.cfg index 19a49eea6..8a128d6e1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,5 +4,5 @@ python_files = testsuite/*.py [flake8] ignore = E226,F403 max-line-length = 250 -exclude = versioneer.py,docs/conf.py,borg/_version.py,build,dist,.git,.idea,.cache +exclude = docs/conf.py,borg/_version.py,build,dist,.git,.idea,.cache max-complexity = 100 diff --git a/setup.py b/setup.py index e3f66ff9a..68a3db8d3 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ except ImportError: if not all(os.path.exists(path) for path in [ compress_source, crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]): - raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version') + raise ImportError('The GIT version of Borg needs Cython. 
Install Cython or use a released version.') def detect_openssl(prefixes): From 507d90f4762d0de474c6366bd2b527944e3d6a7d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 22:17:46 +0200 Subject: [PATCH 047/151] MANIFEST.in: add some more excludes --- MANIFEST.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MANIFEST.in b/MANIFEST.in index 217dab938..309c1f8dc 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,4 +4,6 @@ recursive-include docs * recursive-exclude docs *.pyc recursive-exclude docs *.pyo prune docs/_build +prune .travis +exclude .coveragerc .gitattributes .gitignore .travis.yml Vagrantfile include borg/_version.py From 688488d62d518c0ee12c9a910ca3efeb5e18ae61 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 22:20:41 +0200 Subject: [PATCH 048/151] docs: must run make html before release --- docs/development.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/development.rst b/docs/development.rst index 54f527e4e..1282b9c97 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -80,6 +80,7 @@ Checklist:: - is Travis CI happy also? - update CHANGES.rst (compare to git log). check version number of upcoming release. - check MANIFEST.in and setup.py - are they complete? +- cd docs ; make html # to update the usage include files - tag the release:: git tag -s -m "tagged release" 0.26.0 From 4410f1c3c3f7703cabe603a63dd07c322f9eaedd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 19 Sep 2015 22:38:00 +0200 Subject: [PATCH 049/151] docs: must run make html after tagging, so the html titel says correct version --- docs/development.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/development.rst b/docs/development.rst index 1282b9c97..420481680 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -80,11 +80,12 @@ Checklist:: - is Travis CI happy also? - update CHANGES.rst (compare to git log). check version number of upcoming release. 
- check MANIFEST.in and setup.py - are they complete? -- cd docs ; make html # to update the usage include files - tag the release:: git tag -s -m "tagged release" 0.26.0 +- cd docs ; make html # to update the usage include files +- update website with the html - create a release on PyPi:: python setup.py register sdist upload --identity="Thomas Waldmann" --sign From 9dc98eeb4588d0decb29f9e5b7d923ea23c171ec Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 20 Sep 2015 22:22:40 +0200 Subject: [PATCH 050/151] vagrant: add py3.4 for precise/trusty, install pyinstaller also: source .profile so the PATH is ok --- Vagrantfile | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 63e74396d..77832fe73 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -11,6 +11,7 @@ # - have all dependencies installed # - have a working "virtualenv" command # - have a working "python3" command +# - have a working "python3.4" command on platforms where we build a binary # # packages_prepare_OS goals: (for some older OS) # - adds additional package sources, so packages_OS can find all it needs. @@ -18,6 +19,8 @@ # prepare_user goals: # - have a working "borg-env" virtual env installed, with code from "borg". # both directories are in /vagrant/borg/. +# - have a working "borg-env34" virtual env installed, with code from "borg" +# and pyinstaller. def packages_prepare_wheezy return <<-EOF @@ -30,6 +33,15 @@ def packages_prepare_precise return <<-EOF # ubuntu 12.04 precise does not have lz4, but it is available from a ppa: add-apt-repository -y ppa:gezakovacs/lz4 + # we build the 32bit binary here also, using pyinstaller and py3.4. + add-apt-repository -y ppa:fkrull/deadsnakes + EOF +end + +def packages_prepare_trusty + return <<-EOF + # we build the 64bit binary here also, using pyinstaller and py3.4. 
+ add-apt-repository -y ppa:fkrull/deadsnakes EOF end @@ -52,7 +64,9 @@ end def packages_debianoid return <<-EOF apt-get update + apt-get install -y python-dev # pyinstaller needs py2 apt-get install -y python3-dev python3-setuptools + apt-get install -y python3.4-dev # for pyinstaller / binary building apt-get install -y libssl-dev libacl1-dev liblz4-dev apt-get install -y libfuse-dev fuse pkg-config apt-get install -y fakeroot build-essential git @@ -170,16 +184,26 @@ def prepare_user(boxname) fi cd /vagrant/borg - #python -m virtualenv --python=python3 borg-env + # this is the env with the STANDARD python3.x on this platform virtualenv --python=python3 borg-env . borg-env/bin/activate - cd borg - # pip install -U pip setuptools # we fetch a current virtualenv, so these are fresh also pip install 'llfuse<0.41' # 0.41 does not install due to UnicodeDecodeError pip install -r requirements.d/development.txt pip install -e . + # on some platforms, we build a borg binary (and use py3.4 for it) + if which python3.4 > /dev/null; then + cd /vagrant/borg + virtualenv --python=python3.4 borg-env34 + . borg-env34/bin/activate + cd borg + pip install 'PyInstaller==3.0.dev2' + pip install 'llfuse<0.41' # 0.41 does not install due to UnicodeDecodeError + pip install -r requirements.d/development.txt + pip install -e . + fi + echo echo "Run:" echo " vagrant rsync #{boxname}" @@ -189,6 +213,7 @@ end def fix_perms return <<-EOF + . 
~/.profile chown -R vagrant /vagrant/borg EOF end @@ -215,6 +240,7 @@ Vagrant.configure(2) do |config| config.vm.define "trusty64" do |b| b.vm.box = "ubuntu/trusty64" + b.vm.provision "packages prepare trusty", :type => :shell, :inline => packages_prepare_trusty b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("trusty64") end From c557613e3a47896e89a640030a8e2ab92cef11a4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 21 Sep 2015 21:20:33 +0200 Subject: [PATCH 051/151] vagrant: darwin: give configure options so libpython gets build it is needed by pyinstaller to build the borg binary --- Vagrantfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Vagrantfile b/Vagrantfile index 77832fe73..381fceedd 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -166,7 +166,7 @@ def packages_darwin if which pyenv > /dev/null; then eval "$(pyenv init -)" fi - pyenv install 3.4.3 + env PYTHON_CONFIGURE_OPTS="--enable-framework" pyenv install 3.4.3 pyenv global 3.4.3 pyenv rehash python -m pip install virtualenv From 7dbe2b80b3b0e8320957a1e7bea32cb8f14dee0b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 21 Sep 2015 21:28:18 +0200 Subject: [PATCH 052/151] docs: add a note about the single-file binaries --- docs/installation.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/installation.rst b/docs/installation.rst index fbf5e7f4c..4fd5ed356 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -74,6 +74,15 @@ and compare that to our latest release and review the change log (see links on our web site). +Installation (binary) +--------------------- +For some platforms we offer a ready-to-use standalone borg binary. + +It is supposed to work without requiring installation or preparations. + +Check https://github.com/borgbackup/borg/issues/214 for available binaries. 
+ + Debian Jessie / Ubuntu 14.04 preparations (wheel) ------------------------------------------------- From 22f335e0a0367173cdeaa41239425798a7baba31 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 21 Sep 2015 21:48:46 +0200 Subject: [PATCH 053/151] docs: add how to build wheels and standalone binaries --- docs/development.rst | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/docs/development.rst b/docs/development.rst index 420481680..1ba239370 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -98,3 +98,35 @@ Checklist:: - IRC channel (topic) - create binary wheels and link them from issue tracker: https://github.com/borgbackup/borg/issues/147 +- create standalone binaries and link them from issue tracker: https://github.com/borgbackup/borg/issues/214 + + +Creating binary wheels +---------------------- + +With virtual env activated:: + + pip install wheel + python setup.py bdist_wheel + ls -l dist/*.whl + +Note: Binary wheels are rather specific for the platform they get built on. + E.g. a wheel built for Ubuntu 14.04 64bit likely will not work on Centos7 64bit. + + +Creating standalone binaries +---------------------------- + +With virtual env activated:: + + pip install pyinstaller==3.0.dev2 # or a later 3.x release + pyinstaller -F -n borg-PLATFORM borg/__main__.py + ls -l dist/* + +On platforms less well supported than Linux, there might be issues with pyinstaller +not finding the dynamic python library (libpython*) or with pyinstaller not having +a pre-compiled "bootloader" for the platform or with not supporting the platform at +all. + +Note: Standalone binaries built with pyinstaller are supposed to work on the same OS, + same architecture (x86 32bit, amd64 64bit) without external dependencies.
From 86502d2729756ff75e8e36c7a39ee4f2a2642003 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 26 Sep 2015 14:24:37 +0200 Subject: [PATCH 054/151] docs: explain --read-special in more detail, fixes #220 --- docs/usage.rst | 78 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 11 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index da6d93f11..3a933d42c 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -214,12 +214,6 @@ Examples # Even slower, even higher compression (N = 0..9) $ borg create --compression lzma,N /mnt/backup::repo ~ - # Backup some LV snapshots (you have to create the snapshots before this - # and remove them afterwards). We also backup the output of lvdisplay so - # we can see the LV sizes at restore time. See also "borg extract" examples. - $ lvdisplay > lvdisplay.txt - $ borg create --read-special /mnt/backup::repo lvdisplay.txt /dev/vg0/*-snapshot - .. include:: usage/extract.rst.inc Examples @@ -238,11 +232,6 @@ Examples # Extract the "src" directory but exclude object files $ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o' - # Restore LV snapshots (the target LVs /dev/vg0/* of correct size have - # to be already available and will be overwritten by this command!) - $ borg extract --stdout /mnt/backup::repo dev/vg0/root-snapshot > /dev/vg0/root - $ borg extract --stdout /mnt/backup::repo dev/vg0/home-snapshot > /dev/vg0/home - Note: currently, extract always writes into the current working directory ("."), so make sure you ``cd`` to the right place before calling ``borg extract``. @@ -357,3 +346,70 @@ Examples $ cat ~/.ssh/authorized_keys command="borg serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...] + +Additional Notes +================ + +Here are misc. notes about topics that are maybe not covered in enough detail in the usage section. 
+--read-special +-------------- + +The option --read-special is not intended for normal, filesystem-level (full or +partly-recursive) backups. You only give this option if you want to do something +rather ... special - and if you have hand-picked some files that you want to treat +that way. + +`borg create --read-special` will open all files without doing any special treatment +according to the file type (the only exception here are directories: they will be +recursed into). Just imagine what happens if you do `cat filename` - the content +you will see there is what borg will back up for that filename. + +So, for example, symlinks will be followed, block device content will be read, +named pipes / UNIX domain sockets will be read. + +You need to be careful with what you give as filename when using --read-special, +e.g. if you give /dev/zero, your backup will never terminate. + +The given files' metadata is saved as it would be saved without --read-special +(e.g. its name, its size [might be 0], its mode, etc.) - but additionally, also +the content read from it will be saved for it. + +Restoring such files' content is currently only supported one at a time via --stdout +option (and you have to redirect stdout to wherever it shall go, maybe directly +into an existing device file of your choice or indirectly via dd). + +Example +~~~~~~~ + +Imagine you have made some snapshots of logical volumes (LVs) you want to back up. + +Note: For some scenarios, this is a good method to get "crash-like" consistency +(I call it crash-like because it is the same as you would get if you just hit the +reset button or your machine would abruptly and completely crash). +This is better than no consistency at all and a good method for some use cases, +but likely not good enough if you have databases running. + +Then you create a backup archive of all these snapshots.
The backup process will +see a "frozen" state of the logical volumes, while the processes working in the +original volumes continue changing the data stored there. + +You also add the output of `lvdisplay` to your backup, so you can see the LV sizes +in case you ever need to recreate and restore them. + +After the backup has completed, you remove the snapshots again. + +:: + $ # create snapshots here + $ lvdisplay > lvdisplay.txt + $ borg create --read-special /mnt/backup::repo lvdisplay.txt /dev/vg0/*-snapshot + $ # remove snapshots here + +Now, let's see how to restore some LVs from such a backup. + + $ borg extract /mnt/backup::repo lvdisplay.txt + $ # create empty LVs with correct sizes here (look into lvdisplay.txt). + $ # we assume that you created an empty root and home LV and overwrite it now: + $ borg extract --stdout /mnt/backup::repo dev/vg0/root-snapshot > /dev/vg0/root + $ borg extract --stdout /mnt/backup::repo dev/vg0/home-snapshot > /dev/vg0/home + From 4a71613f147d1253eef0d49bf35a8f1751ee671a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 28 Sep 2015 00:05:52 +0200 Subject: [PATCH 055/151] modularize Vagrantfile, use centos6 boxes for building the binaries for building binaries that run everywhere, we need to love old stuff like centos6, because it has the oldest glibc version which we want to support. use 768MB for 64bit machines (not 512) --- Vagrantfile | 441 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 278 insertions(+), 163 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 381fceedd..2d5190344 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -4,23 +4,8 @@ # Automated creation of testing environments on misc. 
platforms # Usage: # vagrant up OS -# vagrant ssh OS command +# vagrant ssh OS -c command # vagrant halt OS -# -# packages_OS goals: -# - have all dependencies installed -# - have a working "virtualenv" command -# - have a working "python3" command -# - have a working "python3.4" command on platforms where we build a binary -# -# packages_prepare_OS goals: (for some older OS) -# - adds additional package sources, so packages_OS can find all it needs. -# -# prepare_user goals: -# - have a working "borg-env" virtual env installed, with code from "borg". -# both directories are in /vagrant/borg/. -# - have a working "borg-env34" virtual env installed, with code from "borg" -# and pyinstaller. def packages_prepare_wheezy return <<-EOF @@ -33,64 +18,74 @@ def packages_prepare_precise return <<-EOF # ubuntu 12.04 precise does not have lz4, but it is available from a ppa: add-apt-repository -y ppa:gezakovacs/lz4 - # we build the 32bit binary here also, using pyinstaller and py3.4. - add-apt-repository -y ppa:fkrull/deadsnakes - EOF -end - -def packages_prepare_trusty - return <<-EOF - # we build the 64bit binary here also, using pyinstaller and py3.4. 
- add-apt-repository -y ppa:fkrull/deadsnakes - EOF -end - -def packages_centos - return <<-EOF - yum install -y epel-release - yum update -y - yum install -y python34 python34-devel - ln -s /usr/bin/python3.4 /usr/bin/python3 - yum install -y openssl-devel openssl - yum install -y libacl-devel libacl - yum install -y lz4-devel - yum install -y fuse-devel fuse pkgconfig - yum install -y fakeroot gcc git - yum install -y python-pip - pip install virtualenv EOF end def packages_debianoid return <<-EOF apt-get update - apt-get install -y python-dev # pyinstaller needs py2 - apt-get install -y python3-dev python3-setuptools - apt-get install -y python3.4-dev # for pyinstaller / binary building - apt-get install -y libssl-dev libacl1-dev liblz4-dev - apt-get install -y libfuse-dev fuse pkg-config + # for building borgbackup and dependencies: + apt-get install -y libssl-dev libacl1-dev liblz4-dev libfuse-dev fuse pkg-config apt-get install -y fakeroot build-essential git + apt-get install -y python3-dev python3-setuptools # this way it works on older dists (like ubuntu 12.04) also: easy_install3 pip pip3 install virtualenv + touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile + EOF +end + +def packages_redhatted + return <<-EOF + yum install -y epel-release + yum update -y + # for building borgbackup and dependencies: + yum install -y openssl-devel openssl libacl-devel libacl lz4-devel fuse-devel fuse pkgconfig + usermod -a -G fuse vagrant + yum install -y fakeroot gcc git patch + # for building python: + yum install -y zlib-devel bzip2-devel ncurses-devel readline-devel xz-devel sqlite-devel + #yum install -y python-pip + #pip install virtualenv + touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile + EOF +end + +def packages_darwin + return <<-EOF + # get osxfuse 3.0.x pre-release code from github: + curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.0.5/osxfuse-3.0.5.dmg >osxfuse.dmg + MOUNTDIR=$(echo `hdiutil 
mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \ + && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.0.5.pkg" -target / + sudo chown -R vagrant /usr/local # brew must be able to create stuff here + ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" + brew update + brew install openssl + brew install lz4 + brew install fakeroot + brew install git + touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile EOF end def packages_freebsd return <<-EOF - pkg install -y python34 py34-setuptools34 - ln -s /usr/local/bin/python3.4 /usr/local/bin/python3 - pkg install -y openssl liblz4 - pkg install -y fusefs-libs pkgconf - pkg install -y fakeroot git - easy_install-3.4 pip - pip3 install virtualenv + # for building borgbackup and dependencies: + pkg install -y openssl liblz4 fusefs-libs pkgconf + pkg install -y fakeroot git bash + # for building python: + pkg install sqlite3 + # make bash default / work: + chsh -s bash vagrant + mount -t fdescfs fdesc /dev/fd + echo 'fdesc /dev/fd fdescfs rw 0 0' >> /etc/fstab # make FUSE work echo 'fuse_load="YES"' >> /boot/loader.conf echo 'vfs.usermount=1' >> /etc/sysctl.conf kldload fuse sysctl vfs.usermount=1 pw groupmod operator -M vagrant + touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile EOF end @@ -103,117 +98,166 @@ def packages_openbsd ln -sf /home/vagrant/borg /vagrant/ pkg_add bash chsh -s /usr/local/bin/bash vagrant - pkg_add python-3.4.2 - pkg_add py3-setuptools - ln -sf /usr/local/bin/python3.4 /usr/local/bin/python3 - ln -sf /usr/local/bin/python3.4 /usr/local/bin/python pkg_add openssl pkg_add lz4 # pkg_add fuse # does not install, sdl dependency missing pkg_add git # no fakeroot + pkg_add python-3.4.2 + pkg_add py3-setuptools + ln -sf /usr/local/bin/python3.4 /usr/local/bin/python3 + ln -sf /usr/local/bin/python3.4 /usr/local/bin/python easy_install-3.4 pip pip3 install virtualenv + touch ~vagrant/.bash_profile ; chown 
vagrant ~vagrant/.bash_profile EOF end def packages_netbsd - # ftp://ftp.netbsd.org/pub/pkgsrc/packages/NetBSD/amd64/6.1.5/All/ return <<-EOF - #ftp ftp://ftp.NetBSD.org/pub/pkgsrc/current/pkgsrc.tar.gz - #tar xzf pkgsrc.tar.gz - #cd pkgsrc/bootstrap - #./bootstrap - #PATH="/usr/pkg/sbin:$PATH" - hostname netbsd # the box we use has an invalid hostname - PKG_PATH="ftp://ftp.NetBSD.org/pub/pkgsrc/packages/NetBSD/amd64/6.1.5/All/" - export PKG_PATH - pkg_add python34 py34-setuptools - ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python - ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python3 - pkg_add mozilla-rootcerts lz4 git - mkdir -p /usr/local/opt/lz4/include - mkdir -p /usr/local/opt/lz4/lib - ln -s /usr/pkg/include/lz4*.h /usr/local/opt/lz4/include/ - ln -s /usr/pkg/lib/liblz4* /usr/local/opt/lz4/lib/ - touch /etc/openssl/openssl.cnf # avoids a flood of "can't open ..." - mozilla-rootcerts install - #pkg_add pkg-config fuse-2.9.3 # llfuse does not support netbsd - easy_install-3.4 pip - pip install virtualenv + hostname netbsd # the box we use has an invalid hostname + PKG_PATH="ftp://ftp.NetBSD.org/pub/pkgsrc/packages/NetBSD/amd64/6.1.5/All/" + export PKG_PATH + pkg_add mozilla-rootcerts lz4 git bash + chsh -s bash vagrant + mkdir -p /usr/local/opt/lz4/include + mkdir -p /usr/local/opt/lz4/lib + ln -s /usr/pkg/include/lz4*.h /usr/local/opt/lz4/include/ + ln -s /usr/pkg/lib/liblz4* /usr/local/opt/lz4/lib/ + touch /etc/openssl/openssl.cnf # avoids a flood of "can't open ..." 
+ mozilla-rootcerts install + # llfuse does not support netbsd + pkg_add python34 py34-setuptools + ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python + ln -s /usr/pkg/bin/python3.4 /usr/pkg/bin/python3 + easy_install-3.4 pip + pip install virtualenv + touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile EOF end -def packages_darwin +def install_pyenv(boxname) return <<-EOF - ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" - brew update - # this installs osxfuse 2.8.0 (which is based on libfuse 2.7.3). - # llfuse later complains about needing (libfuse) 2.8.0 at least. - #brew install caskroom/cask/brew-cask - #brew cask install osxfuse # needs cask install because of apple's unsigned kext ban - # get osxfuse 3.0.x pre-release code from github: - curl https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.0.5/osxfuse-3.0.5.dmg -L >osxfuse.dmg - MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \ - && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.0.5.pkg" -target / - brew install openssl - brew install lz4 - # looks dirty, is there a better way without root?: - mkdir -p /usr/local/opt/lz4 - ln -s /usr/local/Cellar/lz4/r*/include /usr/local/opt/lz4/ - ln -s /usr/local/Cellar/lz4/r*/lib /usr/local/opt/lz4/ - brew install fakeroot - brew install pyenv - if which pyenv > /dev/null; then - eval "$(pyenv init -)" - fi - env PYTHON_CONFIGURE_OPTS="--enable-framework" pyenv install 3.4.3 - pyenv global 3.4.3 + curl -s -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/pyenv-installer | bash + echo 'export PATH="$HOME/.pyenv/bin:$PATH"' >> ~/.bash_profile + echo 'eval "$(pyenv init -)"' >> ~/.bash_profile + echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bash_profile + echo 'export PYTHON_CONFIGURE_OPTS="--enable-shared"' >> ~/.bash_profile + EOF +end + +def fix_pyenv_darwin(boxname) + return <<-EOF + echo 'export 
PYTHON_CONFIGURE_OPTS="--enable-framework"' >> ~/.bash_profile + EOF +end + +def install_pythons(boxname) + return <<-EOF + . ~/.bash_profile + pyenv install 3.2.2 # tests, 3.2(.0) and 3.2.1 deadlock, issue #221 + pyenv install 3.3.0 # tests + pyenv install 3.4.0 # tests + pyenv install 3.5.0 # tests + #pyenv install 3.5.1 # binary build, use latest 3.5.x release pyenv rehash - python -m pip install virtualenv EOF end -def prepare_user(boxname) +def build_sys_venv(boxname) return <<-EOF - echo export 'PATH=/usr/local/bin:$PATH' >> ~/.profile - . ~/.profile - - # initialize python on darwin - if which pyenv > /dev/null; then - eval "$(pyenv init -)" - fi - + . ~/.bash_profile cd /vagrant/borg - # this is the env with the STANDARD python3.x on this platform virtualenv --python=python3 borg-env + EOF +end + +def build_pyenv_venv(boxname) + return <<-EOF + . ~/.bash_profile + cd /vagrant/borg + # use the latest 3.5 release + pyenv global 3.5.0 + pyenv virtualenv 3.5.0 borg-env + ln -s ~/.pyenv/versions/borg-env . + EOF +end + +def install_borg(boxname) + return <<-EOF + . ~/.bash_profile + cd /vagrant/borg . borg-env/bin/activate + pip install -U wheel # upgrade wheel, too old for 3.5 cd borg + # clean up (wrong/outdated) stuff we likely got via rsync: + rm -f borg/*.so borg/*.cpy* + rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c + rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__ pip install 'llfuse<0.41' # 0.41 does not install due to UnicodeDecodeError pip install -r requirements.d/development.txt pip install -e . + EOF +end - # on some platforms, we build a borg binary (and use py3.4 for it) - if which python3.4 > /dev/null; then - cd /vagrant/borg - virtualenv --python=python3.4 borg-env34 - . borg-env34/bin/activate - cd borg - pip install 'PyInstaller==3.0.dev2' - pip install 'llfuse<0.41' # 0.41 does not install due to UnicodeDecodeError - pip install -r requirements.d/development.txt - pip install -e . 
+def install_pyinstaller(boxname) + return <<-EOF + . ~/.bash_profile + cd /vagrant/borg + . borg-env/bin/activate + git clone https://github.com/pyinstaller/pyinstaller.git + cd pyinstaller + git checkout develop + pip install -e . + EOF +end + +def install_pyinstaller_bootloader(boxname) + return <<-EOF + . ~/.bash_profile + cd /vagrant/borg + . borg-env/bin/activate + git clone https://github.com/pyinstaller/pyinstaller.git + cd pyinstaller + git checkout python3 + # build bootloader, if it is not included + cd bootloader + python ./waf all + cd .. + pip install -e . + EOF +end + +def build_binary_with_pyinstaller(boxname) + return <<-EOF + . ~/.bash_profile + cd /vagrant/borg + . borg-env/bin/activate + cd borg + pyinstaller -F -n borg --hidden-import=logging.config borg/__main__.py + EOF +end + +def run_tests(boxname) + return <<-EOF + . ~/.bash_profile + cd /vagrant/borg/borg + . ../borg-env/bin/activate + if which pyenv > /dev/null; then + # for testing, use the earliest point releases of the supported python versions: + pyenv global 3.2.2 3.3.0 3.4.0 3.5.0 + fi + # otherwise: just use the system python + if which fakeroot > /dev/null; then + fakeroot -u tox --skip-missing-interpreters + else + tox --skip-missing-interpreters fi - - echo - echo "Run:" - echo " vagrant rsync #{boxname}" - echo " vagrant ssh #{boxname} -c 'cd project/path; ...'" EOF end def fix_perms return <<-EOF - . ~/.profile + # . 
~/.profile chown -R vagrant /vagrant/borg EOF end @@ -229,68 +273,139 @@ Vagrant.configure(2) do |config| config.vm.provider :virtualbox do |v| #v.gui = true - v.cpus = 2 + v.cpus = 1 end - config.vm.define "centos7" do |b| + # Linux + config.vm.define "centos7_64" do |b| b.vm.box = "centos/7" - b.vm.provision "packages centos7 64", :type => :shell, :inline => packages_centos - b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("centos7_64") + b.vm.provider :virtualbox do |v| + v.memory = 768 + end + b.vm.provision "install system packages", :type => :shell, :inline => packages_redhatted + b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos7_64") + b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos7_64") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos7_64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos7_64") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos7_64") + end + + config.vm.define "centos6_32" do |b| + b.vm.box = "centos6-32" + b.vm.provision "install system packages", :type => :shell, :inline => packages_redhatted + b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_32") + b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_32") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_32") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos6_32") + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("centos6_32") + b.vm.provision "build binary with pyinstaller", :type => :shell, 
:privileged => false, :inline => build_binary_with_pyinstaller("centos6_32") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_32") + end + + config.vm.define "centos6_64" do |b| + b.vm.box = "centos6-64" + b.vm.provider :virtualbox do |v| + v.memory = 768 + end + b.vm.provision "install system packages", :type => :shell, :inline => packages_redhatted + b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_64") + b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_64") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos6_64") + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("centos6_64") + b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("centos6_64") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_64") end config.vm.define "trusty64" do |b| b.vm.box = "ubuntu/trusty64" - b.vm.provision "packages prepare trusty", :type => :shell, :inline => packages_prepare_trusty + b.vm.provider :virtualbox do |v| + v.memory = 768 + end b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid - b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("trusty64") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("trusty64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("trusty64") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("trusty64") end config.vm.define "precise32" do |b| 
b.vm.box = "ubuntu/precise32" b.vm.provision "packages prepare precise", :type => :shell, :inline => packages_prepare_precise b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid - b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("precise32") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("precise32") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("precise32") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("precise32") end config.vm.define "jessie64" do |b| b.vm.box = "debian/jessie64" + b.vm.provider :virtualbox do |v| + v.memory = 768 + end b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid - b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("jessie64") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("jessie64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("jessie64") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("jessie64") end config.vm.define "wheezy32" do |b| b.vm.box = "puppetlabs/debian-7.8-32-nocm" b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid - b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("wheezy32") - end - - # BSD - config.vm.define "freebsd" do |b| - b.vm.box = "geoffgarside/freebsd-10.2" - b.vm.provision "packages freebsd", :type => :shell, :inline => packages_freebsd - b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("freebsd") - end - - config.vm.define "openbsd" do |b| - b.vm.box = 
"bodgit/openbsd-5.7-amd64" - b.vm.provision "packages openbsd", :type => :shell, :inline => packages_openbsd - b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("openbsd") - end - - config.vm.define "netbsd" do |b| - #b.vm.box = "Kralian/netbsd_6.1.5_amd64" - b.vm.box = "alex-skimlinks/netbsd-6.1.5-amd64" - b.ssh.shell = "ksh -l" - #b.ssh.shell = "sh" - b.vm.provision "packages netbsd", :type => :shell, :inline => packages_netbsd - b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("netbsd") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("wheezy32") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy32") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy32") end # OS X - config.vm.define "darwin" do |b| + config.vm.define "darwin64" do |b| b.vm.box = "jhcook/yosemite-clitools" b.vm.provision "packages darwin", :type => :shell, :privileged => false, :inline => packages_darwin - b.vm.provision "prepare user", :type => :shell, :privileged => false, :inline => prepare_user("darwin") + b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("darwin64") + b.vm.provision "fix pyenv", :type => :shell, :privileged => false, :inline => fix_pyenv_darwin("darwin64") + b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("darwin64") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("darwin64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("darwin64") + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("darwin64") + b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => 
build_binary_with_pyinstaller("darwin64") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("darwin64") + end + + # BSD + config.vm.define "freebsd64" do |b| + b.vm.box = "geoffgarside/freebsd-10.2" + b.vm.provider :virtualbox do |v| + v.memory = 768 + end + b.vm.provision "install system packages", :type => :shell, :inline => packages_freebsd + b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("freebsd") + b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("freebsd") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("freebsd") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("freebsd") + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller_bootloader("freebsd") + b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("freebsd") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("freebsd") + end + + config.vm.define "openbsd64" do |b| + b.vm.box = "bodgit/openbsd-5.7-amd64" + b.vm.provider :virtualbox do |v| + v.memory = 768 + end + b.vm.provision "packages openbsd", :type => :shell, :inline => packages_openbsd + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("openbsd64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("openbsd64") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("openbsd64") + end + + config.vm.define "netbsd64" do |b| + b.vm.box = "alex-skimlinks/netbsd-6.1.5-amd64" + b.vm.provider :virtualbox do |v| + v.memory = 768 + end + b.vm.provision "packages netbsd", :type => :shell, :inline => packages_netbsd + b.vm.provision "build 
env", :type => :shell, :privileged => false, :inline => build_sys_venv("netbsd64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("netbsd64") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("netbsd64") end end From 524ca297ea72c0151735e7df15ff9f31dd7b4b6b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 28 Sep 2015 00:22:20 +0200 Subject: [PATCH 056/151] document binary and non-binary requirements --- docs/installation.rst | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 4fd5ed356..e0608027c 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -4,9 +4,15 @@ Installation ============ -|project_name| requires: +|project_name| pyinstaller binary installation requires: -* Python_ >= 3.2 +* Linux: glibc >= 2.12 (ok for most supported Linux releases) +* MacOS X: 10.10 (unknown whether it works for older releases) +* FreeBSD: 10.2 (unknown whether it works for older releases) + +|project_name| non-binary installation requires: + +* Python_ >= 3.2.2 * OpenSSL_ >= 1.0.0 * libacl_ (that pulls in libattr_ also) * liblz4_ @@ -21,6 +27,8 @@ Below, we describe different ways to install |project_name|. - **dist package** - easy and fast, needs a distribution and platform specific binary package (for your Linux/*BSD/OS X/... distribution). +- **pyinstaller binary** - easy and fast, we provide a ready-to-use binary file + that just works on the supported platforms - **wheel** - easy and fast, needs a platform specific borgbackup binary wheel, which matches your platform [OS and CPU]). - **pypi** - installing a source package from pypi needs more installation steps @@ -74,8 +82,8 @@ and compare that to our latest release and review the change log (see links on our web site). 
-Installation (binary) ---------------------- +Installation (pyinstaller binary) +--------------------------------- For some platforms we offer a ready-to-use standalone borg binary. It is supposed to work without requiring installation or preparations. From 507752f39a80aecd49859545c0d3e5d6d9d90a19 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 28 Sep 2015 00:33:20 +0200 Subject: [PATCH 057/151] update CHANGES --- CHANGES.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 0751cf273..b2c3af457 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,19 @@ Borg Changelog ============== +Version 0.26.1 +-------------- + +This is a minor update, just docs and new pyinstaller binaries. + +- docs update about python and binary requirements +- better docs for --read-special, fix #220 +- re-built the binaries, fix #218 and #213 (glibc version issue) +- update web site about single-file pyinstaller binaries + +Note: if you did a python-based installation, there is no need to upgrade. + + Version 0.26.0 -------------- From a3d967bdffab78afe80af6df03b3c968efb308cb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 28 Sep 2015 00:41:40 +0200 Subject: [PATCH 058/151] hint to single-file pyinstaller binaries from README --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index b8fb652b4..310413bfe 100644 --- a/README.rst +++ b/README.rst @@ -63,6 +63,10 @@ Main features Backup archives are mountable as userspace filesystems for easy interactive backup examination and restores (e.g. by using a regular file manager). +**Easy installation** + For Linux, Mac OS X and FreeBSD, we offer a single-file pyinstaller binary + that does not require installing anything - you can just run it. 
+ **Platforms Borg works on** * Linux * Mac OS X From 6aca4694fe1125859538fed6a2763e5695a7fdf8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 30 Sep 2015 16:10:50 +0200 Subject: [PATCH 059/151] fix segment entry header size check, attic issue #352 it only checked for too big sizes, but not for too small ones. that made it die with a ValueError and not raise the appropriate IntegrityError that gets handled in check() and triggers the repair attempt for the segment. --- borg/repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/repository.py b/borg/repository.py index f43161fb6..ee3074311 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -538,7 +538,7 @@ class LoggedIO: crc, size, tag = self.header_fmt.unpack(header) except struct.error as err: raise IntegrityError('Invalid segment entry header [offset {}]: {}'.format(offset, err)) - if size > MAX_OBJECT_SIZE: + if size > MAX_OBJECT_SIZE or size < self.header_fmt.size: raise IntegrityError('Invalid segment entry size [offset {}]'.format(offset)) length = size - self.header_fmt.size rest = fd.read(length) From ba5fa0ba99c5f1c6a842fb9cb61d95c54630bcbe Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 30 Sep 2015 17:13:22 +0200 Subject: [PATCH 060/151] update / improve / reformat FAQ - reduce redundancy (platforms are documented in README.rst) - reformat to 80 chars width - clarify checkpoints - remove workarounds for stuff that was fixed --- docs/faq.rst | 77 ++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 42 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index bb569e4f1..c43936ce2 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -4,16 +4,9 @@ Frequently asked questions ========================== -Which platforms are supported? - Currently Linux, FreeBSD and MacOS X are supported. - OpenBSD and NetBSD work also, except for xattrs and ACLs. 
- - You can try your luck on other POSIX-like systems, like Cygwin, - other BSDs, etc. but they are not officially supported. - Can I backup VM disk images? - Yes, the :ref:`deduplication ` technique used by |project_name| - makes sure only the modified parts of the file are stored. + Yes, the :ref:`deduplication ` technique used by + |project_name| makes sure only the modified parts of the file are stored. Also, we have optional simple sparse file support for extract. Can I backup from multiple servers into a single repository? @@ -47,9 +40,10 @@ Which file types, attributes, etc. are preserved? * BSD flags on OS X and FreeBSD Which file types, attributes, etc. are *not* preserved? - * UNIX domain sockets (because it does not make sense - they are meaningless - without the running process that created them and the process needs to - recreate them in any case). So, don't panic if your backup misses a UDS! + * UNIX domain sockets (because it does not make sense - they are + meaningless without the running process that created them and the process + needs to recreate them in any case). So, don't panic if your backup + misses a UDS! * The precise on-disk representation of the holes in a sparse file. Archive creation has no special support for sparse files, holes are backed up as (deduplicated and compressed) runs of zero bytes. @@ -77,52 +71,51 @@ When backing up to remote servers, do I have to trust the remote server? Yes, as an attacker with access to the remote server could delete (or otherwise make unavailable) all your backups. -If a backup stops mid-way, does the already-backed-up data stay there? I.e. does |project_name| resume backups? - Yes, during a backup a special checkpoint archive named ``.checkpoint`` is saved every 5 minutes - containing all the data backed-up until that point. This means that at most 5 minutes worth of data needs to be - retransmitted if a backup needs to be restarted. 
+If a backup stops mid-way, does the already-backed-up data stay there? + Yes, |project_name| supports resuming backups. + During a backup a special checkpoint archive named ``.checkpoint`` + is saved every checkpoint interval (the default value for this is 5 + minutes) containing all the data backed-up until that point. This means + that at most worth of data needs to be retransmitted + if a backup needs to be restarted. + Once your backup has finished successfully, you can delete all ``*.checkpoint`` + archives. If it crashes with a UnicodeError, what can I do? Check if your encoding is set correctly. For most POSIX-like systems, try:: export LANG=en_US.UTF-8 # or similar, important is correct charset -I can't extract non-ascii filenames by giving them on the commandline on OS X!? - This is due to different ways to represent some characters in unicode. - HFS+ likes the decomposed form while the commandline seems to be the composed - form usually. If you run into that, for now maybe just try: +I can't extract non-ascii filenames by giving them on the commandline!? + This might be due to different ways to represent some characters in unicode + or due to other non-ascii encoding issues. + If you run into that, try this: - - avoiding the non-ascii characters on the commandline by e.g. extracting + - avoid the non-ascii characters on the commandline by e.g. extracting the parent directory (or even everything) - - try to enter the composed form on the commandline - mount the repo using FUSE and use some file manager - See issue #143 on the issue tracker for more about this. - -If I want to run |project_name| on a ARM CPU older than ARM v6? - You need to enable the alignment trap handler to fixup misaligned accesses:: - - echo "2" > /proc/cpu/alignment - Can |project_name| add redundancy to the backup data to deal with hardware malfunction? - No, it can't. 
While that at first sounds like a good idea to defend against some - defect HDD sectors or SSD flash blocks, dealing with this in a reliable way needs a lot - of low-level storage layout information and control which we do not have (and also can't - get, even if we wanted). + No, it can't. While that at first sounds like a good idea to defend against + some defect HDD sectors or SSD flash blocks, dealing with this in a + reliable way needs a lot of low-level storage layout information and + control which we do not have (and also can't get, even if we wanted). - So, if you need that, consider RAID1 or a filesystem that offers redundant storage - or just make 2 backups to different locations / different hardware. + So, if you need that, consider RAID or a filesystem that offers redundant + storage or just make backups to different locations / different hardware. + + See also `ticket 225 `_. Can |project_name| verify data integrity of a backup archive? - Yes, if you want to detect accidental data damage (like bit rot), use the ``check`` - operation. It will notice corruption using CRCs and hashes. - If you want to be able to detect malicious tampering also, use a encrypted repo. - It will then be able to check using CRCs and HMACs. + Yes, if you want to detect accidental data damage (like bit rot), use the + ``check`` operation. It will notice corruption using CRCs and hashes. + If you want to be able to detect malicious tampering also, use a encrypted + repo. It will then be able to check using CRCs and HMACs. Why was Borg forked from Attic? - Borg was created in May 2015 in response to the difficulty of - getting new code or larger changes incorporated into Attic and - establishing a bigger developer community / more open development. + Borg was created in May 2015 in response to the difficulty of getting new + code or larger changes incorporated into Attic and establishing a bigger + developer community / more open development. 
More details can be found in `ticket 217 `_ that led to the fork. From bc9679f0e5b689ff90c7f199c6f55431743d1df4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 30 Sep 2015 17:38:51 +0200 Subject: [PATCH 061/151] development docs: update / fix / add Vagrant section thanks to level323 for the vagrant docs --- docs/development.rst | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/docs/development.rst b/docs/development.rst index 1ba239370..03a4b735e 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -59,7 +59,7 @@ The documentation (in reStructuredText format, .rst) is in docs/. To build the html version of it, you need to have sphinx installed:: - pip3 install sphinx + pip3 install sphinx # important: this will install sphinx with Python 3 Now run:: @@ -68,6 +68,28 @@ Now run:: Then point a web browser at docs/_build/html/index.html. +Using Vagrant +------------- + +We use Vagrant for the automated creation of testing environment and borgbackup +standalone binaries for various platforms. + +For better security, there is no automatic sync in the VM to host direction. +The plugin `vagrant-scp` is useful to copy stuff from the VMs to the host. + +Usage:: + + To create and provision the VM: + vagrant up OS + To create an ssh session to the VM: + vagrant ssh OS command + To shut down the VM: + vagrant halt OS + To shut down and destroy the VM: + vagrant destroy OS + To copy files from the VM (in this case, the generated binary): + vagrant scp OS:/vagrant/borg/borg/dist/borg . + Creating a new release ---------------------- @@ -106,7 +128,7 @@ Creating binary wheels With virtual env activated:: - pip install wheel + pip install -U wheel python setup.py bdist_wheel ls -l dist/*.whl @@ -117,16 +139,15 @@ Note: Binary wheels are rather specific for the platform they get built on. 
Creating standalone binaries ---------------------------- +Make sure you have everything built and installed (including llfuse and fuse). + With virtual env activated:: - pip install pyinstaller==3.0.dev2 # or a later 3.x release - pyinstaller -F -n borg-PLATFORM borg/__main__.py + pip install pyinstaller==3.0.dev2 # or a later 3.x release or git checkout + pyinstaller -F -n borg-PLATFORM --hidden-import=logging.config borg/__main__.py ls -l dist/* -On less good supported platforms than Linux, there might be issues with pyinstaller -not finding the dynamic python library (libpython*) or with pyinstaller not having -a pre-compiled "bootloader" for the platform or with not supporting the platform at -all. +If you encounter issues, see also our `Vagrantfile` for details. Note: Standalone binaries built with pyinstaller are supposed to work on same OS, same architecture (x86 32bit, amd64 64bit) without external dependencies. From 967963e17dd319453f60c7146db3e75eb9f44ff9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 30 Sep 2015 17:42:20 +0200 Subject: [PATCH 062/151] Vagrantfile: remove usage, it is in the development docs now --- Vagrantfile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 2d5190344..2777f33a7 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -1,11 +1,7 @@ # -*- mode: ruby -*- # vi: set ft=ruby : -# Automated creation of testing environments on misc. platforms -# Usage: -# vagrant up OS -# vagrant ssh OS -c command -# vagrant halt OS +# Automated creation of testing environments / binaries on misc. 
platforms def packages_prepare_wheezy return <<-EOF From e3433f433ca0ee33959708cb8bcbd4263215eefc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 17:30:13 -0400 Subject: [PATCH 063/151] generate usage include files only as needed right now, the update_usage script regenerates the usage files at every call by moving this into the makefile, we make those files be generated only when the source file change, which makes testing docs much faster --- docs/Makefile | 16 ++++++++++++++-- docs/update_usage.sh | 13 ------------- 2 files changed, 14 insertions(+), 15 deletions(-) delete mode 100755 docs/update_usage.sh diff --git a/docs/Makefile b/docs/Makefile index 387195a2a..1aaf1cf7e 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -36,8 +36,7 @@ help: clean: -rm -rf $(BUILDDIR)/* -html: - ./update_usage.sh +html: usage $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." @@ -140,3 +139,16 @@ gh-io: html inotify: html while inotifywait -r . --exclude usage.rst --exclude '_build/*' ; do make html ; done + +# generate list of targets +usage: $(shell borg help | grep -A1 "Available commands:" | tail -1 | sed 's/[{} ]//g;s/,\|^/.rst.inc usage\//g;s/^.rst.inc//;s/usage\/help//') + +# generate help file based on usage +usage/%.rst.inc: ../borg/archiver.py + printf ".. _borg_$*:\n\n" > $@ + printf "borg $*\n" >> $@ + echo -n borg $* | tr 'a-z- ' '-' >> $@ + printf "\n::\n\n" >> $@ + borg help $* --usage-only | sed -e 's/^/ /' >> $@ + printf "\nDescription\n~~~~~~~~~~~\n" >> $@ + borg help $* --epilog-only >> $@ diff --git a/docs/update_usage.sh b/docs/update_usage.sh deleted file mode 100755 index 9e79f4e88..000000000 --- a/docs/update_usage.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -if [ ! 
-d usage ]; then - mkdir usage -fi -for cmd in change-passphrase check create delete extract info init list mount prune serve; do - FILENAME="usage/$cmd.rst.inc" - LINE=`echo -n borg $cmd | tr 'a-z- ' '-'` - echo -e ".. _borg_$cmd:\n" > $FILENAME - echo -e "borg $cmd\n$LINE\n::\n\n" >> $FILENAME - borg help $cmd --usage-only | sed -e 's/^/ /' >> $FILENAME - echo -e "\nDescription\n~~~~~~~~~~~\n" >> $FILENAME - borg help $cmd --epilog-only >> $FILENAME -done From 50e7d0f345c85db9a86e0ea2c8991e61095ddbf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 17:42:18 -0400 Subject: [PATCH 064/151] more human-readable output in usage generation --- docs/Makefile | 15 ++++++++------- docs/api.rst | 6 ++++++ 2 files changed, 14 insertions(+), 7 deletions(-) create mode 100644 docs/api.rst diff --git a/docs/Makefile b/docs/Makefile index 1aaf1cf7e..1f3f7d76c 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -145,10 +145,11 @@ usage: $(shell borg help | grep -A1 "Available commands:" | tail -1 | sed 's/[{} # generate help file based on usage usage/%.rst.inc: ../borg/archiver.py - printf ".. _borg_$*:\n\n" > $@ - printf "borg $*\n" >> $@ - echo -n borg $* | tr 'a-z- ' '-' >> $@ - printf "\n::\n\n" >> $@ - borg help $* --usage-only | sed -e 's/^/ /' >> $@ - printf "\nDescription\n~~~~~~~~~~~\n" >> $@ - borg help $* --epilog-only >> $@ + @echo generating usage for $* + @printf ".. _borg_$*:\n\n" > $@ + @printf "borg $*\n" >> $@ + @echo -n borg $* | tr 'a-z- ' '-' >> $@ + @printf "\n::\n\n" >> $@ + @borg help $* --usage-only | sed -e 's/^/ /' >> $@ + @printf "\nDescription\n~~~~~~~~~~~\n" >> $@ + @borg help $* --epilog-only >> $@ diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 000000000..c535aa970 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,6 @@ +Borg Backup API documentation +============================= + +.. 
automodule:: borg + :members: + :undoc-members: From b72d955c3ee901be9c4d221893cdd88b750324ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 18:33:27 -0400 Subject: [PATCH 065/151] remove api file introduced by mistake in #228 --- docs/api.rst | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 docs/api.rst diff --git a/docs/api.rst b/docs/api.rst deleted file mode 100644 index c535aa970..000000000 --- a/docs/api.rst +++ /dev/null @@ -1,6 +0,0 @@ -Borg Backup API documentation -============================= - -.. automodule:: borg - :members: - :undoc-members: From a2cf0025c23e938029e023116167ec7d74583da1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 18:35:54 -0400 Subject: [PATCH 066/151] add automatically generated API this is a crude hack for now, and could use a better table of contents but at least we have some way of linking and showing the different internal functions the next phase here is obviously to document that API through the addition of docstrings. a static api.rst could also be easier to read, but maybe that could go through some docstrings as well, to be tested --- docs/Makefile | 17 ++++++++++++++++- docs/conf.py | 2 +- docs/index.rst | 1 + 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 1f3f7d76c..133080cdb 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -36,7 +36,7 @@ help: clean: -rm -rf $(BUILDDIR)/* -html: usage +html: usage api.rst $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
@@ -153,3 +153,18 @@ usage/%.rst.inc: ../borg/archiver.py @borg help $* --usage-only | sed -e 's/^/ /' >> $@ @printf "\nDescription\n~~~~~~~~~~~\n" >> $@ @borg help $* --epilog-only >> $@ + +api.rst: Makefile + @echo "auto-generating API documentation" + @echo "Borg Backup API documentation" > $@ + @echo "=============================" >> $@ + @echo "" >> $@ + @for mod in ../borg/*.pyx ../borg/*.py; do \ + if echo "$$mod" | grep -q "/_"; then \ + continue ; \ + fi ; \ + printf ".. automodule:: "; \ + echo "$$mod" | sed "s!\.\./!!;s/\.pyx\?//;s!/!.!"; \ + echo " :members:"; \ + echo " :undoc-members:"; \ + done >> $@ diff --git a/docs/conf.py b/docs/conf.py index 772d88498..eba5c841e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -218,7 +218,7 @@ latex_documents = [ # ['see "AUTHORS" file'], 1) #] -extensions = ['sphinx.ext.extlinks'] +extensions = ['sphinx.ext.extlinks', 'sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode'] extlinks = { 'issue': ('https://github.com/borgbackup/borg/issues/%s', '#'), diff --git a/docs/index.rst b/docs/index.rst index a871ef353..6a42dce0f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,3 +16,4 @@ Borg Documentation changes internals development + api From de54228809046d3942b5c20e2d8fbea54653aba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 21:08:47 -0400 Subject: [PATCH 067/151] first stab at an attic-borg converter for now, just in the test suite, but will be migrated to a separate command --- borg/testsuite/convert.py | 108 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 borg/testsuite/convert.py diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py new file mode 100644 index 000000000..e201581c3 --- /dev/null +++ b/borg/testsuite/convert.py @@ -0,0 +1,108 @@ +import binascii +import os +import pytest +import shutil +import tempfile + +import attic.repository + +from ..helpers import 
IntegrityError +from ..repository import Repository, MAGIC +from . import BaseTestCase + +class NotImplementedException(Exception): + pass + +class ConversionTestCase(BaseTestCase): + + def open(self, path, repo_type = Repository, create=False): + return repo_type(os.path.join(path, 'repository'), create = create) + + def setUp(self): + self.tmppath = tempfile.mkdtemp() + self.attic_repo = self.open(self.tmppath, + repo_type = attic.repository.Repository, + create = True) + # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ + for x in range(100): + self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + self.attic_repo.close() + + def test_convert(self): + self.repository = self.open(self.tmppath) + # check should fail because of magic number + assert not self.repository.check() # can't check raises() because check() handles the error + self.repository.close() + self.convert() + self.repository = self.open(self.tmppath) + assert self.repository.check() # can't check raises() because check() handles the error + self.repository.close() + + def convert(self): + '''convert an attic repository to a borg repository + + those are the files that need to be converted here, from most + important to least important: segments, key files, and various + caches, the latter being optional, as they will be rebuilt if + missing.''' + self.convert_segments() + with pytest.raises(NotImplementedException): + self.convert_keyfiles() + with pytest.raises(NotImplementedException): + self.convert_cache() + + def convert_segments(self): + '''convert repository segments from attic to borg + + replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in + `$ATTIC_REPO/data/**`. + + luckily the segment length didn't change so we can just + replace the 8 first bytes of all regular files in there. 
+ + `Repository.segment_iterator()` could be used here.''' + self.repository = self.open(self.tmppath) + segs = [ filename for i, filename in self.repository.io.segment_iterator() ] + self.repository.close() + for filename in segs: + print("converting segment %s..." % filename) + with open(filename, 'r+b') as segment: + segment.seek(0) + segment.write(MAGIC) + + def convert_keyfiles(self): + '''convert key files from attic to borg + + replacement pattern is `s/ATTIC KEY/BORG_KEY/` in + `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or + `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or + `$HOME/.borg/keys`. + + the keyfiles are loaded by `KeyfileKey.find_key_file()`. that + finds the keys with the right identifier for the repo, no need + to decrypt to convert. will need to rewrite the whole key file + because magic number length changed.''' + raise NotImplementedException('not implemented') + + def convert_cache(self): + '''convert caches from attic to borg + + those are all hash indexes, so we need to + `s/ATTICIDX/BORG_IDX/` in a few locations: + + * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` + is the `Repository.get_index_transaction_id()`), which we + should probably update, with a lock, see + `Repository.open()`, which i'm not sure we should use + because it may write data on `Repository.close()`... 
+ + * the `files` and `chunks` cache (in + `$HOME/.cache/attic//`), which we could just drop, + but if we'd want to convert, we could open it with the + `Cache.open()`, edit in place and then `Cache.close()` to + make sure we have locking right + ''' + raise NotImplementedException('not implemented') + + def tearDown(self): + shutil.rmtree(self.tmppath) From 9ab1e1961e8acf29b17b1acedc62b4f717b1fd65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:23:11 -0400 Subject: [PATCH 068/151] keyfile conversion code --- borg/testsuite/convert.py | 84 ++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index e201581c3..ba3af8ee6 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -5,16 +5,27 @@ import shutil import tempfile import attic.repository +import attic.key +import attic.helpers -from ..helpers import IntegrityError +from ..helpers import IntegrityError, get_keys_dir from ..repository import Repository, MAGIC +from ..key import KeyfileKey, KeyfileNotFoundError from . 
import BaseTestCase class NotImplementedException(Exception): pass +class AtticKeyfileKey(KeyfileKey): + '''backwards compatible Attick key file parser''' + FILE_ID = 'ATTIC KEY' + class ConversionTestCase(BaseTestCase): + class MockArgs: + def __init__(self, path): + self.repository = attic.helpers.Location(path) + def open(self, path, repo_type = Repository, create=False): return repo_type(os.path.join(path, 'repository'), create = create) @@ -26,6 +37,10 @@ class ConversionTestCase(BaseTestCase): # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + self.keysdir = self.MockArgs(self.tmppath) + os.environ['ATTIC_KEYS_DIR'] = self.tmppath + os.environ['ATTIC_PASSPHRASE'] = 'test' + self.key = attic.key.KeyfileKey.create(self.attic_repo, self.keysdir) self.attic_repo.close() def test_convert(self): @@ -33,9 +48,15 @@ class ConversionTestCase(BaseTestCase): # check should fail because of magic number assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() + os.environ['BORG_KEYS_DIR'] = self.tmppath self.convert() + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(self.key.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) self.repository = self.open(self.tmppath) - assert self.repository.check() # can't check raises() because check() handles the error + assert self.repository.check() self.repository.close() def convert(self): @@ -45,32 +66,52 @@ class ConversionTestCase(BaseTestCase): important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing.''' - self.convert_segments() - with pytest.raises(NotImplementedException): - self.convert_keyfiles() + self.repository = self.open(self.tmppath) + segments = [ filename for i, filename in 
self.repository.io.segment_iterator() ] + try: + keyfile = self.find_attic_keyfile() + except KeyfileNotFoundError: + print("no key file found for repository, not converting") + else: + self.convert_keyfiles(keyfile) + self.repository.close() + self.convert_segments(segments) with pytest.raises(NotImplementedException): self.convert_cache() - def convert_segments(self): + def convert_segments(self, segments): '''convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in `$ATTIC_REPO/data/**`. luckily the segment length didn't change so we can just - replace the 8 first bytes of all regular files in there. - - `Repository.segment_iterator()` could be used here.''' - self.repository = self.open(self.tmppath) - segs = [ filename for i, filename in self.repository.io.segment_iterator() ] - self.repository.close() - for filename in segs: + replace the 8 first bytes of all regular files in there.''' + for filename in segments: print("converting segment %s..." % filename) with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) - def convert_keyfiles(self): + def find_attic_keyfile(self): + '''find the attic keyfiles + + this is expected to look into $HOME/.attic/keys or + $ATTIC_KEYS_DIR for key files matching the given Borg + repository. + + it is expected to raise an exception (KeyfileNotFoundError) if + no key is found. whether that exception is from Borg or Attic + is unclear. + + this is split in a separate function in case we want to + reimplement the attic code here. + ''' + self.repository._location = attic.helpers.Location(self.tmppath) + return attic.key.KeyfileKey().find_key_file(self.repository) + + def convert_keyfiles(self, keyfile): + '''convert key files from attic to borg replacement pattern is `s/ATTIC KEY/BORG_KEY/` in @@ -82,7 +123,20 @@ class ConversionTestCase(BaseTestCase): finds the keys with the right identifier for the repo, no need to decrypt to convert. 
will need to rewrite the whole key file because magic number length changed.''' - raise NotImplementedException('not implemented') + print("converting keyfile %s" % keyfile) + with open(keyfile, 'r') as f: + data = f.read() + data = data.replace(AtticKeyfileKey.FILE_ID, + KeyfileKey.FILE_ID, + 1) + keyfile = os.path.join(get_keys_dir(), + os.path.basename(keyfile)) + print("writing borg keyfile to %s" % keyfile) + with open(keyfile, 'w') as f: + f.write(data) + with open(keyfile, 'r') as f: + data = f.read() + assert data.startswith(KeyfileKey.FILE_ID) def convert_cache(self): '''convert caches from attic to borg From e88a994c8a2bd269d23a1fb4307dd2d9923c5668 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:40:46 -0400 Subject: [PATCH 069/151] reshuffle and document --- borg/testsuite/convert.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index ba3af8ee6..52564134d 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -37,10 +37,17 @@ class ConversionTestCase(BaseTestCase): # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') - self.keysdir = self.MockArgs(self.tmppath) + + # we use the repo dir for the created keyfile, because we do + # not want to clutter existing keyfiles os.environ['ATTIC_KEYS_DIR'] = self.tmppath + + # we use the same directory for the converted files, which + # will clutter the previously created one, which we don't care + # about anyways. in real runs, the original key will be retained. 
+ os.environ['BORG_KEYS_DIR'] = self.tmppath os.environ['ATTIC_PASSPHRASE'] = 'test' - self.key = attic.key.KeyfileKey.create(self.attic_repo, self.keysdir) + self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) self.attic_repo.close() def test_convert(self): @@ -48,7 +55,6 @@ class ConversionTestCase(BaseTestCase): # check should fail because of magic number assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() - os.environ['BORG_KEYS_DIR'] = self.tmppath self.convert() # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), @@ -96,6 +102,9 @@ class ConversionTestCase(BaseTestCase): def find_attic_keyfile(self): '''find the attic keyfiles + the keyfiles are loaded by `KeyfileKey.find_key_file()`. that + finds the keys with the right identifier for the repo + this is expected to look into $HOME/.attic/keys or $ATTIC_KEYS_DIR for key files matching the given Borg repository. @@ -119,10 +128,10 @@ class ConversionTestCase(BaseTestCase): `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or `$HOME/.borg/keys`. - the keyfiles are loaded by `KeyfileKey.find_key_file()`. that - finds the keys with the right identifier for the repo, no need - to decrypt to convert. will need to rewrite the whole key file - because magic number length changed.''' + no need to decrypt to convert. 
we need to rewrite the whole + key file because magic number length changed, but that's not a + problem because the keyfiles are small (compared to, say, + all the segments).''' print("converting keyfile %s" % keyfile) with open(keyfile, 'r') as f: data = f.read() From 2d1988179e5149bb7bc29d589a5fa6887b00e76b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:41:38 -0400 Subject: [PATCH 070/151] some debugging code --- borg/testsuite/convert.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 52564134d..c95ffa793 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -53,6 +53,7 @@ class ConversionTestCase(BaseTestCase): def test_convert(self): self.repository = self.open(self.tmppath) # check should fail because of magic number + print("this will show an error, it is expected") assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() self.convert() @@ -72,7 +73,9 @@ class ConversionTestCase(BaseTestCase): important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing.''' + print("opening attic repository with borg") self.repository = self.open(self.tmppath) + print("reading segments from attic repository using borg") segments = [ filename for i, filename in self.repository.io.segment_iterator() ] try: keyfile = self.find_attic_keyfile() @@ -94,7 +97,7 @@ class ConversionTestCase(BaseTestCase): luckily the segment length didn't change so we can just replace the 8 first bytes of all regular files in there.''' for filename in segments: - print("converting segment %s..." 
% filename) + print("converting segment %s in place" % filename) with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) From c7af4c7f1d1f8d5380bce60c27d743f11e0dc81d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:43:08 -0400 Subject: [PATCH 071/151] more debug --- borg/testsuite/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index c95ffa793..5ef88893b 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -80,7 +80,7 @@ class ConversionTestCase(BaseTestCase): try: keyfile = self.find_attic_keyfile() except KeyfileNotFoundError: - print("no key file found for repository, not converting") + print("no key file found for repository") else: self.convert_keyfiles(keyfile) self.repository.close() From 312c3cf738318ec0dba0383a23159f17fa0aa1e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:53:58 -0400 Subject: [PATCH 072/151] rewrite converter to avoid using attic code the unit tests themselves still use attic to generate an attic repository for testing, but the converter code should now be standalone --- borg/testsuite/convert.py | 42 +++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 5ef88893b..44b0a3f39 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -1,4 +1,4 @@ -import binascii +from binascii import hexlify import os import pytest import shutil @@ -20,6 +20,37 @@ class AtticKeyfileKey(KeyfileKey): '''backwards compatible Attick key file parser''' FILE_ID = 'ATTIC KEY' + # verbatim copy from attic + @staticmethod + def get_keys_dir(): + """Determine where to repository keys and cache""" + return os.environ.get('ATTIC_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.attic', 'keys')) + + @classmethod + def 
find_key_file(cls, repository): + '''copy of attic's `find_key_file`_ + + this has two small modifications: + + 1. it uses the above `get_keys_dir`_ instead of the global one, + assumed to be borg's + + 2. it uses `repository.path`_ instead of + `repository._location.canonical_path`_ because we can't + assume the repository has been opened by the archiver yet + ''' + get_keys_dir = cls.get_keys_dir + id = hexlify(repository.id).decode('ascii') + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line and line.startswith(cls.FILE_ID) and line[10:] == id: + return filename + raise KeyfileNotFoundError(repository.path, get_keys_dir()) + class ConversionTestCase(BaseTestCase): class MockArgs: @@ -116,11 +147,10 @@ class ConversionTestCase(BaseTestCase): no key is found. whether that exception is from Borg or Attic is unclear. - this is split in a separate function in case we want to - reimplement the attic code here. - ''' - self.repository._location = attic.helpers.Location(self.tmppath) - return attic.key.KeyfileKey().find_key_file(self.repository) + this is split in a separate function in case we want to use + the attic code here directly, instead of our local + implementation.''' + return AtticKeyfileKey.find_key_file(self.repository) def convert_keyfiles(self, keyfile): From aa25a217a46b678b14ddbd08d3ec66e2cc11b349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:01:03 -0400 Subject: [PATCH 073/151] move conversion code to a separate class for clarity --- borg/testsuite/convert.py | 86 +++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 44b0a3f39..f4b8bd3db 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -16,41 +16,6 @@ from . 
import BaseTestCase class NotImplementedException(Exception): pass -class AtticKeyfileKey(KeyfileKey): - '''backwards compatible Attick key file parser''' - FILE_ID = 'ATTIC KEY' - - # verbatim copy from attic - @staticmethod - def get_keys_dir(): - """Determine where to repository keys and cache""" - return os.environ.get('ATTIC_KEYS_DIR', - os.path.join(os.path.expanduser('~'), '.attic', 'keys')) - - @classmethod - def find_key_file(cls, repository): - '''copy of attic's `find_key_file`_ - - this has two small modifications: - - 1. it uses the above `get_keys_dir`_ instead of the global one, - assumed to be borg's - - 2. it uses `repository.path`_ instead of - `repository._location.canonical_path`_ because we can't - assume the repository has been opened by the archiver yet - ''' - get_keys_dir = cls.get_keys_dir - id = hexlify(repository.id).decode('ascii') - keys_dir = get_keys_dir() - for name in os.listdir(keys_dir): - filename = os.path.join(keys_dir, name) - with open(filename, 'r') as fd: - line = fd.readline().strip() - if line and line.startswith(cls.FILE_ID) and line[10:] == id: - return filename - raise KeyfileNotFoundError(repository.path, get_keys_dir()) - class ConversionTestCase(BaseTestCase): class MockArgs: @@ -81,13 +46,17 @@ class ConversionTestCase(BaseTestCase): self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) self.attic_repo.close() + def tearDown(self): + shutil.rmtree(self.tmppath) + def test_convert(self): self.repository = self.open(self.tmppath) # check should fail because of magic number print("this will show an error, it is expected") assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() - self.convert() + print("opening attic repository with borg and converting") + self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), 
os.path.basename(self.key.path)) @@ -97,6 +66,7 @@ class ConversionTestCase(BaseTestCase): assert self.repository.check() self.repository.close() +class AtticRepositoryConverter(Repository): def convert(self): '''convert an attic repository to a borg repository @@ -104,17 +74,15 @@ class ConversionTestCase(BaseTestCase): important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing.''' - print("opening attic repository with borg") - self.repository = self.open(self.tmppath) print("reading segments from attic repository using borg") - segments = [ filename for i, filename in self.repository.io.segment_iterator() ] + segments = [ filename for i, filename in self.io.segment_iterator() ] try: keyfile = self.find_attic_keyfile() except KeyfileNotFoundError: print("no key file found for repository") else: self.convert_keyfiles(keyfile) - self.repository.close() + self.close() self.convert_segments(segments) with pytest.raises(NotImplementedException): self.convert_cache() @@ -150,7 +118,7 @@ class ConversionTestCase(BaseTestCase): this is split in a separate function in case we want to use the attic code here directly, instead of our local implementation.''' - return AtticKeyfileKey.find_key_file(self.repository) + return AtticKeyfileKey.find_key_file(self) def convert_keyfiles(self, keyfile): @@ -200,5 +168,37 @@ class ConversionTestCase(BaseTestCase): ''' raise NotImplementedException('not implemented') - def tearDown(self): - shutil.rmtree(self.tmppath) +class AtticKeyfileKey(KeyfileKey): + '''backwards compatible Attick key file parser''' + FILE_ID = 'ATTIC KEY' + + # verbatim copy from attic + @staticmethod + def get_keys_dir(): + """Determine where to repository keys and cache""" + return os.environ.get('ATTIC_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.attic', 'keys')) + + @classmethod + def find_key_file(cls, repository): + '''copy of attic's `find_key_file`_ + + this has two small 
modifications: + + 1. it uses the above `get_keys_dir`_ instead of the global one, + assumed to be borg's + + 2. it uses `repository.path`_ instead of + `repository._location.canonical_path`_ because we can't + assume the repository has been opened by the archiver yet + ''' + get_keys_dir = cls.get_keys_dir + id = hexlify(repository.id).decode('ascii') + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line and line.startswith(cls.FILE_ID) and line[10:] == id: + return filename + raise KeyfileNotFoundError(repository.path, get_keys_dir()) From 5a1680397c571ad2f42e731d4835b7f2f356aa55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:02:21 -0400 Subject: [PATCH 074/151] remove needless use of self --- borg/testsuite/convert.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index f4b8bd3db..9d5f95142 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -87,7 +87,8 @@ class AtticRepositoryConverter(Repository): with pytest.raises(NotImplementedException): self.convert_cache() - def convert_segments(self, segments): + @staticmethod + def convert_segments(segments): '''convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in @@ -120,7 +121,8 @@ class AtticRepositoryConverter(Repository): implementation.''' return AtticKeyfileKey.find_key_file(self) - def convert_keyfiles(self, keyfile): + @staticmethod + def convert_keyfiles(keyfile): '''convert key files from attic to borg From c30df4e033834c4d96be67fe4bcedb75014dc115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:18:03 -0400 Subject: [PATCH 075/151] move converter code out of test suite --- borg/converter.py | 147 +++++++++++++++++++++++++++++++++++++ 
borg/testsuite/convert.py | 151 ++------------------------------------ 2 files changed, 152 insertions(+), 146 deletions(-) create mode 100644 borg/converter.py diff --git a/borg/converter.py b/borg/converter.py new file mode 100644 index 000000000..b558af883 --- /dev/null +++ b/borg/converter.py @@ -0,0 +1,147 @@ +from binascii import hexlify +import os + +from .helpers import IntegrityError, get_keys_dir +from .repository import Repository, MAGIC +from .key import KeyfileKey, KeyfileNotFoundError + +class NotImplementedException(Exception): + pass + +class AtticRepositoryConverter(Repository): + def convert(self): + '''convert an attic repository to a borg repository + + those are the files that need to be converted here, from most + important to least important: segments, key files, and various + caches, the latter being optional, as they will be rebuilt if + missing.''' + print("reading segments from attic repository using borg") + segments = [ filename for i, filename in self.io.segment_iterator() ] + try: + keyfile = self.find_attic_keyfile() + except KeyfileNotFoundError: + print("no key file found for repository") + else: + self.convert_keyfiles(keyfile) + self.close() + self.convert_segments(segments) + self.convert_cache() + + @staticmethod + def convert_segments(segments): + '''convert repository segments from attic to borg + + replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in + `$ATTIC_REPO/data/**`. + + luckily the segment length didn't change so we can just + replace the 8 first bytes of all regular files in there.''' + for filename in segments: + print("converting segment %s in place" % filename) + with open(filename, 'r+b') as segment: + segment.seek(0) + segment.write(MAGIC) + + def find_attic_keyfile(self): + '''find the attic keyfiles + + the keyfiles are loaded by `KeyfileKey.find_key_file()`. 
that + finds the keys with the right identifier for the repo + + this is expected to look into $HOME/.attic/keys or + $ATTIC_KEYS_DIR for key files matching the given Borg + repository. + + it is expected to raise an exception (KeyfileNotFoundError) if + no key is found. whether that exception is from Borg or Attic + is unclear. + + this is split in a separate function in case we want to use + the attic code here directly, instead of our local + implementation.''' + return AtticKeyfileKey.find_key_file(self) + + @staticmethod + def convert_keyfiles(keyfile): + + '''convert key files from attic to borg + + replacement pattern is `s/ATTIC KEY/BORG_KEY/` in + `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or + `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or + `$HOME/.borg/keys`. + + no need to decrypt to convert. we need to rewrite the whole + key file because magic number length changed, but that's not a + problem because the keyfiles are small (compared to, say, + all the segments).''' + print("converting keyfile %s" % keyfile) + with open(keyfile, 'r') as f: + data = f.read() + data = data.replace(AtticKeyfileKey.FILE_ID, + KeyfileKey.FILE_ID, + 1) + keyfile = os.path.join(get_keys_dir(), + os.path.basename(keyfile)) + print("writing borg keyfile to %s" % keyfile) + with open(keyfile, 'w') as f: + f.write(data) + with open(keyfile, 'r') as f: + data = f.read() + assert data.startswith(KeyfileKey.FILE_ID) + + def convert_cache(self): + '''convert caches from attic to borg + + those are all hash indexes, so we need to + `s/ATTICIDX/BORG_IDX/` in a few locations: + + * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` + is the `Repository.get_index_transaction_id()`), which we + should probably update, with a lock, see + `Repository.open()`, which i'm not sure we should use + because it may write data on `Repository.close()`... 
+ + * the `files` and `chunks` cache (in + `$HOME/.cache/attic//`), which we could just drop, + but if we'd want to convert, we could open it with the + `Cache.open()`, edit in place and then `Cache.close()` to + make sure we have locking right + ''' + raise NotImplementedException('not implemented') + +class AtticKeyfileKey(KeyfileKey): + '''backwards compatible Attick key file parser''' + FILE_ID = 'ATTIC KEY' + + # verbatim copy from attic + @staticmethod + def get_keys_dir(): + """Determine where to repository keys and cache""" + return os.environ.get('ATTIC_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.attic', 'keys')) + + @classmethod + def find_key_file(cls, repository): + '''copy of attic's `find_key_file`_ + + this has two small modifications: + + 1. it uses the above `get_keys_dir`_ instead of the global one, + assumed to be borg's + + 2. it uses `repository.path`_ instead of + `repository._location.canonical_path`_ because we can't + assume the repository has been opened by the archiver yet + ''' + get_keys_dir = cls.get_keys_dir + id = hexlify(repository.id).decode('ascii') + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line and line.startswith(cls.FILE_ID) and line[10:] == id: + return filename + raise KeyfileNotFoundError(repository.path, get_keys_dir()) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 9d5f95142..74196063e 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -1,4 +1,3 @@ -from binascii import hexlify import os import pytest import shutil @@ -8,14 +7,12 @@ import attic.repository import attic.key import attic.helpers -from ..helpers import IntegrityError, get_keys_dir +from ..converter import AtticRepositoryConverter, NotImplementedException +from ..helpers import get_keys_dir +from ..key import KeyfileKey from ..repository import Repository, MAGIC -from 
..key import KeyfileKey, KeyfileNotFoundError from . import BaseTestCase -class NotImplementedException(Exception): - pass - class ConversionTestCase(BaseTestCase): class MockArgs: @@ -56,7 +53,8 @@ class ConversionTestCase(BaseTestCase): assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() print("opening attic repository with borg and converting") - self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() + with pytest.raises(NotImplementedException): + self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), os.path.basename(self.key.path)) @@ -65,142 +63,3 @@ class ConversionTestCase(BaseTestCase): self.repository = self.open(self.tmppath) assert self.repository.check() self.repository.close() - -class AtticRepositoryConverter(Repository): - def convert(self): - '''convert an attic repository to a borg repository - - those are the files that need to be converted here, from most - important to least important: segments, key files, and various - caches, the latter being optional, as they will be rebuilt if - missing.''' - print("reading segments from attic repository using borg") - segments = [ filename for i, filename in self.io.segment_iterator() ] - try: - keyfile = self.find_attic_keyfile() - except KeyfileNotFoundError: - print("no key file found for repository") - else: - self.convert_keyfiles(keyfile) - self.close() - self.convert_segments(segments) - with pytest.raises(NotImplementedException): - self.convert_cache() - - @staticmethod - def convert_segments(segments): - '''convert repository segments from attic to borg - - replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in - `$ATTIC_REPO/data/**`. 
- - luckily the segment length didn't change so we can just - replace the 8 first bytes of all regular files in there.''' - for filename in segments: - print("converting segment %s in place" % filename) - with open(filename, 'r+b') as segment: - segment.seek(0) - segment.write(MAGIC) - - def find_attic_keyfile(self): - '''find the attic keyfiles - - the keyfiles are loaded by `KeyfileKey.find_key_file()`. that - finds the keys with the right identifier for the repo - - this is expected to look into $HOME/.attic/keys or - $ATTIC_KEYS_DIR for key files matching the given Borg - repository. - - it is expected to raise an exception (KeyfileNotFoundError) if - no key is found. whether that exception is from Borg or Attic - is unclear. - - this is split in a separate function in case we want to use - the attic code here directly, instead of our local - implementation.''' - return AtticKeyfileKey.find_key_file(self) - - @staticmethod - def convert_keyfiles(keyfile): - - '''convert key files from attic to borg - - replacement pattern is `s/ATTIC KEY/BORG_KEY/` in - `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or - `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or - `$HOME/.borg/keys`. - - no need to decrypt to convert. 
we need to rewrite the whole - key file because magic number length changed, but that's not a - problem because the keyfiles are small (compared to, say, - all the segments).''' - print("converting keyfile %s" % keyfile) - with open(keyfile, 'r') as f: - data = f.read() - data = data.replace(AtticKeyfileKey.FILE_ID, - KeyfileKey.FILE_ID, - 1) - keyfile = os.path.join(get_keys_dir(), - os.path.basename(keyfile)) - print("writing borg keyfile to %s" % keyfile) - with open(keyfile, 'w') as f: - f.write(data) - with open(keyfile, 'r') as f: - data = f.read() - assert data.startswith(KeyfileKey.FILE_ID) - - def convert_cache(self): - '''convert caches from attic to borg - - those are all hash indexes, so we need to - `s/ATTICIDX/BORG_IDX/` in a few locations: - - * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` - is the `Repository.get_index_transaction_id()`), which we - should probably update, with a lock, see - `Repository.open()`, which i'm not sure we should use - because it may write data on `Repository.close()`... - - * the `files` and `chunks` cache (in - `$HOME/.cache/attic//`), which we could just drop, - but if we'd want to convert, we could open it with the - `Cache.open()`, edit in place and then `Cache.close()` to - make sure we have locking right - ''' - raise NotImplementedException('not implemented') - -class AtticKeyfileKey(KeyfileKey): - '''backwards compatible Attick key file parser''' - FILE_ID = 'ATTIC KEY' - - # verbatim copy from attic - @staticmethod - def get_keys_dir(): - """Determine where to repository keys and cache""" - return os.environ.get('ATTIC_KEYS_DIR', - os.path.join(os.path.expanduser('~'), '.attic', 'keys')) - - @classmethod - def find_key_file(cls, repository): - '''copy of attic's `find_key_file`_ - - this has two small modifications: - - 1. it uses the above `get_keys_dir`_ instead of the global one, - assumed to be borg's - - 2. 
it uses `repository.path`_ instead of - `repository._location.canonical_path`_ because we can't - assume the repository has been opened by the archiver yet - ''' - get_keys_dir = cls.get_keys_dir - id = hexlify(repository.id).decode('ascii') - keys_dir = get_keys_dir() - for name in os.listdir(keys_dir): - filename = os.path.join(keys_dir, name) - with open(filename, 'r') as fd: - line = fd.readline().strip() - if line and line.startswith(cls.FILE_ID) and line[10:] == id: - return filename - raise KeyfileNotFoundError(repository.path, get_keys_dir()) From 77ed6dec2ba3dcbbec4b4027ffd3313fa03f6905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:27:55 -0400 Subject: [PATCH 076/151] skip converter tests if attic isn't installed --- borg/testsuite/convert.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 74196063e..29b7c49f0 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -3,9 +3,14 @@ import pytest import shutil import tempfile -import attic.repository -import attic.key -import attic.helpers +try: + import attic.repository + import attic.key + import attic.helpers +except ImportError: + attic = None +pytestmark = pytest.mark.skipif(attic is None, + reason = 'cannot find an attic install') from ..converter import AtticRepositoryConverter, NotImplementedException from ..helpers import get_keys_dir From e5543657658b46ef48248697531ca447ec86bcda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:28:07 -0400 Subject: [PATCH 077/151] remove unused import --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index b558af883..a416f3e79 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -1,7 +1,7 @@ from binascii import hexlify import os -from .helpers import IntegrityError, get_keys_dir +from .helpers 
import get_keys_dir from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError From f35e8e17f2b4b4379bb250d3b495c6c59f734cb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:50:35 -0400 Subject: [PATCH 078/151] add dry run support to converter --- borg/converter.py | 29 ++++++++++++++++------------- borg/testsuite/convert.py | 2 +- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index a416f3e79..d949fd31a 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -9,7 +9,7 @@ class NotImplementedException(Exception): pass class AtticRepositoryConverter(Repository): - def convert(self): + def convert(self, dryrun=True): '''convert an attic repository to a borg repository those are the files that need to be converted here, from most @@ -23,13 +23,13 @@ class AtticRepositoryConverter(Repository): except KeyfileNotFoundError: print("no key file found for repository") else: - self.convert_keyfiles(keyfile) + self.convert_keyfiles(keyfile, dryrun) self.close() - self.convert_segments(segments) - self.convert_cache() + self.convert_segments(segments, dryrun) + self.convert_cache(dryrun) @staticmethod - def convert_segments(segments): + def convert_segments(segments, dryrun): '''convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in @@ -39,6 +39,8 @@ class AtticRepositoryConverter(Repository): replace the 8 first bytes of all regular files in there.''' for filename in segments: print("converting segment %s in place" % filename) + if dryrun: + continue with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) @@ -63,7 +65,7 @@ class AtticRepositoryConverter(Repository): return AtticKeyfileKey.find_key_file(self) @staticmethod - def convert_keyfiles(keyfile): + def convert_keyfiles(keyfile, dryrun): '''convert key files from attic to borg @@ -85,13 +87,14 @@ class 
AtticRepositoryConverter(Repository): keyfile = os.path.join(get_keys_dir(), os.path.basename(keyfile)) print("writing borg keyfile to %s" % keyfile) - with open(keyfile, 'w') as f: - f.write(data) - with open(keyfile, 'r') as f: - data = f.read() - assert data.startswith(KeyfileKey.FILE_ID) + if not dryrun: + with open(keyfile, 'w') as f: + f.write(data) + with open(keyfile, 'r') as f: + data = f.read() + assert data.startswith(KeyfileKey.FILE_ID) - def convert_cache(self): + def convert_cache(self, dryrun): '''convert caches from attic to borg those are all hash indexes, so we need to @@ -109,7 +112,7 @@ class AtticRepositoryConverter(Repository): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right ''' - raise NotImplementedException('not implemented') + raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class AtticKeyfileKey(KeyfileKey): '''backwards compatible Attick key file parser''' diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 29b7c49f0..e708ea60d 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -59,7 +59,7 @@ class ConversionTestCase(BaseTestCase): self.repository.close() print("opening attic repository with borg and converting") with pytest.raises(NotImplementedException): - self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() + self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), os.path.basename(self.key.path)) From a5f32b0a27c076326500e5b335c6592e083dc130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:50:46 -0400 Subject: [PATCH 079/151] add convert command --- borg/archiver.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/borg/archiver.py b/borg/archiver.py index 
28f1d8a3f..2c4302b3a 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,6 +17,7 @@ import traceback from . import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .compress import Compressor, COMPR_BUFFER +from .converter import AtticRepositoryConverter, NotImplementedException from .repository import Repository from .cache import Cache from .key import key_creator @@ -462,6 +463,15 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") stats.print_('Deleted data:', cache) return self.exit_code + def do_convert(self, parser, commands, args): + '''convert a repository from attic to borg''' + repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) + try: + repo.convert(args.dry_run) + except NotImplementedException as e: + print("warning: %s" % e) + return self.exit_code + helptext = {} helptext['patterns'] = ''' Exclude patterns use a variant of shell pattern syntax, with '*' matching any @@ -896,6 +906,43 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") type=location_validator(archive=False), help='repository to prune') + convert_epilog = textwrap.dedent(""" + convert will convert an existing Attic repository to Borg in place. + + it will change the magic numbers in the repository's segments + to match the new Borg magic numbers. the keyfiles found in + $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and + copied to $BORG_KEYS_DIR or ~/.borg/keys. + + the cache files are *not* currently converted, which will + result in a much longer backup the first time. you can run + `borg check --repair` to rebuild those files after the + conversion. + + the conversion can IRREMEDIABLY DAMAGE YOUR REPOSITORY! Attic + will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as + the magic numbers will have changed. 
+ + it is recommended you run this on a copy of the Attic + repository, in case something goes wrong, for example: + + cp -a attic borg + borg convert -n borg + borg convert borg + + you have been warned.""") + subparser = subparsers.add_parser('convert', parents=[common_parser], + description=self.do_convert.__doc__, + epilog=convert_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter) + subparser.set_defaults(func=self.do_convert) + subparser.add_argument('-n', '--dry-run', dest='dry_run', + default=False, action='store_true', + help='do not change repository') + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', + type=location_validator(archive=False), + help='path to the attic repository to be converted') + subparser = subparsers.add_parser('help', parents=[common_parser], description='Extra help') subparser.add_argument('--epilog-only', dest='epilog_only', From 1b29699403facffc1396c0741936df3dd8a1c8f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:15:12 -0400 Subject: [PATCH 080/151] cosmetic: reorder --- borg/testsuite/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index e708ea60d..d48a0e05b 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -35,6 +35,7 @@ class ConversionTestCase(BaseTestCase): # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + self.attic_repo.close() # we use the repo dir for the created keyfile, because we do # not want to clutter existing keyfiles @@ -46,7 +47,6 @@ class ConversionTestCase(BaseTestCase): os.environ['BORG_KEYS_DIR'] = self.tmppath os.environ['ATTIC_PASSPHRASE'] = 'test' self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) - self.attic_repo.close() def tearDown(self): shutil.rmtree(self.tmppath) 
From 1ba856d2b3ff3fdbbd6bc3afb25701a7bcb57c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:15:25 -0400 Subject: [PATCH 081/151] refactor: group test repo subroutine --- borg/testsuite/convert.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index d48a0e05b..cb9f5ec4c 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -51,12 +51,16 @@ class ConversionTestCase(BaseTestCase): def tearDown(self): shutil.rmtree(self.tmppath) - def test_convert(self): + def check_repo(self, state = True): + if not state: + print("this will show an error, this is expected") self.repository = self.open(self.tmppath) - # check should fail because of magic number - print("this will show an error, it is expected") - assert not self.repository.check() # can't check raises() because check() handles the error + assert self.repository.check() is state # can't check raises() because check() handles the error self.repository.close() + + def test_convert(self): + # check should fail because of magic number + self.check_repo(False) print("opening attic repository with borg and converting") with pytest.raises(NotImplementedException): self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) @@ -65,6 +69,4 @@ class ConversionTestCase(BaseTestCase): os.path.basename(self.key.path)) with open(keyfile, 'r') as f: assert f.read().startswith(KeyfileKey.FILE_ID) - self.repository = self.open(self.tmppath) - assert self.repository.check() - self.repository.close() + self.check_repo() From bcd94b96e0e2a7932e75557403e08d70d2e7fc94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:32:34 -0400 Subject: [PATCH 082/151] split up keyfile, segments and overall testing in converter --- borg/testsuite/convert.py | 53 ++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 14 
deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index cb9f5ec4c..172b308ec 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -12,7 +12,7 @@ except ImportError: pytestmark = pytest.mark.skipif(attic is None, reason = 'cannot find an attic install') -from ..converter import AtticRepositoryConverter, NotImplementedException +from ..converter import AtticRepositoryConverter, NotImplementedException, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC @@ -20,10 +20,6 @@ from . import BaseTestCase class ConversionTestCase(BaseTestCase): - class MockArgs: - def __init__(self, path): - self.repository = attic.helpers.Location(path) - def open(self, path, repo_type = Repository, create=False): return repo_type(os.path.join(path, 'repository'), create = create) @@ -37,6 +33,34 @@ class ConversionTestCase(BaseTestCase): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') self.attic_repo.close() + def tearDown(self): + shutil.rmtree(self.tmppath) + + def check_repo(self, state = True): + if not state: + print("this will show an error, this is expected") + repository = self.open(self.tmppath) + assert repository.check() is state # can't check raises() because check() handles the error + repository.close() + + def test_convert_segments(self): + # check should fail because of magic number + self.check_repo(False) + print("opening attic repository with borg and converting") + repo = self.open(self.tmppath, repo_type = AtticRepositoryConverter) + segments = [ filename for i, filename in repo.io.segment_iterator() ] + repo.close() + repo.convert_segments(segments, dryrun=False) + self.check_repo() + +class EncryptedConversionTestCase(ConversionTestCase): + class MockArgs: + def __init__(self, path): + self.repository = attic.helpers.Location(path) + + def setUp(self): + super().setUp() + # we use the repo dir for the created keyfile, because we 
do # not want to clutter existing keyfiles os.environ['ATTIC_KEYS_DIR'] = self.tmppath @@ -48,17 +72,18 @@ class ConversionTestCase(BaseTestCase): os.environ['ATTIC_PASSPHRASE'] = 'test' self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) - def tearDown(self): - shutil.rmtree(self.tmppath) + def test_keys(self): + repository = self.open(self.tmppath, repo_type = AtticRepositoryConverter) + keyfile = AtticKeyfileKey.find_key_file(repository) + AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - def check_repo(self, state = True): - if not state: - print("this will show an error, this is expected") - self.repository = self.open(self.tmppath) - assert self.repository.check() is state # can't check raises() because check() handles the error - self.repository.close() + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(self.key.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) - def test_convert(self): + def test_convert_all(self): # check should fail because of magic number self.check_repo(False) print("opening attic repository with borg and converting") From c99082922553114c981109178e4b3d5c8c13f195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:51:19 -0400 Subject: [PATCH 083/151] add attic dependency for build as a separate factor this way we don't depend on attic for regular build, but we can still see proper test coverage --- tox.ini | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index d177c121a..8fd697657 100644 --- a/tox.ini +++ b/tox.ini @@ -2,13 +2,15 @@ # fakeroot -u tox --recreate [tox] -envlist = py32, py33, py34, py35 +envlist = py{32,33,34,35}{,-attic} [testenv] # Change dir to avoid import problem for cython code. The directory does # not really matter, should be just different from the toplevel dir. 
changedir = {toxworkdir} -deps = -rrequirements.d/development.txt +deps = + -rrequirements.d/development.txt + attic: attic commands = py.test --cov=borg --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * From a81755f1a98f071f668287e49a32964c92466d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:34:18 -0400 Subject: [PATCH 084/151] use triple-double-quoted instead of single-double-quoted at the request of TW, see #231 --- borg/archiver.py | 2 +- borg/converter.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 2c4302b3a..696291f1c 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -464,7 +464,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") return self.exit_code def do_convert(self, parser, commands, args): - '''convert a repository from attic to borg''' + """convert a repository from attic to borg""" repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) try: repo.convert(args.dry_run) diff --git a/borg/converter.py b/borg/converter.py index d949fd31a..66606095f 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -10,12 +10,12 @@ class NotImplementedException(Exception): class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): - '''convert an attic repository to a borg repository + """convert an attic repository to a borg repository those are the files that need to be converted here, from most important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if - missing.''' + missing.""" print("reading segments from attic repository using borg") segments = [ filename for i, filename in self.io.segment_iterator() ] try: @@ -30,13 +30,13 @@ class AtticRepositoryConverter(Repository): @staticmethod def convert_segments(segments, dryrun): - '''convert 
repository segments from attic to borg + """convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in `$ATTIC_REPO/data/**`. luckily the segment length didn't change so we can just - replace the 8 first bytes of all regular files in there.''' + replace the 8 first bytes of all regular files in there.""" for filename in segments: print("converting segment %s in place" % filename) if dryrun: @@ -46,7 +46,7 @@ class AtticRepositoryConverter(Repository): segment.write(MAGIC) def find_attic_keyfile(self): - '''find the attic keyfiles + """find the attic keyfiles the keyfiles are loaded by `KeyfileKey.find_key_file()`. that finds the keys with the right identifier for the repo @@ -61,13 +61,13 @@ class AtticRepositoryConverter(Repository): this is split in a separate function in case we want to use the attic code here directly, instead of our local - implementation.''' + implementation.""" return AtticKeyfileKey.find_key_file(self) @staticmethod def convert_keyfiles(keyfile, dryrun): - '''convert key files from attic to borg + """convert key files from attic to borg replacement pattern is `s/ATTIC KEY/BORG_KEY/` in `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or @@ -77,7 +77,7 @@ class AtticRepositoryConverter(Repository): no need to decrypt to convert. 
we need to rewrite the whole key file because magic number length changed, but that's not a problem because the keyfiles are small (compared to, say, - all the segments).''' + all the segments).""" print("converting keyfile %s" % keyfile) with open(keyfile, 'r') as f: data = f.read() @@ -95,7 +95,7 @@ class AtticRepositoryConverter(Repository): assert data.startswith(KeyfileKey.FILE_ID) def convert_cache(self, dryrun): - '''convert caches from attic to borg + """convert caches from attic to borg those are all hash indexes, so we need to `s/ATTICIDX/BORG_IDX/` in a few locations: @@ -111,11 +111,11 @@ class AtticRepositoryConverter(Repository): but if we'd want to convert, we could open it with the `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right - ''' + """ raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class AtticKeyfileKey(KeyfileKey): - '''backwards compatible Attick key file parser''' + """backwards compatible Attick key file parser""" FILE_ID = 'ATTIC KEY' # verbatim copy from attic @@ -127,7 +127,7 @@ class AtticKeyfileKey(KeyfileKey): @classmethod def find_key_file(cls, repository): - '''copy of attic's `find_key_file`_ + """copy of attic's `find_key_file`_ this has two small modifications: @@ -137,7 +137,7 @@ class AtticKeyfileKey(KeyfileKey): 2. 
it uses `repository.path`_ instead of `repository._location.canonical_path`_ because we can't assume the repository has been opened by the archiver yet - ''' + """ get_keys_dir = cls.get_keys_dir id = hexlify(repository.id).decode('ascii') keys_dir = get_keys_dir() From efbad396f4d90a03f84cad859bedfa9ec169735b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:36:20 -0400 Subject: [PATCH 085/151] help text review: magic s/number/string/, s/can/must/ --- borg/archiver.py | 8 ++++---- borg/converter.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 696291f1c..832983520 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -909,19 +909,19 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") convert_epilog = textwrap.dedent(""" convert will convert an existing Attic repository to Borg in place. - it will change the magic numbers in the repository's segments - to match the new Borg magic numbers. the keyfiles found in + it will change the magic strings in the repository's segments + to match the new Borg magic strings. the keyfiles found in $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or ~/.borg/keys. the cache files are *not* currently converted, which will - result in a much longer backup the first time. you can run + result in a much longer backup the first time. you must run `borg check --repair` to rebuild those files after the conversion. the conversion can IRREMEDIABLY DAMAGE YOUR REPOSITORY! Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as - the magic numbers will have changed. + the magic strings will have changed. 
it is recommended you run this on a copy of the Attic repository, in case something goes wrong, for example: diff --git a/borg/converter.py b/borg/converter.py index 66606095f..6b35b6cb7 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -35,7 +35,7 @@ class AtticRepositoryConverter(Repository): replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in `$ATTIC_REPO/data/**`. - luckily the segment length didn't change so we can just + luckily the magic string length didn't change so we can just replace the 8 first bytes of all regular files in there.""" for filename in segments: print("converting segment %s in place" % filename) @@ -75,7 +75,7 @@ class AtticRepositoryConverter(Repository): `$HOME/.borg/keys`. no need to decrypt to convert. we need to rewrite the whole - key file because magic number length changed, but that's not a + key file because magic string length changed, but that's not a problem because the keyfiles are small (compared to, say, all the segments).""" print("converting keyfile %s" % keyfile) From c2913f5f1052e47872ca1cc3bcc55db3c92123d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:40:56 -0400 Subject: [PATCH 086/151] style: don't use continue for nothing --- borg/converter.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 6b35b6cb7..61c26b3ae 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -39,11 +39,10 @@ class AtticRepositoryConverter(Repository): replace the 8 first bytes of all regular files in there.""" for filename in segments: print("converting segment %s in place" % filename) - if dryrun: - continue - with open(filename, 'r+b') as segment: - segment.seek(0) - segment.write(MAGIC) + if not dryrun: + with open(filename, 'r+b') as segment: + segment.seek(0) + segment.write(MAGIC) def find_attic_keyfile(self): """find the attic keyfiles From dbd4ac7f8d09265ef468d2531446d42843897d51 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:41:44 -0400 Subject: [PATCH 087/151] add missing colon --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 61c26b3ae..6be3f823f 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -48,7 +48,7 @@ class AtticRepositoryConverter(Repository): """find the attic keyfiles the keyfiles are loaded by `KeyfileKey.find_key_file()`. that - finds the keys with the right identifier for the repo + finds the keys with the right identifier for the repo. this is expected to look into $HOME/.attic/keys or $ATTIC_KEYS_DIR for key files matching the given Borg From 5b8cb63479b1f189a59979417682aa1d56467df4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:43:05 -0400 Subject: [PATCH 088/151] remove duplicate code with the unit test --- borg/converter.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 6be3f823f..751791006 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -89,9 +89,6 @@ class AtticRepositoryConverter(Repository): if not dryrun: with open(keyfile, 'w') as f: f.write(data) - with open(keyfile, 'r') as f: - data = f.read() - assert data.startswith(KeyfileKey.FILE_ID) def convert_cache(self, dryrun): """convert caches from attic to borg From ef0ed409b683c4dbaede8c75a8c68585aecc449f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:44:17 -0400 Subject: [PATCH 089/151] fix typo --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 751791006..a9a706ad9 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -111,7 +111,7 @@ class AtticRepositoryConverter(Repository): raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class 
AtticKeyfileKey(KeyfileKey): - """backwards compatible Attick key file parser""" + """backwards compatible Attic key file parser""" FILE_ID = 'ATTIC KEY' # verbatim copy from attic From d66516351f0885524a1b8c24375ca19c9d330909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:46:30 -0400 Subject: [PATCH 090/151] use builtin NotImplementedError instead of writing our own NotImplemented didn't work with pytest.raise(), i didn't know about NotImplementedError, thanks tw --- borg/archiver.py | 4 ++-- borg/converter.py | 5 +---- borg/testsuite/convert.py | 4 ++-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 832983520..5c08880d6 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,7 +17,7 @@ import traceback from . import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .compress import Compressor, COMPR_BUFFER -from .converter import AtticRepositoryConverter, NotImplementedException +from .converter import AtticRepositoryConverter from .repository import Repository from .cache import Cache from .key import key_creator @@ -468,7 +468,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) try: repo.convert(args.dry_run) - except NotImplementedException as e: + except NotImplementedError as e: print("warning: %s" % e) return self.exit_code diff --git a/borg/converter.py b/borg/converter.py index a9a706ad9..8261d9281 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -5,9 +5,6 @@ from .helpers import get_keys_dir from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError -class NotImplementedException(Exception): - pass - class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): """convert an attic repository to a borg repository @@ -108,7 +105,7 @@ class 
AtticRepositoryConverter(Repository): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right """ - raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') + raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class AtticKeyfileKey(KeyfileKey): """backwards compatible Attic key file parser""" diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 172b308ec..208f6604e 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -12,7 +12,7 @@ except ImportError: pytestmark = pytest.mark.skipif(attic is None, reason = 'cannot find an attic install') -from ..converter import AtticRepositoryConverter, NotImplementedException, AtticKeyfileKey +from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC @@ -87,7 +87,7 @@ class EncryptedConversionTestCase(ConversionTestCase): # check should fail because of magic number self.check_repo(False) print("opening attic repository with borg and converting") - with pytest.raises(NotImplementedException): + with pytest.raises(NotImplementedError): self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), From d5198c551b1d650f60e1b520eb672a8f5b5fb7f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:47:23 -0400 Subject: [PATCH 091/151] split out depends in imports --- borg/testsuite/convert.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 208f6604e..3a413072e 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -1,8 +1,9 @@ import os -import pytest import shutil import tempfile 
+import pytest + try: import attic.repository import attic.key From 5f6eb87385e0945022582b7a2f160794eedb52b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:50:06 -0400 Subject: [PATCH 092/151] much nicer validation checking --- borg/testsuite/convert.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 3a413072e..155e568c3 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -37,22 +37,21 @@ class ConversionTestCase(BaseTestCase): def tearDown(self): shutil.rmtree(self.tmppath) - def check_repo(self, state = True): - if not state: - print("this will show an error, this is expected") + def repo_valid(self,): repository = self.open(self.tmppath) - assert repository.check() is state # can't check raises() because check() handles the error + state = repository.check() # can't check raises() because check() handles the error repository.close() + return state def test_convert_segments(self): # check should fail because of magic number - self.check_repo(False) + assert not self.repo_valid() print("opening attic repository with borg and converting") repo = self.open(self.tmppath, repo_type = AtticRepositoryConverter) segments = [ filename for i, filename in repo.io.segment_iterator() ] repo.close() repo.convert_segments(segments, dryrun=False) - self.check_repo() + assert self.repo_valid() class EncryptedConversionTestCase(ConversionTestCase): class MockArgs: @@ -86,7 +85,7 @@ class EncryptedConversionTestCase(ConversionTestCase): def test_convert_all(self): # check should fail because of magic number - self.check_repo(False) + assert not self.repo_valid() print("opening attic repository with borg and converting") with pytest.raises(NotImplementedError): self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) @@ -95,4 +94,4 @@ class EncryptedConversionTestCase(ConversionTestCase): 
os.path.basename(self.key.path)) with open(keyfile, 'r') as f: assert f.read().startswith(KeyfileKey.FILE_ID) - self.check_repo() + assert self.repo_valid() From 4a85f2d0f54fa236d792cce2a1a4f96fca13dfd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:58:00 -0400 Subject: [PATCH 093/151] fix most pep8 warnings * limit all lines to 80 chars * remove spaces around parameters * missing blank lines --- borg/testsuite/convert.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 155e568c3..1943b5df2 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -11,7 +11,7 @@ try: except ImportError: attic = None pytestmark = pytest.mark.skipif(attic is None, - reason = 'cannot find an attic install') + reason='cannot find an attic install') from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir @@ -19,17 +19,18 @@ from ..key import KeyfileKey from ..repository import Repository, MAGIC from . 
import BaseTestCase + class ConversionTestCase(BaseTestCase): - def open(self, path, repo_type = Repository, create=False): - return repo_type(os.path.join(path, 'repository'), create = create) + def open(self, path, repo_type=Repository, create=False): + return repo_type(os.path.join(path, 'repository'), create=create) def setUp(self): self.tmppath = tempfile.mkdtemp() self.attic_repo = self.open(self.tmppath, - repo_type = attic.repository.Repository, - create = True) - # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ + repo_type=attic.repository.Repository, + create=True) + # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') self.attic_repo.close() @@ -39,7 +40,8 @@ class ConversionTestCase(BaseTestCase): def repo_valid(self,): repository = self.open(self.tmppath) - state = repository.check() # can't check raises() because check() handles the error + # can't check raises() because check() handles the error + state = repository.check() repository.close() return state @@ -47,12 +49,13 @@ class ConversionTestCase(BaseTestCase): # check should fail because of magic number assert not self.repo_valid() print("opening attic repository with borg and converting") - repo = self.open(self.tmppath, repo_type = AtticRepositoryConverter) - segments = [ filename for i, filename in repo.io.segment_iterator() ] + repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) + segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) assert self.repo_valid() + class EncryptedConversionTestCase(ConversionTestCase): class MockArgs: def __init__(self, path): @@ -70,10 +73,12 @@ class EncryptedConversionTestCase(ConversionTestCase): # about anyways. in real runs, the original key will be retained. 
os.environ['BORG_KEYS_DIR'] = self.tmppath os.environ['ATTIC_PASSPHRASE'] = 'test' - self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) + self.key = attic.key.KeyfileKey.create(self.attic_repo, + self.MockArgs(self.tmppath)) def test_keys(self): - repository = self.open(self.tmppath, repo_type = AtticRepositoryConverter) + repository = self.open(self.tmppath, + repo_type=AtticRepositoryConverter) keyfile = AtticKeyfileKey.find_key_file(repository) AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) @@ -87,8 +92,9 @@ class EncryptedConversionTestCase(ConversionTestCase): # check should fail because of magic number assert not self.repo_valid() print("opening attic repository with borg and converting") + repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) with pytest.raises(NotImplementedError): - self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) + repo.convert(dryrun=False) # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), os.path.basename(self.key.path)) From b9c474d1877190ef73e295c46ac8b7ae58a803cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:59:01 -0400 Subject: [PATCH 094/151] pep8: put pytest skip marker after imports --- borg/testsuite/convert.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 1943b5df2..08472ef93 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -10,8 +10,6 @@ try: import attic.helpers except ImportError: attic = None -pytestmark = pytest.mark.skipif(attic is None, - reason='cannot find an attic install') from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir @@ -19,6 +17,9 @@ from ..key import KeyfileKey from ..repository import Repository, MAGIC from . 
import BaseTestCase +pytestmark = pytest.mark.skipif(attic is None, + reason='cannot find an attic install') + class ConversionTestCase(BaseTestCase): From 79d9aebaf2e0f1b533f81815b4eefde20ba9938a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:00:49 -0400 Subject: [PATCH 095/151] use permanently instead of irrevocably, which is less common --- borg/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index 5c08880d6..5c33b5f7b 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -919,7 +919,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") `borg check --repair` to rebuild those files after the conversion. - the conversion can IRREMEDIABLY DAMAGE YOUR REPOSITORY! Attic + the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as the magic strings will have changed. From 57801a288d43c96e9a93894334a61e6ffc6c89f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:03:41 -0400 Subject: [PATCH 096/151] keep tests simple by always adding attic depends note that we do not depend on attic to build borg, just to do those tests. if attic goes away, we could eventually do this another way or just stop testing this altogether. --- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index 8fd697657..a9ccb5e04 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ # fakeroot -u tox --recreate [tox] -envlist = py{32,33,34,35}{,-attic} +envlist = py{32,33,34,35} [testenv] # Change dir to avoid import problem for cython code. 
The directory does @@ -10,7 +10,7 @@ envlist = py{32,33,34,35}{,-attic} changedir = {toxworkdir} deps = -rrequirements.d/development.txt - attic: attic + attic commands = py.test --cov=borg --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * From 58815bc28a795bf4a77a288c4edbda7b32c004f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:23:17 -0400 Subject: [PATCH 097/151] fix commandline dispatch for converter --- borg/archiver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 5c33b5f7b..02c6ea781 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -463,9 +463,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") stats.print_('Deleted data:', cache) return self.exit_code - def do_convert(self, parser, commands, args): + def do_convert(self, args): """convert a repository from attic to borg""" - repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) + repo = AtticRepositoryConverter(args.repository.path, create=False) try: repo.convert(args.dry_run) except NotImplementedError as e: From 98e4e6bc253f067cc5c45f046073d179e2d668d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:35:17 -0400 Subject: [PATCH 098/151] lock repository when converting segments --- borg/converter.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/borg/converter.py b/borg/converter.py index 8261d9281..99de15170 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -2,6 +2,7 @@ from binascii import hexlify import os from .helpers import get_keys_dir +from .locking import UpgradableLock from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError @@ -22,7 +23,12 @@ class AtticRepositoryConverter(Repository): else: self.convert_keyfiles(keyfile, dryrun) self.close() + # partial open: just hold on to the lock + 
self.lock = UpgradableLock(os.path.join(self.path, 'lock'), + exclusive=True).acquire() self.convert_segments(segments, dryrun) + self.lock.release() + self.lock = None self.convert_cache(dryrun) @staticmethod @@ -34,6 +40,7 @@ class AtticRepositoryConverter(Repository): luckily the magic string length didn't change so we can just replace the 8 first bytes of all regular files in there.""" + print("converting %d segments..." % len(segments)) for filename in segments: print("converting segment %s in place" % filename) if not dryrun: From f5cb0f4e731bf63b5a7c0795eb612c01b95ac7ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 10:41:31 -0400 Subject: [PATCH 099/151] rewrite convert tests with pytest fixtures --- borg/testsuite/convert.py | 140 ++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 73 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 08472ef93..ac7d6cbca 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -15,90 +15,84 @@ from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC -from . 
import BaseTestCase pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') +def repo_open(path, repo_type=Repository, create=False): + return repo_type(os.path.join(str(path), 'repository'), create=create) -class ConversionTestCase(BaseTestCase): +def repo_valid(path): + repository = repo_open(str(path)) + # can't check raises() because check() handles the error + state = repository.check() + repository.close() + return state - def open(self, path, repo_type=Repository, create=False): - return repo_type(os.path.join(path, 'repository'), create=create) +@pytest.fixture(autouse=True) +def attic_repo(tmpdir): + attic_repo = repo_open(str(tmpdir), + repo_type=attic.repository.Repository, + create=True) + # throw some stuff in that repo, copied from `RepositoryTestCase.test1` + for x in range(100): + attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + attic_repo.close() + return attic_repo - def setUp(self): - self.tmppath = tempfile.mkdtemp() - self.attic_repo = self.open(self.tmppath, - repo_type=attic.repository.Repository, - create=True) - # throw some stuff in that repo, copied from `RepositoryTestCase.test1` - for x in range(100): - self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') - self.attic_repo.close() +@pytest.mark.usefixtures("tmpdir") +def test_convert_segments(tmpdir, attic_repo): + # check should fail because of magic number + assert not repo_valid(tmpdir) + print("opening attic repository with borg and converting") + repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + segments = [filename for i, filename in repo.io.segment_iterator()] + repo.close() + repo.convert_segments(segments, dryrun=False) + assert repo_valid(tmpdir) - def tearDown(self): - shutil.rmtree(self.tmppath) +class MockArgs: + def __init__(self, path): + self.repository = attic.helpers.Location(path) - def repo_valid(self,): - repository = self.open(self.tmppath) - # can't check raises() because check() handles the error - 
state = repository.check() - repository.close() - return state +@pytest.fixture() +def attic_key_file(attic_repo, tmpdir): + keys_dir = str(tmpdir.mkdir('keys')) - def test_convert_segments(self): - # check should fail because of magic number - assert not self.repo_valid() - print("opening attic repository with borg and converting") - repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) - segments = [filename for i, filename in repo.io.segment_iterator()] - repo.close() - repo.convert_segments(segments, dryrun=False) - assert self.repo_valid() + # we use the repo dir for the created keyfile, because we do + # not want to clutter existing keyfiles + os.environ['ATTIC_KEYS_DIR'] = keys_dir + # we use the same directory for the converted files, which + # will clutter the previously created one, which we don't care + # about anyways. in real runs, the original key will be retained. + os.environ['BORG_KEYS_DIR'] = keys_dir + os.environ['ATTIC_PASSPHRASE'] = 'test' + return attic.key.KeyfileKey.create(attic_repo, + MockArgs(keys_dir)) -class EncryptedConversionTestCase(ConversionTestCase): - class MockArgs: - def __init__(self, path): - self.repository = attic.helpers.Location(path) +def test_keys(tmpdir, attic_repo, attic_key_file): + repository = repo_open(tmpdir, + repo_type=AtticRepositoryConverter) + keyfile = AtticKeyfileKey.find_key_file(repository) + AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - def setUp(self): - super().setUp() + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(attic_key_file.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) - # we use the repo dir for the created keyfile, because we do - # not want to clutter existing keyfiles - os.environ['ATTIC_KEYS_DIR'] = self.tmppath - - # we use the same directory for the converted files, which - # will clutter the previously created one, which we don't care - # about anyways. 
in real runs, the original key will be retained. - os.environ['BORG_KEYS_DIR'] = self.tmppath - os.environ['ATTIC_PASSPHRASE'] = 'test' - self.key = attic.key.KeyfileKey.create(self.attic_repo, - self.MockArgs(self.tmppath)) - - def test_keys(self): - repository = self.open(self.tmppath, - repo_type=AtticRepositoryConverter) - keyfile = AtticKeyfileKey.find_key_file(repository) - AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(self.key.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) - - def test_convert_all(self): - # check should fail because of magic number - assert not self.repo_valid() - print("opening attic repository with borg and converting") - repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) - with pytest.raises(NotImplementedError): - repo.convert(dryrun=False) - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(self.key.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) - assert self.repo_valid() +def test_convert_all(tmpdir, attic_repo, attic_key_file): + # check should fail because of magic number + assert not repo_valid(tmpdir) + print("opening attic repository with borg and converting") + repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + with pytest.raises(NotImplementedError): + repo.convert(dryrun=False) + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(attic_key_file.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) + assert repo_valid(tmpdir) From a08bcb21aee3594287551ec4b1e8e8c119c8f65d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:10:00 -0400 Subject: [PATCH 100/151] refactor common code we get rid of repo_open() which doesn't same much 
typing, and add a validator for keys --- borg/testsuite/convert.py | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index ac7d6cbca..e3f9be5d1 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -19,21 +19,22 @@ from ..repository import Repository, MAGIC pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') -def repo_open(path, repo_type=Repository, create=False): - return repo_type(os.path.join(str(path), 'repository'), create=create) - def repo_valid(path): - repository = repo_open(str(path)) + repository = Repository(str(path), create=False) # can't check raises() because check() handles the error state = repository.check() repository.close() return state +def key_valid(path): + keyfile = os.path.join(get_keys_dir(), + os.path.basename(path)) + with open(keyfile, 'r') as f: + return f.read().startswith(KeyfileKey.FILE_ID) + @pytest.fixture(autouse=True) def attic_repo(tmpdir): - attic_repo = repo_open(str(tmpdir), - repo_type=attic.repository.Repository, - create=True) + attic_repo = attic.repository.Repository(str(tmpdir), create=True) # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') @@ -45,7 +46,7 @@ def test_convert_segments(tmpdir, attic_repo): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + repo = AtticRepositoryConverter(str(tmpdir), create=False) segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) @@ -72,27 +73,17 @@ def attic_key_file(attic_repo, tmpdir): MockArgs(keys_dir)) def test_keys(tmpdir, attic_repo, attic_key_file): - repository = repo_open(tmpdir, - 
repo_type=AtticRepositoryConverter) + repository = AtticRepositoryConverter(str(tmpdir), create=False) keyfile = AtticKeyfileKey.find_key_file(repository) AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(attic_key_file.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) + assert key_valid(attic_key_file.path) def test_convert_all(tmpdir, attic_repo, attic_key_file): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + repo = AtticRepositoryConverter(str(tmpdir), create=False) with pytest.raises(NotImplementedError): repo.convert(dryrun=False) - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(attic_key_file.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) + assert key_valid(attic_key_file.path) assert repo_valid(tmpdir) From 7f6fd1f30686ffcb68bbfa87ba75978d691b8b0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:11:30 -0400 Subject: [PATCH 101/151] add docs for all converter test code --- borg/testsuite/convert.py | 66 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index e3f9be5d1..cc85dfca3 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -20,6 +20,12 @@ pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') def repo_valid(path): + """ + utility function to check if borg can open a repository + + :param path: the path to the repository + :returns: if borg can check the repository + """ repository = Repository(str(path), create=False) # can't check raises() because check() handles the error 
state = repository.check() @@ -27,6 +33,12 @@ def repo_valid(path): return state def key_valid(path): + """ + check that the new keyfile is alright + + :param path: the path to the key file + :returns: if the file starts with the borg magic string + """ keyfile = os.path.join(get_keys_dir(), os.path.basename(path)) with open(keyfile, 'r') as f: @@ -34,6 +46,12 @@ def key_valid(path): @pytest.fixture(autouse=True) def attic_repo(tmpdir): + """ + create an attic repo with some stuff in it + + :param tmpdir: path to the repository to be created + :returns: a attic.repository.Repository object + """ attic_repo = attic.repository.Repository(str(tmpdir), create=True) # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): @@ -43,6 +61,16 @@ def attic_repo(tmpdir): @pytest.mark.usefixtures("tmpdir") def test_convert_segments(tmpdir, attic_repo): + """test segment conversion + + this will load the given attic repository, list all the segments + then convert them one at a time. 
we need to close the repo before + conversion otherwise we have errors from borg + + :param tmpdir: a temporary directory to run the test in (builtin + fixture) + :param attic_repo: a populated attic repository (fixture) + """ # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") @@ -53,11 +81,27 @@ def test_convert_segments(tmpdir, attic_repo): assert repo_valid(tmpdir) class MockArgs: + """ + mock attic location + + this is used to simulate a key location with a properly loaded + repository object to create a key file + """ def __init__(self, path): self.repository = attic.helpers.Location(path) @pytest.fixture() def attic_key_file(attic_repo, tmpdir): + """ + create an attic key file from the given repo, in the keys + subdirectory of the given tmpdir + + :param attic_repo: an attic.repository.Repository object (fixture + define above) + :param tmpdir: a temporary directory (a builtin fixture) + :returns: the KeyfileKey object as returned by + attic.key.KeyfileKey.create() + """ keys_dir = str(tmpdir.mkdir('keys')) # we use the repo dir for the created keyfile, because we do @@ -73,12 +117,34 @@ def attic_key_file(attic_repo, tmpdir): MockArgs(keys_dir)) def test_keys(tmpdir, attic_repo, attic_key_file): + """test key conversion + + test that we can convert the given key to a properly formatted + borg key. assumes that the ATTIC_KEYS_DIR and BORG_KEYS_DIR have + been properly populated by the attic_key_file fixture. 
+ + :param tmpdir: a temporary directory (a builtin fixture) + :param attic_repo: an attic.repository.Repository object (fixture + define above) + :param attic_key_file: an attic.key.KeyfileKey (fixture created above) + """ repository = AtticRepositoryConverter(str(tmpdir), create=False) keyfile = AtticKeyfileKey.find_key_file(repository) AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) assert key_valid(attic_key_file.path) def test_convert_all(tmpdir, attic_repo, attic_key_file): + """test all conversion steps + + this runs everything. mostly redundant test, since everything is + done above. yet we expect a NotImplementedError because we do not + convert caches yet. + + :param tmpdir: a temporary directory (a builtin fixture) + :param attic_repo: an attic.repository.Repository object (fixture + define above) + :param attic_key_file: an attic.key.KeyfileKey (fixture created above) + """ # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") From 6c318a0f273e522851100f7094a961396f4743e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:12:23 -0400 Subject: [PATCH 102/151] re-pep8 --- borg/testsuite/convert.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index cc85dfca3..5596f4e65 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -19,6 +19,7 @@ from ..repository import Repository, MAGIC pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') + def repo_valid(path): """ utility function to check if borg can open a repository @@ -32,6 +33,7 @@ def repo_valid(path): repository.close() return state + def key_valid(path): """ check that the new keyfile is alright @@ -44,6 +46,7 @@ def key_valid(path): with open(keyfile, 'r') as f: return f.read().startswith(KeyfileKey.FILE_ID) + @pytest.fixture(autouse=True) def 
attic_repo(tmpdir): """ @@ -59,6 +62,7 @@ def attic_repo(tmpdir): attic_repo.close() return attic_repo + @pytest.mark.usefixtures("tmpdir") def test_convert_segments(tmpdir, attic_repo): """test segment conversion @@ -80,6 +84,7 @@ def test_convert_segments(tmpdir, attic_repo): repo.convert_segments(segments, dryrun=False) assert repo_valid(tmpdir) + class MockArgs: """ mock attic location @@ -90,6 +95,7 @@ class MockArgs: def __init__(self, path): self.repository = attic.helpers.Location(path) + @pytest.fixture() def attic_key_file(attic_repo, tmpdir): """ @@ -116,6 +122,7 @@ def attic_key_file(attic_repo, tmpdir): return attic.key.KeyfileKey.create(attic_repo, MockArgs(keys_dir)) + def test_keys(tmpdir, attic_repo, attic_key_file): """test key conversion @@ -133,6 +140,7 @@ def test_keys(tmpdir, attic_repo, attic_key_file): AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) assert key_valid(attic_key_file.path) + def test_convert_all(tmpdir, attic_repo, attic_key_file): """test all conversion steps From 946aca97a1ce48e94e5c00be146e58b3f2f5a28a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:25:02 -0400 Subject: [PATCH 103/151] avoid flooding the console instead we add progress information --- borg/converter.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 99de15170..b662c1a32 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -1,5 +1,6 @@ from binascii import hexlify import os +import time from .helpers import get_keys_dir from .locking import UpgradableLock @@ -41,12 +42,17 @@ class AtticRepositoryConverter(Repository): luckily the magic string length didn't change so we can just replace the 8 first bytes of all regular files in there.""" print("converting %d segments..." 
% len(segments)) + i = 0 for filename in segments: - print("converting segment %s in place" % filename) - if not dryrun: + print("\rconverting segment %s in place (%d/%d)" % (filename, i, len(segments)), end='') + i += 1 + if dryrun: + time.sleep(0.001) + else: with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) + print() def find_attic_keyfile(self): """find the attic keyfiles From 0d457bc8466e9fbbdb7f069f8707ea93333ce4d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:25:12 -0400 Subject: [PATCH 104/151] clarify what to do about the cache warning --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index b662c1a32..04dd911a7 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -118,7 +118,7 @@ class AtticRepositoryConverter(Repository): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right """ - raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches') + raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches. use borg check --repair to rebuild now') class AtticKeyfileKey(KeyfileKey): """backwards compatible Attic key file parser""" From 3bb3bd45fc1074a840b5c60dff391c92d6981074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 12:36:53 -0400 Subject: [PATCH 105/151] add percentage progress --- borg/converter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 04dd911a7..f32187fe8 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -44,7 +44,8 @@ class AtticRepositoryConverter(Repository): print("converting %d segments..." 
% len(segments)) i = 0 for filename in segments: - print("\rconverting segment %s in place (%d/%d)" % (filename, i, len(segments)), end='') + print("\rconverting segment %d/%d in place, %.2f%% done (%s)" + % (i, len(segments), float(i)/len(segments), filename), end='') i += 1 if dryrun: time.sleep(0.001) From 6a72252b69e0ef07b9e0c54b669e0a762f4f233d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:22:29 -0400 Subject: [PATCH 106/151] release lock properly if segment conversion crashes --- borg/converter.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index f32187fe8..7e8e2f75e 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -27,9 +27,11 @@ class AtticRepositoryConverter(Repository): # partial open: just hold on to the lock self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True).acquire() - self.convert_segments(segments, dryrun) - self.lock.release() - self.lock = None + try: + self.convert_segments(segments, dryrun) + finally: + self.lock.release() + self.lock = None self.convert_cache(dryrun) @staticmethod From 180dfcb18f87555d2a1c555c9af28ed7061e3afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:23:43 -0400 Subject: [PATCH 107/151] remove needless indentation --- borg/converter.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 7e8e2f75e..a65f887e0 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -93,11 +93,8 @@ class AtticRepositoryConverter(Repository): print("converting keyfile %s" % keyfile) with open(keyfile, 'r') as f: data = f.read() - data = data.replace(AtticKeyfileKey.FILE_ID, - KeyfileKey.FILE_ID, - 1) - keyfile = os.path.join(get_keys_dir(), - os.path.basename(keyfile)) + data = data.replace(AtticKeyfileKey.FILE_ID, KeyfileKey.FILE_ID, 1) + keyfile = os.path.join(get_keys_dir(), 
os.path.basename(keyfile)) print("writing borg keyfile to %s" % keyfile) if not dryrun: with open(keyfile, 'w') as f: From 35b219597f1a1a9ce85a7f676d0513959699a1dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:28:49 -0400 Subject: [PATCH 108/151] only write magic num if necessary this could allow speeding up conversions resumed after interruption --- borg/converter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index a65f887e0..89f912a7d 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -7,6 +7,8 @@ from .locking import UpgradableLock from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError +ATTIC_MAGIC = b'ATTICSEG' + class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): """convert an attic repository to a borg repository @@ -54,7 +56,10 @@ class AtticRepositoryConverter(Repository): else: with open(filename, 'r+b') as segment: segment.seek(0) - segment.write(MAGIC) + # only write if necessary + if (segment.read(len(ATTIC_MAGIC)) == ATTIC_MAGIC): + segment.seek(0) + segment.write(MAGIC) print() def find_attic_keyfile(self): From a7902e56575b3bcddea7057dd2c4a06d8c63cf2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:29:09 -0400 Subject: [PATCH 109/151] cosmetic: show 100% when done, not n-1/n% --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 89f912a7d..573411584 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -48,9 +48,9 @@ class AtticRepositoryConverter(Repository): print("converting %d segments..." 
% len(segments)) i = 0 for filename in segments: + i += 1 print("\rconverting segment %d/%d in place, %.2f%% done (%s)" % (i, len(segments), float(i)/len(segments), filename), end='') - i += 1 if dryrun: time.sleep(0.001) else: From 7c32f555ac45fb02e0c821d697e43976c005cdd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 15:43:16 -0400 Subject: [PATCH 110/151] repository index conversion --- borg/converter.py | 32 ++++++++++++++++++++++++-------- borg/testsuite/convert.py | 5 +++-- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 573411584..899979900 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -18,6 +18,8 @@ class AtticRepositoryConverter(Repository): caches, the latter being optional, as they will be rebuilt if missing.""" print("reading segments from attic repository using borg") + # we need to open it to load the configuration and other fields + self.open(self.path, exclusive=False) segments = [ filename for i, filename in self.io.segment_iterator() ] try: keyfile = self.find_attic_keyfile() @@ -31,10 +33,10 @@ class AtticRepositoryConverter(Repository): exclusive=True).acquire() try: self.convert_segments(segments, dryrun) + self.convert_cache(dryrun) finally: self.lock.release() self.lock = None - self.convert_cache(dryrun) @staticmethod def convert_segments(segments, dryrun): @@ -54,14 +56,19 @@ class AtticRepositoryConverter(Repository): if dryrun: time.sleep(0.001) else: - with open(filename, 'r+b') as segment: - segment.seek(0) - # only write if necessary - if (segment.read(len(ATTIC_MAGIC)) == ATTIC_MAGIC): - segment.seek(0) - segment.write(MAGIC) + AtticRepositoryConverter.header_replace(filename, ATTIC_MAGIC, MAGIC) print() + @staticmethod + def header_replace(filename, old_magic, new_magic): + print("changing header on %s" % filename) + with open(filename, 'r+b') as segment: + segment.seek(0) + # only write if necessary + if 
(segment.read(len(old_magic)) == old_magic): + segment.seek(0) + segment.write(new_magic) + def find_attic_keyfile(self): """find the attic keyfiles @@ -123,7 +130,16 @@ class AtticRepositoryConverter(Repository): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right """ - raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches. use borg check --repair to rebuild now') + caches = [] + transaction_id = self.get_index_transaction_id() + if transaction_id is None: + print('no index file found for repository %s' % self.path) + else: + caches += [os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8')] + for cache in caches: + print("converting cache %s" % cache) + AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + class AtticKeyfileKey(KeyfileKey): """backwards compatible Attic key file parser""" diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 5596f4e65..b57e77097 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -59,6 +59,7 @@ def attic_repo(tmpdir): # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + attic_repo.commit() attic_repo.close() return attic_repo @@ -82,6 +83,7 @@ def test_convert_segments(tmpdir, attic_repo): segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) + repo.convert_cache(dryrun=False) assert repo_valid(tmpdir) @@ -157,7 +159,6 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file): assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") repo = AtticRepositoryConverter(str(tmpdir), create=False) - with pytest.raises(NotImplementedError): - repo.convert(dryrun=False) + repo.convert(dryrun=False) assert key_valid(attic_key_file.path) assert 
repo_valid(tmpdir) From 022de5be47174b6017152b60577ab54c9b309a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:01:01 -0400 Subject: [PATCH 111/151] untested file/chunks cache conversion i couldn't figure out how to generate a cache set directly, Archiver is a pain... --- borg/converter.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 899979900..8436f9486 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -1,8 +1,9 @@ from binascii import hexlify import os +import shutil import time -from .helpers import get_keys_dir +from .helpers import get_keys_dir, get_cache_dir from .locking import UpgradableLock from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError @@ -124,7 +125,7 @@ class AtticRepositoryConverter(Repository): `Repository.open()`, which i'm not sure we should use because it may write data on `Repository.close()`... 
- * the `files` and `chunks` cache (in + * the `files` and `chunks` cache (in `$ATTIC_CACHE_DIR` or `$HOME/.cache/attic//`), which we could just drop, but if we'd want to convert, we could open it with the `Cache.open()`, edit in place and then `Cache.close()` to @@ -136,6 +137,20 @@ class AtticRepositoryConverter(Repository): print('no index file found for repository %s' % self.path) else: caches += [os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8')] + + # copy of attic's get_cache_dir() + attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', + os.path.join(os.path.expanduser('~'), '.cache', 'attic')) + + # XXX: untested, because generating cache files is a PITA, see + # Archiver.do_create() for proof + for cache in [ 'files', 'chunks' ]: + attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) + if os.path.exists(attic_cache): + borg_cache = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii'), cache) + shutil.copy(attic_cache, borg_cache) + caches += [borg_cache] + for cache in caches: print("converting cache %s" % cache) AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') From 4f9a411ad843469133426c4eea5c4815198a8777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:01:17 -0400 Subject: [PATCH 112/151] remove unneeded fixture decorator --- borg/testsuite/convert.py | 1 - 1 file changed, 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index b57e77097..ceb3efb11 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -64,7 +64,6 @@ def attic_repo(tmpdir): return attic_repo -@pytest.mark.usefixtures("tmpdir") def test_convert_segments(tmpdir, attic_repo): """test segment conversion From 28a033d1d35555a2b46b4a50edb010544cf5e749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:03:52 -0400 Subject: [PATCH 113/151] remove debug output that clobbers segment 
spinner --- borg/converter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 8436f9486..27d174b47 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -62,7 +62,6 @@ class AtticRepositoryConverter(Repository): @staticmethod def header_replace(filename, old_magic, new_magic): - print("changing header on %s" % filename) with open(filename, 'r+b') as segment: segment.seek(0) # only write if necessary From 55f79b4999429c10cb99d154bb667fc781986629 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:24:28 -0400 Subject: [PATCH 114/151] complete cache conversion code we need to create the borg cache directory dry run was ignored, fixed. process cache before segment, because we want to do the faster stuff first --- borg/converter.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 27d174b47..ced2b409d 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -17,7 +17,11 @@ class AtticRepositoryConverter(Repository): those are the files that need to be converted here, from most important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if - missing.""" + missing. + + we nevertheless do the order in reverse, as we prefer to do + the fast stuff first, to improve interactivity. 
+ """ print("reading segments from attic repository using borg") # we need to open it to load the configuration and other fields self.open(self.path, exclusive=False) @@ -33,8 +37,8 @@ class AtticRepositoryConverter(Repository): self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True).acquire() try: - self.convert_segments(segments, dryrun) self.convert_cache(dryrun) + self.convert_segments(segments, dryrun) finally: self.lock.release() self.lock = None @@ -146,13 +150,21 @@ class AtticRepositoryConverter(Repository): for cache in [ 'files', 'chunks' ]: attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) if os.path.exists(attic_cache): - borg_cache = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii'), cache) - shutil.copy(attic_cache, borg_cache) + borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) + if not os.path.exists(borg_cache_dir): + os.makedirs(borg_cache_dir) + borg_cache = os.path.join(borg_cache_dir, cache) + print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) + if not dryrun: + shutil.copy(attic_cache, borg_cache) caches += [borg_cache] + else: + print("no %s cache found in %s" % (cache, attic_cache)) for cache in caches: print("converting cache %s" % cache) - AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + if not dryrun: + AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey): From 8022e563a9316883636cbfe3243d0a24277a111f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:27:19 -0400 Subject: [PATCH 115/151] don't clobber existing borg cache --- borg/converter.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index ced2b409d..456647732 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -154,10 +154,13 @@ class AtticRepositoryConverter(Repository): 
if not os.path.exists(borg_cache_dir): os.makedirs(borg_cache_dir) borg_cache = os.path.join(borg_cache_dir, cache) - print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) - if not dryrun: - shutil.copy(attic_cache, borg_cache) - caches += [borg_cache] + if os.path.exists(borg_cache): + print("borg cache already exists in %s, skipping conversion of %s" % (borg_cache, attic_cache)) + else: + print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) + if not dryrun: + shutil.copyfile(attic_cache, borg_cache) + caches += [borg_cache] else: print("no %s cache found in %s" % (cache, attic_cache)) From 3e7fa0d63339d49b04792bac3f050c42c3e1cba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:41:17 -0400 Subject: [PATCH 116/151] also copy the cache config file to workaround #234 --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 456647732..2657a27a3 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -147,7 +147,7 @@ class AtticRepositoryConverter(Repository): # XXX: untested, because generating cache files is a PITA, see # Archiver.do_create() for proof - for cache in [ 'files', 'chunks' ]: + for cache in [ 'files', 'chunks', 'config' ]: attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) if os.path.exists(attic_cache): borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) From 081b91bea016b43f569e66a681366514af5b0f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 09:43:10 -0400 Subject: [PATCH 117/151] remove needless paren --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 2657a27a3..db4fca8f7 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -69,7 +69,7 @@ class AtticRepositoryConverter(Repository): with open(filename, 'r+b') 
as segment: segment.seek(0) # only write if necessary - if (segment.read(len(old_magic)) == old_magic): + if segment.read(len(old_magic)) == old_magic: segment.seek(0) segment.write(new_magic) From 41e9942efea82394585bd3ddae4bf995dc31c8a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 09:43:51 -0400 Subject: [PATCH 118/151] follow naming of tested module --- borg/testsuite/{convert.py => converter.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename borg/testsuite/{convert.py => converter.py} (100%) diff --git a/borg/testsuite/convert.py b/borg/testsuite/converter.py similarity index 100% rename from borg/testsuite/convert.py rename to borg/testsuite/converter.py From d4d1b414b5c7b53ba37f32d7cb5ed8a15ffd6b68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 09:44:53 -0400 Subject: [PATCH 119/151] remove needless autouse --- borg/testsuite/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/converter.py b/borg/testsuite/converter.py index ceb3efb11..b7e3748e9 100644 --- a/borg/testsuite/converter.py +++ b/borg/testsuite/converter.py @@ -47,7 +47,7 @@ def key_valid(path): return f.read().startswith(KeyfileKey.FILE_ID) -@pytest.fixture(autouse=True) +@pytest.fixture() def attic_repo(tmpdir): """ create an attic repo with some stuff in it From 69040588cdf7a7ed7630302378209ad71bf1d47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 10:10:43 -0400 Subject: [PATCH 120/151] update docs to reflect that cache is converted --- borg/archiver.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 02c6ea781..041f44260 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -914,10 +914,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or 
~/.borg/keys. - the cache files are *not* currently converted, which will - result in a much longer backup the first time. you must run - `borg check --repair` to rebuild those files after the - conversion. + the cache files are converted, but the cache layout between Borg + and Attic changed, so it is possible the first backup after the + conversion takes longer than expected due to the cache resync. the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as From ad85f64842a95f37445faad2d4bd6d5323100323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 10:10:50 -0400 Subject: [PATCH 121/151] whitespace --- borg/archiver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 041f44260..357bdad86 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -925,9 +925,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") it is recommended you run this on a copy of the Attic repository, in case something goes wrong, for example: - cp -a attic borg - borg convert -n borg - borg convert borg + cp -a attic borg + borg convert -n borg + borg convert borg you have been warned.""") subparser = subparsers.add_parser('convert', parents=[common_parser], From ea5d00436c723d09769cccc618eed4f69585d73a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 10:12:13 -0400 Subject: [PATCH 122/151] also document the cache locations --- borg/archiver.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 357bdad86..2f5325257 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -914,9 +914,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or ~/.borg/keys. 
- the cache files are converted, but the cache layout between Borg - and Attic changed, so it is possible the first backup after the - conversion takes longer than expected due to the cache resync. + the cache files are converted, from $ATTIC_CACHE_DIR or + ~/.cache/attic to $BORG_CACHE_DIR or ~/.cache/borg, but the + cache layout between Borg and Attic changed, so it is possible + the first backup after the conversion takes longer than expected + due to the cache resync. the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as From 897851599152488d952e6f3bc43132299374cee5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 2 Oct 2015 16:56:31 +0200 Subject: [PATCH 123/151] temporary hack to avoid using lots of disk space for chunks.archive.d --- borg/cache.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index cfe419913..c33d024cc 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -219,9 +219,12 @@ class Cache: return path.encode('utf-8') def cached_archives(): - fns = os.listdir(archive_path) - # filenames with 64 hex digits == 256bit - return set(unhexlify(fn) for fn in fns if len(fn) == 64) + if self.do_cache: + fns = os.listdir(archive_path) + # filenames with 64 hex digits == 256bit + return set(unhexlify(fn) for fn in fns if len(fn) == 64) + else: + return set() def repo_archives(): return set(info[b'id'] for info in self.manifest.archives.values()) @@ -258,14 +261,15 @@ class Cache: if b'chunks' in item: for chunk_id, size, csize in item[b'chunks']: add(chunk_idx, chunk_id, size, csize) - fn = mkpath(archive_id) - fn_tmp = mkpath(archive_id, suffix='.tmp') - try: - chunk_idx.write(fn_tmp) - except Exception: - os.unlink(fn_tmp) - else: - os.rename(fn_tmp, fn) + if self.do_cache: + fn = mkpath(archive_id) + fn_tmp = mkpath(archive_id, suffix='.tmp') + try: + chunk_idx.write(fn_tmp) + except Exception: + 
os.unlink(fn_tmp) + else: + os.rename(fn_tmp, fn) return chunk_idx def lookup_name(archive_id): @@ -323,6 +327,9 @@ class Cache: self.begin_txn() repository = cache_if_remote(self.repository) legacy_cleanup() + # TEMPORARY HACK: to avoid archive index caching, create a FILE named ~/.cache/borg/REPOID/chunks.archive.d - + # this is only recommended if you have a fast, low latency connection to your repo (e.g. if repo is local disk) + self.do_cache = os.path.isdir(archive_path) self.chunks = create_master_idx(self.chunks) def add_chunk(self, id, data, stats): From 893242ead42ef50de969a5083c9259cf72fe25f9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 2 Oct 2015 18:11:10 +0200 Subject: [PATCH 124/151] fix multiple issues with the cache config version check, fixes #234 - issue #234: handle exception when config file is empty is really not a borg cache config - there was a unused %s in the Exception string - error msg was wrong when version check failed - this IS a borg cache, but not of expected version --- borg/cache.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index cfe419913..aa9aaab16 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -1,4 +1,4 @@ -from configparser import RawConfigParser +import configparser from .remote import cache_if_remote import errno import msgpack @@ -89,7 +89,7 @@ class Cache: os.makedirs(self.path) with open(os.path.join(self.path, 'README'), 'w') as fd: fd.write('This is a Borg cache') - config = RawConfigParser() + config = configparser.RawConfigParser() config.add_section('cache') config.set('cache', 'version', '1') config.set('cache', 'repository', hexlify(self.repository.id).decode('ascii')) @@ -109,10 +109,17 @@ class Cache: shutil.rmtree(self.path) def _do_open(self): - self.config = RawConfigParser() - self.config.read(os.path.join(self.path, 'config')) - if self.config.getint('cache', 'version') != 1: - raise Exception('%s Does not look like a 
Borg cache') + self.config = configparser.RawConfigParser() + config_path = os.path.join(self.path, 'config') + self.config.read(config_path) + try: + cache_version = self.config.getint('cache', 'version') + wanted_version = 1 + if cache_version != wanted_version: + raise Exception('%s has unexpected cache version %d (wanted: %d).' % ( + config_path, cache_version, wanted_version)) + except configparser.NoSectionError as e: + raise Exception('%s does not look like a Borg cache.' % config_path) self.id = self.config.get('cache', 'repository') self.manifest_id = unhexlify(self.config.get('cache', 'manifest')) self.timestamp = self.config.get('cache', 'timestamp', fallback=None) From 3143839f6b53a7669d415051b3a8c09038ed4e0b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 3 Oct 2015 03:10:12 +0200 Subject: [PATCH 125/151] use Debian 7 (wheezy) VMs to do the linux binary builds --- Vagrantfile | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 2777f33a7..772c72168 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -24,6 +24,8 @@ def packages_debianoid apt-get install -y libssl-dev libacl1-dev liblz4-dev libfuse-dev fuse pkg-config apt-get install -y fakeroot build-essential git apt-get install -y python3-dev python3-setuptools + # for building python: + apt-get install zlib1g-dev libbz2-dev libncurses5-dev libreadline-dev liblzma-dev libsqlite3-dev # this way it works on older dists (like ubuntu 12.04) also: easy_install3 pip pip3 install virtualenv @@ -345,14 +347,31 @@ Vagrant.configure(2) do |config| end config.vm.define "wheezy32" do |b| - b.vm.box = "puppetlabs/debian-7.8-32-nocm" + b.vm.box = "boxcutter/debian79-i386" b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid - b.vm.provision "build env", :type => :shell, :privileged => false, :inline => 
build_sys_venv("wheezy32") + b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy32") + b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("wheezy32") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("wheezy32") b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy32") + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("wheezy32") + b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("wheezy32") b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy32") end + config.vm.define "wheezy64" do |b| + b.vm.box = "boxcutter/debian79" + b.vm.provision "packages prepare wheezy", :type => :shell, :inline => packages_prepare_wheezy + b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid + b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy64") + b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("wheezy64") + b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("wheezy64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy64") + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("wheezy64") + b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("wheezy64") + b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy64") + end + # OS X config.vm.define "darwin64" do |b| b.vm.box = "jhcook/yosemite-clitools" From 
2c13027750f274442f33c1756207cdf51f61d8cc Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 3 Oct 2015 03:21:56 +0200 Subject: [PATCH 126/151] apt-get install: add -y --- Vagrantfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Vagrantfile b/Vagrantfile index 772c72168..d179bdd6c 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -25,7 +25,7 @@ def packages_debianoid apt-get install -y fakeroot build-essential git apt-get install -y python3-dev python3-setuptools # for building python: - apt-get install zlib1g-dev libbz2-dev libncurses5-dev libreadline-dev liblzma-dev libsqlite3-dev + apt-get install -y zlib1g-dev libbz2-dev libncurses5-dev libreadline-dev liblzma-dev libsqlite3-dev # this way it works on older dists (like ubuntu 12.04) also: easy_install3 pip pip3 install virtualenv From bf3cf21ac5d128785dacbc0282370524763099c6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 3 Oct 2015 14:12:16 +0200 Subject: [PATCH 127/151] docs: add warning about prune --- docs/quickstart.rst | 9 ++++++--- docs/usage.rst | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index b6c4c42df..32218fc67 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -85,9 +85,12 @@ certain number of old archives:: --exclude /home/Ben/Music/Justin\ Bieber \ --exclude '*.pyc' - # Use the `prune` subcommand to maintain 7 daily, 4 weekly - # and 6 monthly archives. - borg prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6 + # Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly + # archives of THIS machine. --prefix `hostname`- is very important to + # limit prune's operation to this machine's archives and not apply to + # other machine's archives also. + borg prune -v $REPOSITORY --prefix `hostname`- \ + --keep-daily=7 --keep-weekly=4 --keep-monthly=6 .. 
backup_compression: diff --git a/docs/usage.rst b/docs/usage.rst index 3a933d42c..95b95d90d 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -265,10 +265,23 @@ Examples Examples ~~~~~~~~ + +Be careful, prune is potentially dangerous command, it will remove backup +archives. + +The default of prune is to apply to **all archives in the repository** unless +you restrict its operation to a subset of the archives using `--prefix`. +When using --prefix, be careful to choose a good prefix - e.g. do not use a +prefix "foo" if you do not also want to match "foobar". + +It is strongly recommended to always run `prune --dry-run ...` first so you +will see what it would do without it actually doing anything. + :: - # Keep 7 end of day and 4 additional end of week archives: - $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 + # Keep 7 end of day and 4 additional end of week archives. + # Do a dry-run without actually deleting anything. + $ borg prune /mnt/backup --dry-run --keep-daily=7 --keep-weekly=4 # Same as above but only apply to archive names starting with "foo": $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 --prefix=foo From 2c66e7c23373cd2ee04ae7199d391d39a5a51a8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 10:49:29 -0400 Subject: [PATCH 128/151] make percentage a real percentage --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index db4fca8f7..7f4127cd8 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -57,7 +57,7 @@ class AtticRepositoryConverter(Repository): for filename in segments: i += 1 print("\rconverting segment %d/%d in place, %.2f%% done (%s)" - % (i, len(segments), float(i)/len(segments), filename), end='') + % (i, len(segments), 100*float(i)/len(segments), filename), end='') if dryrun: time.sleep(0.001) else: From 3773681f00c030b0deff0c503d3d94577b9f32a6 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 11:07:37 -0400 Subject: [PATCH 129/151] rewire cache copy mechanisms we separate the conversion and the copy in order to be able to copy arbitrary files from attic without converting them. this allows us to copy the config file cleanly without attempting to rewrite its magic number --- borg/converter.py | 56 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 7f4127cd8..39fe3788a 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -144,25 +144,49 @@ class AtticRepositoryConverter(Repository): # copy of attic's get_cache_dir() attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', os.path.join(os.path.expanduser('~'), '.cache', 'attic')) + attic_cache_dir = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii')) + borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) + + def copy_cache_file(file): + """copy the given attic cache file into the borg directory + + does nothing if dryrun is True. also expects + attic_cache_dir and borg_cache_dir to be set in the parent + scope, to the directories path including the repository + identifier. + + :params file: the basename of the cache file to copy + (example: "files" or "chunks") as a string + + :returns: the borg file that was created or None if non + was created. 
+ + """ + attic_file = os.path.join(attic_cache_dir, file) + if os.path.exists(attic_file): + borg_file = os.path.join(borg_cache_dir, file) + if os.path.exists(borg_file): + print("borg cache file already exists in %s, skipping conversion of %s" % (borg_file, attic_file)) + else: + print("copying attic cache file from %s to %s" % (attic_file, borg_file)) + if not dryrun: + shutil.copyfile(attic_file, borg_file) + return borg_file + else: + print("no %s cache file found in %s" % (file, attic_file)) + return None + + if os.path.exists(attic_cache_dir): + if not os.path.exists(borg_cache_dir): + os.makedirs(borg_cache_dir) + copy_cache_file('config') # XXX: untested, because generating cache files is a PITA, see # Archiver.do_create() for proof - for cache in [ 'files', 'chunks', 'config' ]: - attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) - if os.path.exists(attic_cache): - borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) - if not os.path.exists(borg_cache_dir): - os.makedirs(borg_cache_dir) - borg_cache = os.path.join(borg_cache_dir, cache) - if os.path.exists(borg_cache): - print("borg cache already exists in %s, skipping conversion of %s" % (borg_cache, attic_cache)) - else: - print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) - if not dryrun: - shutil.copyfile(attic_cache, borg_cache) - caches += [borg_cache] - else: - print("no %s cache found in %s" % (cache, attic_cache)) + for cache in [ 'files', 'chunks' ]: + copied = copy_cache_file(cache) + if copied: + caches += [copied] for cache in caches: print("converting cache %s" % cache) From 690541264e8beb6f5789c11c1a426ce65a263344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 11:49:01 -0400 Subject: [PATCH 130/151] style fixes (pep8, append, file builtin) --- borg/converter.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git 
a/borg/converter.py b/borg/converter.py index 39fe3788a..14aedb9bb 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -10,6 +10,7 @@ from .key import KeyfileKey, KeyfileNotFoundError ATTIC_MAGIC = b'ATTICSEG' + class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): """convert an attic repository to a borg repository @@ -25,7 +26,7 @@ class AtticRepositoryConverter(Repository): print("reading segments from attic repository using borg") # we need to open it to load the configuration and other fields self.open(self.path, exclusive=False) - segments = [ filename for i, filename in self.io.segment_iterator() ] + segments = [filename for i, filename in self.io.segment_iterator()] try: keyfile = self.find_attic_keyfile() except KeyfileNotFoundError: @@ -121,7 +122,7 @@ class AtticRepositoryConverter(Repository): those are all hash indexes, so we need to `s/ATTICIDX/BORG_IDX/` in a few locations: - + * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` is the `Repository.get_index_transaction_id()`), which we should probably update, with a lock, see @@ -143,28 +144,29 @@ class AtticRepositoryConverter(Repository): # copy of attic's get_cache_dir() attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', - os.path.join(os.path.expanduser('~'), '.cache', 'attic')) + os.path.join(os.path.expanduser('~'), + '.cache', 'attic')) attic_cache_dir = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii')) borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) - def copy_cache_file(file): - """copy the given attic cache file into the borg directory + def copy_cache_file(path): + """copy the given attic cache path into the borg directory does nothing if dryrun is True. also expects attic_cache_dir and borg_cache_dir to be set in the parent scope, to the directories path including the repository identifier. 
- :params file: the basename of the cache file to copy + :params path: the basename of the cache file to copy (example: "files" or "chunks") as a string :returns: the borg file that was created or None if non was created. """ - attic_file = os.path.join(attic_cache_dir, file) + attic_file = os.path.join(attic_cache_dir, path) if os.path.exists(attic_file): - borg_file = os.path.join(borg_cache_dir, file) + borg_file = os.path.join(borg_cache_dir, path) if os.path.exists(borg_file): print("borg cache file already exists in %s, skipping conversion of %s" % (borg_file, attic_file)) else: @@ -173,7 +175,7 @@ class AtticRepositoryConverter(Repository): shutil.copyfile(attic_file, borg_file) return borg_file else: - print("no %s cache file found in %s" % (file, attic_file)) + print("no %s cache file found in %s" % (path, attic_file)) return None if os.path.exists(attic_cache_dir): @@ -183,10 +185,10 @@ class AtticRepositoryConverter(Repository): # XXX: untested, because generating cache files is a PITA, see # Archiver.do_create() for proof - for cache in [ 'files', 'chunks' ]: + for cache in ['files', 'chunks']: copied = copy_cache_file(cache) if copied: - caches += [copied] + caches.append(copied) for cache in caches: print("converting cache %s" % cache) From 48b7c8cea3abe8c0dc8f8cb7d4dd549489659094 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 11:52:12 -0400 Subject: [PATCH 131/151] avoid checking for non-existent files if there's no attic cache, it's no use checking for individual files this also makes the code a little clearer also added comments --- borg/converter.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 14aedb9bb..402ea8b42 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -178,22 +178,26 @@ class AtticRepositoryConverter(Repository): print("no %s cache file found in %s" % (path, attic_file)) return None + # XXX: 
untested, because generating cache files is a PITA, see + # Archiver.do_create() for proof if os.path.exists(attic_cache_dir): if not os.path.exists(borg_cache_dir): os.makedirs(borg_cache_dir) + + # non-binary file that we don't need to convert, just copy copy_cache_file('config') - # XXX: untested, because generating cache files is a PITA, see - # Archiver.do_create() for proof - for cache in ['files', 'chunks']: - copied = copy_cache_file(cache) - if copied: - caches.append(copied) + # we need to convert the headers of those files, copy first + for cache in ['files', 'chunks']: + copied = copy_cache_file(cache) + if copied: + caches.append(copied) - for cache in caches: - print("converting cache %s" % cache) - if not dryrun: - AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + # actually convert the headers of the detected files + for cache in caches: + print("converting cache %s" % cache) + if not dryrun: + AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey): From c91c5d0029cb364168533d33a6ee28c27b9f1340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 12:36:52 -0400 Subject: [PATCH 132/151] rename convert command to upgrade convert is too generic for the Attic conversion: we may have other converters, from other, more foreign systems that will require different options and different upgrade mechanisms that convert could never cover appropriately. we are more likely to use an approach similar to "git fast-import" instead here, and have the conversion tools be external tools that feed standard data into borg during conversion. upgrade seems like a more natural fit: Attic could be considered like a pre-historic version of Borg that requires invasive changes for borg to be able to use the repository.
we may require such changes in the future of borg as well: if we make backwards-incompatible changes to the repository layout or data format, it is possible that we require such changes to be performed on the repository before it is usable again. instead of scattering those conversions all over the code, we should simply have assertions that check the layout is correct and point the user to upgrade if it is not. upgrade should eventually automatically detect the repository format or version and perform appropriate conversions. Attic is only the first one. we still need to implement an adequate API for auto-detection and upgrade, only the seeds of that are present for now. of course, changes to the upgrade command should be thoroughly documented in the release notes and an eventual upgrade manual. --- borg/archiver.py | 39 +++++++++++++------- borg/testsuite/{converter.py => upgrader.py} | 12 +++--- borg/{converter.py => upgrader.py} | 10 ++--- 3 files changed, 36 insertions(+), 25 deletions(-) rename borg/testsuite/{converter.py => upgrader.py} (93%) rename borg/{converter.py => upgrader.py} (96%) diff --git a/borg/archiver.py b/borg/archiver.py index 2f5325257..202ae0ef6 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,7 +17,7 @@ import traceback from . 
import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .compress import Compressor, COMPR_BUFFER -from .converter import AtticRepositoryConverter +from .upgrader import AtticRepositoryUpgrader from .repository import Repository from .cache import Cache from .key import key_creator @@ -463,11 +463,20 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") stats.print_('Deleted data:', cache) return self.exit_code - def do_convert(self, args): - """convert a repository from attic to borg""" - repo = AtticRepositoryConverter(args.repository.path, create=False) + def do_upgrade(self, args): + """upgrade a repository from a previous version""" + # XXX: currently only upgrades from Attic repositories, but may + # eventually be extended to deal with major upgrades for borg + # itself. + # + # in this case, it should auto-detect the current repository + # format and fire up necessary upgrade mechanism. this remains + # to be implemented. + + # XXX: should auto-detect if it is an attic repository here + repo = AtticRepositoryUpgrader(args.repository.path, create=False) try: - repo.convert(args.dry_run) + repo.upgrade(args.dry_run) except NotImplementedError as e: print("warning: %s" % e) return self.exit_code @@ -906,8 +915,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") type=location_validator(archive=False), help='repository to prune') - convert_epilog = textwrap.dedent(""" - convert will convert an existing Attic repository to Borg in place. + upgrade_epilog = textwrap.dedent(""" + upgrade an existing Borg repository in place. this currently + only support converting an Attic repository, but may + eventually be extended to cover major Borg upgrades as well. it will change the magic strings in the repository's segments to match the new Borg magic strings. 
the keyfiles found in @@ -928,21 +939,21 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") repository, in case something goes wrong, for example: cp -a attic borg - borg convert -n borg - borg convert borg + borg upgrade -n borg + borg upgrade borg you have been warned.""") - subparser = subparsers.add_parser('convert', parents=[common_parser], - description=self.do_convert.__doc__, - epilog=convert_epilog, + subparser = subparsers.add_parser('upgrade', parents=[common_parser], + description=self.do_upgrade.__doc__, + epilog=upgrade_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) - subparser.set_defaults(func=self.do_convert) + subparser.set_defaults(func=self.do_upgrade) subparser.add_argument('-n', '--dry-run', dest='dry_run', default=False, action='store_true', help='do not change repository') subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), - help='path to the attic repository to be converted') + help='path to the repository to be upgraded') subparser = subparsers.add_parser('help', parents=[common_parser], description='Extra help') diff --git a/borg/testsuite/converter.py b/borg/testsuite/upgrader.py similarity index 93% rename from borg/testsuite/converter.py rename to borg/testsuite/upgrader.py index b7e3748e9..22278f9ac 100644 --- a/borg/testsuite/converter.py +++ b/borg/testsuite/upgrader.py @@ -11,7 +11,7 @@ try: except ImportError: attic = None -from ..converter import AtticRepositoryConverter, AtticKeyfileKey +from ..upgrader import AtticRepositoryUpgrader, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC @@ -78,7 +78,7 @@ def test_convert_segments(tmpdir, attic_repo): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = AtticRepositoryConverter(str(tmpdir), create=False) + repo = 
AtticRepositoryUpgrader(str(tmpdir), create=False) segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) @@ -136,9 +136,9 @@ def test_keys(tmpdir, attic_repo, attic_key_file): define above) :param attic_key_file: an attic.key.KeyfileKey (fixture created above) """ - repository = AtticRepositoryConverter(str(tmpdir), create=False) + repository = AtticRepositoryUpgrader(str(tmpdir), create=False) keyfile = AtticKeyfileKey.find_key_file(repository) - AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) + AtticRepositoryUpgrader.convert_keyfiles(keyfile, dryrun=False) assert key_valid(attic_key_file.path) @@ -157,7 +157,7 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = AtticRepositoryConverter(str(tmpdir), create=False) - repo.convert(dryrun=False) + repo = AtticRepositoryUpgrader(str(tmpdir), create=False) + repo.upgrade(dryrun=False) assert key_valid(attic_key_file.path) assert repo_valid(tmpdir) diff --git a/borg/converter.py b/borg/upgrader.py similarity index 96% rename from borg/converter.py rename to borg/upgrader.py index 402ea8b42..2efb9216c 100644 --- a/borg/converter.py +++ b/borg/upgrader.py @@ -11,11 +11,11 @@ from .key import KeyfileKey, KeyfileNotFoundError ATTIC_MAGIC = b'ATTICSEG' -class AtticRepositoryConverter(Repository): - def convert(self, dryrun=True): +class AtticRepositoryUpgrader(Repository): + def upgrade(self, dryrun=True): """convert an attic repository to a borg repository - those are the files that need to be converted here, from most + those are the files that need to be upgraded here, from most important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing. 
@@ -62,7 +62,7 @@ class AtticRepositoryConverter(Repository): if dryrun: time.sleep(0.001) else: - AtticRepositoryConverter.header_replace(filename, ATTIC_MAGIC, MAGIC) + AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC) print() @staticmethod @@ -197,7 +197,7 @@ class AtticRepositoryConverter(Repository): for cache in caches: print("converting cache %s" % cache) if not dryrun: - AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + AtticRepositoryUpgrader.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey): From fded2219a8c842b56a80926324cbeee8413409f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 12:46:23 -0400 Subject: [PATCH 133/151] mention borg delete borg this makes it clear how to start from scratch, in case the chunk cache failed to be copied and so on. --- borg/archiver.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 202ae0ef6..62da098eb 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -931,10 +931,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") the first backup after the conversion takes longer than expected due to the cache resync. - the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic - will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as - the magic strings will have changed. - it is recommended you run this on a copy of the Attic repository, in case something goes wrong, for example: @@ -942,6 +938,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") borg upgrade -n borg borg upgrade borg + upgrade should be able to resume if interrupted, although it + will still iterate over all segments.
if you want to start + from scratch, use `borg delete` over the copied repository to + make sure the cache files are also removed: + + borg delete borg + + the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic + will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as + the magic strings will have changed. + you have been warned.""") subparser = subparsers.add_parser('upgrade', parents=[common_parser], description=self.do_upgrade.__doc__, From 5409cbaa678eda55c7846726f1146be90ea9b648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 12:56:03 -0400 Subject: [PATCH 134/151] also copy files cache verbatim it seems the file cache does *not* have the ATTIC magic header (nor does it have one in borg), so we don't need to edit the file - we just copy it like a regular file. while i'm here, simplify the cache conversion loop: it's no use splitting the copy and the edition since the latter is so fast, just do everything in one loop, which makes it much easier to read. 
--- borg/upgrader.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/borg/upgrader.py b/borg/upgrader.py index 2efb9216c..33ef2d388 100644 --- a/borg/upgrader.py +++ b/borg/upgrader.py @@ -184,20 +184,17 @@ class AtticRepositoryUpgrader(Repository): if not os.path.exists(borg_cache_dir): os.makedirs(borg_cache_dir) - # non-binary file that we don't need to convert, just copy - copy_cache_file('config') + # file that we don't have a header to convert, just copy + for cache in ['config', 'files']: + copy_cache_file(cache) # we need to convert the headers of those files, copy first - for cache in ['files', 'chunks']: + for cache in ['chunks']: copied = copy_cache_file(cache) if copied: - caches.append(copied) - - # actually convert the headers of the detected files - for cache in caches: - print("converting cache %s" % cache) - if not dryrun: - AtticRepositoryUpgrader.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + print("converting cache %s" % cache) + if not dryrun: + AtticRepositoryUpgrader.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey): From 51dc66d05f4600996ad0c9cb93bbd6a5c7965bf4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 3 Oct 2015 19:29:45 +0200 Subject: [PATCH 135/151] implement borg delete --cache-only repo, attic #123 it deletes just the local cache for the given repository, not the repo itself. 
--- borg/archiver.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 62da098eb..fbdf210a1 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -319,17 +319,19 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") if args.stats: stats.print_('Deleted data:', cache) else: - print("You requested to completely DELETE the repository *including* all archives it contains:") - for archive_info in manifest.list_archive_infos(sort_by='ts'): - print(format_archive(archive_info)) - if not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): - print("""Type "YES" if you understand this and want to continue.\n""") - if input('Do you want to continue? ') != 'YES': - self.exit_code = 1 - return self.exit_code - repository.destroy() + if not args.cache_only: + print("You requested to completely DELETE the repository *including* all archives it contains:") + for archive_info in manifest.list_archive_infos(sort_by='ts'): + print(format_archive(archive_info)) + if not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): + print("""Type "YES" if you understand this and want to continue.\n""") + if input('Do you want to continue? 
') != 'YES': + self.exit_code = 1 + return self.exit_code + repository.destroy() + print("Repository deleted.") cache.destroy() - print("Repository and corresponding cache were deleted.") + print("Cache deleted.") return self.exit_code def do_mount(self, args): @@ -811,6 +813,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('-s', '--stats', dest='stats', action='store_true', default=False, help='print statistics for the deleted archive') + subparser.add_argument('-c', '--cache-only', dest='cache_only', + action='store_true', default=False, + help='delete only the local cache for the given repository') subparser.add_argument('target', metavar='TARGET', nargs='?', default='', type=location_validator(), help='archive or repository to delete') From 6f637bed2f12887edeb01b3c507d53030b6f4cbf Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 5 Oct 2015 02:27:24 +0200 Subject: [PATCH 136/151] LoggedIO: deduplicated code, improved checks and error handling in read() Code shared by read() and iter_objects() was moved into _read(). 
Compared to read()'s previous state, this improved: - fixed size check to avoid read with negative size - exception handler for struct unpack - checking for short read - more precise exception messages --- borg/repository.py | 60 +++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index ee3074311..747cc3d0a 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -534,26 +534,9 @@ class LoggedIO: offset = MAGIC_LEN header = fd.read(self.header_fmt.size) while header: - try: - crc, size, tag = self.header_fmt.unpack(header) - except struct.error as err: - raise IntegrityError('Invalid segment entry header [offset {}]: {}'.format(offset, err)) - if size > MAX_OBJECT_SIZE or size < self.header_fmt.size: - raise IntegrityError('Invalid segment entry size [offset {}]'.format(offset)) - length = size - self.header_fmt.size - rest = fd.read(length) - if len(rest) != length: - raise IntegrityError('Segment entry data short read [offset {}]: expected: {}, got {} bytes'.format( - offset, length, len(rest))) - if crc32(rest, crc32(memoryview(header)[4:])) & 0xffffffff != crc: - raise IntegrityError('Segment entry checksum mismatch [offset {}]'.format(offset)) - if tag not in (TAG_PUT, TAG_DELETE, TAG_COMMIT): - raise IntegrityError('Invalid segment entry tag [offset {}]'.format(offset)) - key = None - if tag in (TAG_PUT, TAG_DELETE): - key = rest[:32] + size, tag, key, data = self._read(fd, self.header_fmt, header, offset, (TAG_PUT, TAG_DELETE, TAG_COMMIT)) if include_data: - yield tag, key, offset, rest[32:] + yield tag, key, offset, data else: yield tag, key, offset offset += size @@ -586,16 +569,39 @@ class LoggedIO: fd = self.get_fd(segment) fd.seek(offset) header = fd.read(self.put_header_fmt.size) - crc, size, tag, key = self.put_header_fmt.unpack(header) - if size > MAX_OBJECT_SIZE: - raise IntegrityError('Invalid segment object size') - data = fd.read(size - 
self.put_header_fmt.size) - if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc: - raise IntegrityError('Segment checksum mismatch') - if tag != TAG_PUT or id != key: - raise IntegrityError('Invalid segment entry header') + size, tag, key, data = self._read(fd, self.put_header_fmt, header, offset, (TAG_PUT, )) + if id != key: + raise IntegrityError('Invalid segment entry header, is not for wanted id [offset {}]'.format(offset)) return data + def _read(self, fd, fmt, header, offset, acceptable_tags): + # some code shared by read() and iter_objects() + try: + hdr_tuple = fmt.unpack(header) + except struct.error as err: + raise IntegrityError('Invalid segment entry header [offset {}]: {}'.format(offset, err)) + if fmt is self.put_header_fmt: + crc, size, tag, key = hdr_tuple + elif fmt is self.header_fmt: + crc, size, tag = hdr_tuple + key = None + else: + raise TypeError("_read called with unsupported format") + if size > MAX_OBJECT_SIZE or size < fmt.size: + raise IntegrityError('Invalid segment entry size [offset {}]'.format(offset)) + length = size - fmt.size + data = fd.read(length) + if len(data) != length: + raise IntegrityError('Segment entry data short read [offset {}]: expected: {}, got {} bytes'.format( + offset, length, len(data))) + if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc: + raise IntegrityError('Segment entry checksum mismatch [offset {}]'.format(offset)) + if tag not in acceptable_tags: + raise IntegrityError('Invalid segment entry header, did not get acceptable tag [offset {}]'.format(offset)) + if key is None and tag in (TAG_PUT, TAG_DELETE): + key, data = data[:32], data[32:] + return size, tag, key, data + def write_put(self, id, data): size = len(data) + self.put_header_fmt.size fd = self.get_write_fd() From c50f32426b4f993a8dcc80cd6b90799f3bce7382 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 5 Oct 2015 23:23:59 +0200 Subject: [PATCH 137/151] do not crash on empty lock.roster, fixes #232 --- 
borg/locking.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/borg/locking.py b/borg/locking.py index 8e4f1a41f..b2beac345 100644 --- a/borg/locking.py +++ b/borg/locking.py @@ -169,6 +169,9 @@ class LockRoster: if err.errno != errno.ENOENT: raise data = {} + except ValueError: + # corrupt/empty roster file? + data = {} return data def save(self, data): From 427ddd64a6734203bc09a92473fa85fe85f645c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 5 Oct 2015 17:50:46 -0400 Subject: [PATCH 138/151] respect XDG_CACHE_HOME fixes attic#181 --- borg/helpers.py | 4 ++-- borg/testsuite/helpers.py | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index f9450c1b8..47d454bec 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -172,8 +172,8 @@ def get_keys_dir(): def get_cache_dir(): """Determine where to repository keys and cache""" - return os.environ.get('BORG_CACHE_DIR', - os.path.join(os.path.expanduser('~'), '.cache', 'borg')) + xdg_cache = os.environ.get('XDG_CACHE_HOME', os.path.join(os.path.expanduser('~'), '.cache')) + return os.environ.get('BORG_CACHE_DIR', os.path.join(xdg_cache, 'borg')) def to_localtime(ts): diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 25ec48c90..4d36eb9ef 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -1,13 +1,14 @@ import hashlib from time import mktime, strptime from datetime import datetime, timezone, timedelta +import os import pytest import sys import msgpack from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \ - prune_within, prune_split, \ + prune_within, prune_split, get_cache_dir, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams from . 
import BaseTestCase @@ -381,3 +382,12 @@ class TestParseTimestamp(BaseTestCase): def test(self): self.assert_equal(parse_timestamp('2015-04-19T20:25:00.226410'), datetime(2015, 4, 19, 20, 25, 0, 226410, timezone.utc)) self.assert_equal(parse_timestamp('2015-04-19T20:25:00'), datetime(2015, 4, 19, 20, 25, 0, 0, timezone.utc)) + + +def test_get_cache_dir(): + """test that get_cache_dir respects environement""" + assert get_cache_dir() == os.path.join(os.path.expanduser('~'), '.cache', 'borg') + os.environ['XDG_CACHE_HOME'] = '/var/tmp/.cache' + assert get_cache_dir() == os.path.join('/var/tmp/.cache', 'borg') + os.environ['BORG_CACHE_DIR'] = '/var/tmp' + assert get_cache_dir() == '/var/tmp' From de2a81160685099a835a7f3dfa3eb3c55cb5f19f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 5 Oct 2015 18:43:54 -0400 Subject: [PATCH 139/151] move RemoteRepository defaults to the class the reasoning behind this is that we may need to test a RemoteRepository setup outside of the main archiver routines, which the current default location makes impossible by moving the umask and remote_path remotes into the RemoteRepository the (reasonable) defaults are available regardless of the (currently obscure) initialisation routine, and make unit tests easier to develop and support --- borg/archiver.py | 8 ++++---- borg/remote.py | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index fbdf210a1..57e30760e 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -571,10 +571,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='verbose output') common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false', help='do not load/update the file metadata cache used to detect unchanged files') - common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M', - help='set umask to M (local and remote, default: 
0o077)') - common_parser.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH', - help='set remote path to executable (default: "borg")') + common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=RemoteRepository.umask, metavar='M', + help='set umask to M (local and remote, default: %(default)s)') + common_parser.add_argument('--remote-path', dest='remote_path', default=RemoteRepository.remote_path, metavar='PATH', + help='set remote path to executable (default: "%(default)s")') # We can't use argparse for "serve" since we don't want it to show up in "Available commands" if args: diff --git a/borg/remote.py b/borg/remote.py index 3a274b214..ce77b8245 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -108,8 +108,9 @@ class RepositoryServer: # pragma: no cover class RemoteRepository: extra_test_args = [] - remote_path = None - umask = None + remote_path = 'borg' + # default umask, overriden by --umask, defaults to read/write only for owner + umask = 0o077 class RPCError(Exception): def __init__(self, name): From 43a65933f7d3e1caa664a84a130c7bd4051bff8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 5 Oct 2015 18:51:20 -0400 Subject: [PATCH 140/151] move ssh generation code to a stub, add unit test --- borg/remote.py | 34 ++++++++++++++++++++++------------ borg/testsuite/repository.py | 4 ++++ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/borg/remote.py b/borg/remote.py index ce77b8245..19a1416a0 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -126,19 +126,14 @@ class RemoteRepository: self.responses = {} self.unpacker = msgpack.Unpacker(use_list=False) self.p = None - # use local umask also for the remote process - umask = ['--umask', '%03o' % self.umask] + # XXX: ideally, the testsuite would subclass Repository and + # override ssh_cmd() instead of this crude hack, although + # __testsuite__ is not a valid domain name so this is pretty + # safe. 
if location.host == '__testsuite__': - args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args - else: # pragma: no cover - args = ['ssh'] - if location.port: - args += ['-p', str(location.port)] - if location.user: - args.append('%s@%s' % (location.user, location.host)) - else: - args.append('%s' % location.host) - args += [self.remote_path, 'serve'] + umask + args = [sys.executable, '-m', 'borg.archiver', 'serve' ] + self.extra_test_args + else: + args = self.ssh_cmd() self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE) self.stdin_fd = self.p.stdin.fileno() self.stdout_fd = self.p.stdout.fileno() @@ -161,6 +156,21 @@ class RemoteRepository: def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.location.canonical_path()) + def umask_flag(self): + return ['--umask', '%03o' % self.umask] + + def ssh_cmd(self, location): + args = ['ssh'] + if location.port: + args += ['-p', str(location.port)] + if location.user: + args.append('%s@%s' % (location.user, location.host)) + else: + args.append('%s' % location.host) + # use local umask also for the remote process + args += [self.remote_path, 'serve'] + self.umask_flag() + return args + def call(self, cmd, *args, **kw): for resp in self.call_many(cmd, [args], **kw): return resp diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 74996b717..5df0a6f97 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -325,6 +325,10 @@ class RemoteRepositoryTestCase(RepositoryTestCase): def test_invalid_rpc(self): self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None)) + def test_ssh_cmd(self): + assert self.repository.umask is not None + assert self.repository.ssh_cmd(Location('example.com:foo')) == ['ssh', 'example.com', 'borg', 'serve'] + self.repository.umask_flag() + class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase): From a0ef4e25ddbc36e2004a8fa6f035890b8cb17e0f Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 5 Oct 2015 18:54:00 -0400 Subject: [PATCH 141/151] add support for arbitrary SSH commands (attic#99) while SSH options can be specified through `~/.ssh/config`, some users may want to use a completely different SSH command for their backups, without overriding their $PATH variable. it may also be easier to do ad-hoc configuration and tests that way. plus, the POLA tells us that users expect something like this to be supported by commands that talk to ssh. it is supported by rsync, git and so on. --- borg/remote.py | 3 ++- borg/testsuite/repository.py | 2 ++ docs/usage.rst | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/borg/remote.py b/borg/remote.py index 19a1416a0..c9d8145ba 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -3,6 +3,7 @@ import fcntl import msgpack import os import select +import shlex from subprocess import Popen, PIPE import sys import tempfile @@ -160,7 +161,7 @@ class RemoteRepository: return ['--umask', '%03o' % self.umask] def ssh_cmd(self, location): - args = ['ssh'] + args = shlex.split(os.environ.get('BORG_RSH', 'ssh')) if location.port: args += ['-p', str(location.port)] if location.user: diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 5df0a6f97..5a1524ed9 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -328,6 +328,8 @@ class RemoteRepositoryTestCase(RepositoryTestCase): def test_ssh_cmd(self): assert self.repository.umask is not None assert self.repository.ssh_cmd(Location('example.com:foo')) == ['ssh', 'example.com', 'borg', 'serve'] + self.repository.umask_flag() + os.environ['BORG_RSH'] = 'ssh --foo' + assert self.repository.ssh_cmd(Location('example.com:foo')) == ['ssh', '--foo', 'example.com', 'borg', 'serve'] + self.repository.umask_flag() class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase): diff --git a/docs/usage.rst b/docs/usage.rst index 95b95d90d..6bd292e14 100644 ---
a/docs/usage.rst +++ b/docs/usage.rst @@ -48,6 +48,8 @@ General: can either leave it away or abbreviate as `::`, if a positional parameter is required. BORG_PASSPHRASE When set, use the value to answer the passphrase question for encrypted repositories. + BORG_RSH + When set, use this command instead of ``ssh``. TMPDIR where temporary files are stored (might need a lot of temporary space for some operations) From 8f0de2cab75eaeeff6e0649ba18b859a694dceb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 5 Oct 2015 19:05:27 -0400 Subject: [PATCH 142/151] fix tests on travis, which seem to set BORG_CACHE_DIR --- borg/testsuite/helpers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 4d36eb9ef..620a77c14 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -386,8 +386,16 @@ class TestParseTimestamp(BaseTestCase): def test_get_cache_dir(): """test that get_cache_dir respects environement""" + # reset BORG_CACHE_DIR in order to test default + old_env = None + if os.environ.get('BORG_CACHE_DIR'): + old_env = os.environ['BORG_CACHE_DIR'] + del(os.environ['BORG_CACHE_DIR']) assert get_cache_dir() == os.path.join(os.path.expanduser('~'), '.cache', 'borg') os.environ['XDG_CACHE_HOME'] = '/var/tmp/.cache' assert get_cache_dir() == os.path.join('/var/tmp/.cache', 'borg') os.environ['BORG_CACHE_DIR'] = '/var/tmp' assert get_cache_dir() == '/var/tmp' + # reset old env + if old_env is not None: + os.environ['BORG_CACHE_DIR'] = old_env From a7b70d87cdbecd53decb9d5d33f407a7347b7a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 5 Oct 2015 19:22:33 -0400 Subject: [PATCH 143/151] complete test coverage for SSH args parsing --- borg/remote.py | 2 +- borg/testsuite/repository.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/borg/remote.py b/borg/remote.py index c9d8145ba..8001abe2c 100644 --- a/borg/remote.py +++ 
b/borg/remote.py @@ -133,7 +133,7 @@ class RemoteRepository: # safe. if location.host == '__testsuite__': args = [sys.executable, '-m', 'borg.archiver', 'serve' ] + self.extra_test_args - else: + else: # pragma: no cover args = self.ssh_cmd() self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE) self.stdin_fd = self.p.stdin.fileno() diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 5a1524ed9..2b99b83d6 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -328,6 +328,9 @@ class RemoteRepositoryTestCase(RepositoryTestCase): def test_ssh_cmd(self): assert self.repository.umask is not None assert self.repository.ssh_cmd(Location('example.com:foo')) == ['ssh', 'example.com', 'borg', 'serve'] + self.repository.umask_flag() + assert self.repository.ssh_cmd(Location('ssh://example.com/foo')) == ['ssh', 'example.com', 'borg', 'serve'] + self.repository.umask_flag() + assert self.repository.ssh_cmd(Location('ssh://user@example.com/foo')) == ['ssh', 'user@example.com', 'borg', 'serve'] + self.repository.umask_flag() + assert self.repository.ssh_cmd(Location('ssh://user@example.com:1234/foo')) == ['ssh', '-p', '1234', 'user@example.com', 'borg', 'serve'] + self.repository.umask_flag() os.environ['BORG_RSH'] = 'ssh --foo' assert self.repository.ssh_cmd(Location('example.com:foo')) == ['ssh', '--foo', 'example.com', 'borg', 'serve'] + self.repository.umask_flag() From 8ddc448f41c42bb5dae71bc9f4e9d6abe801dc2c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 6 Oct 2015 20:35:22 +0200 Subject: [PATCH 144/151] make sure to always give segment and offset in repo IntegrityError exception messages this was only handled correctly at one place, by adding the segment number afterwards. now the segment number is always included. 
--- borg/repository.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index 747cc3d0a..932e4fef3 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -301,7 +301,7 @@ class Repository: try: objects = list(self.io.iter_objects(segment)) except IntegrityError as err: - report_error('Error reading segment {}: {}'.format(segment, err)) + report_error(str(err)) objects = [] if repair: self.io.recover_segment(segment, filename) @@ -530,11 +530,12 @@ class LoggedIO: fd = self.get_fd(segment) fd.seek(0) if fd.read(MAGIC_LEN) != MAGIC: - raise IntegrityError('Invalid segment magic') + raise IntegrityError('Invalid segment magic [segment {}, offset {}]'.format(segment, 0)) offset = MAGIC_LEN header = fd.read(self.header_fmt.size) while header: - size, tag, key, data = self._read(fd, self.header_fmt, header, offset, (TAG_PUT, TAG_DELETE, TAG_COMMIT)) + size, tag, key, data = self._read(fd, self.header_fmt, header, segment, offset, + (TAG_PUT, TAG_DELETE, TAG_COMMIT)) if include_data: yield tag, key, offset, data else: @@ -569,17 +570,19 @@ class LoggedIO: fd = self.get_fd(segment) fd.seek(offset) header = fd.read(self.put_header_fmt.size) - size, tag, key, data = self._read(fd, self.put_header_fmt, header, offset, (TAG_PUT, )) + size, tag, key, data = self._read(fd, self.put_header_fmt, header, segment, offset, (TAG_PUT, )) if id != key: - raise IntegrityError('Invalid segment entry header, is not for wanted id [offset {}]'.format(offset)) + raise IntegrityError('Invalid segment entry header, is not for wanted id [segment {}, offset {}]'.format( + segment, offset)) return data - def _read(self, fd, fmt, header, offset, acceptable_tags): + def _read(self, fd, fmt, header, segment, offset, acceptable_tags): # some code shared by read() and iter_objects() try: hdr_tuple = fmt.unpack(header) except struct.error as err: - raise IntegrityError('Invalid segment entry header [offset 
{}]: {}'.format(offset, err)) + raise IntegrityError('Invalid segment entry header [segment {}, offset {}]: {}'.format( + segment, offset, err)) if fmt is self.put_header_fmt: crc, size, tag, key = hdr_tuple elif fmt is self.header_fmt: @@ -588,16 +591,19 @@ class LoggedIO: else: raise TypeError("_read called with unsupported format") if size > MAX_OBJECT_SIZE or size < fmt.size: - raise IntegrityError('Invalid segment entry size [offset {}]'.format(offset)) + raise IntegrityError('Invalid segment entry size [segment {}, offset {}]'.format( + segment, offset)) length = size - fmt.size data = fd.read(length) if len(data) != length: - raise IntegrityError('Segment entry data short read [offset {}]: expected: {}, got {} bytes'.format( - offset, length, len(data))) + raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format( + segment, offset, length, len(data))) if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc: - raise IntegrityError('Segment entry checksum mismatch [offset {}]'.format(offset)) + raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format( + segment, offset)) if tag not in acceptable_tags: - raise IntegrityError('Invalid segment entry header, did not get acceptable tag [offset {}]'.format(offset)) + raise IntegrityError('Invalid segment entry header, did not get acceptable tag [segment {}, offset {}]'.format( + segment, offset)) if key is None and tag in (TAG_PUT, TAG_DELETE): key, data = data[:32], data[32:] return size, tag, key, data From ee66c4c4354e7fb055485d49563be10bcd411cd8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 6 Oct 2015 21:49:21 +0200 Subject: [PATCH 145/151] remove docs about binary wheels we stop supporting them, because there are better alternatives: - use a distribution package (from your linux distribution), if available - use a pyinstaller binary provided by us (they include all you need in 1 file and thus have better 
compatibility properties and are easier to install than a wheel) - install from source (pypi or git) if everything else fails --- docs/development.rst | 14 -------------- docs/installation.rst | 34 +--------------------------------- 2 files changed, 1 insertion(+), 47 deletions(-) diff --git a/docs/development.rst b/docs/development.rst index 03a4b735e..409a63bdf 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -119,23 +119,9 @@ Checklist:: - Twitter - IRC channel (topic) -- create binary wheels and link them from issue tracker: https://github.com/borgbackup/borg/issues/147 - create standalone binaries and link them from issue tracker: https://github.com/borgbackup/borg/issues/214 -Creating binary wheels ----------------------- - -With virtual env activated:: - - pip install -U wheel - python setup.py bdist_wheel - ls -l dist/*.whl - -Note: Binary wheels are rather specific for the platform they get built on. - E.g. a wheel built for Ubuntu 14.04 64bit likely will not work on Centos7 64bit. - - Creating standalone binaries ---------------------------- diff --git a/docs/installation.rst b/docs/installation.rst index e0608027c..50957d17a 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -29,11 +29,8 @@ Below, we describe different ways to install |project_name|. binary package (for your Linux/*BSD/OS X/... distribution). - **pyinstaller binary** - easy and fast, we provide a ready-to-use binary file that just works on the supported platforms -- **wheel** - easy and fast, needs a platform specific borgbackup binary wheel, - which matches your platform [OS and CPU]). - **pypi** - installing a source package from pypi needs more installation steps - and will compile stuff - try this if there is no binary wheel that works for - you. + and will need a compiler, development headers, etc.. - **git** - for developers and power users who want to have the latest code or use revision control (each release is tagged). 
@@ -91,35 +88,6 @@ It is supposed to work without requiring installation or preparations. Check https://github.com/borgbackup/borg/issues/214 for available binaries. -Debian Jessie / Ubuntu 14.04 preparations (wheel) -------------------------------------------------- - -.. parsed-literal:: - - # Python stuff we need - apt-get install python3 python3-pip - - # Libraries we need (fuse is optional) - apt-get install openssl libacl1 liblz4-1 fuse - - -Installation (wheel) --------------------- - -This uses the latest binary wheel release. - -.. parsed-literal:: - - # Check https://github.com/borgbackup/borg/issues/147 for the correct - # platform-specific binary wheel, download and install it: - - # system-wide installation, needs sudo/root permissions: - sudo pip install borgbackup.whl - - # home directory installation, no sudo/root needed: - pip install --user borgbackup.whl - - Debian Jessie / Ubuntu 14.04 preparations (git/pypi) ---------------------------------------------------- From 28a85bf0aa0359bd3ac0f1bd9d898b8e09487533 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 6 Oct 2015 21:53:20 +0200 Subject: [PATCH 146/151] update website sidebar link also --- docs/_themes/local/sidebarusefullinks.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/_themes/local/sidebarusefullinks.html b/docs/_themes/local/sidebarusefullinks.html index 368dee25f..47de85364 100644 --- a/docs/_themes/local/sidebarusefullinks.html +++ b/docs/_themes/local/sidebarusefullinks.html @@ -5,7 +5,7 @@
  • Main Web Site
  • PyPI packages
  • -
  • Binary Packages
  • +
  • Binaries
  • Current ChangeLog
  • GitHub
  • Issue Tracker
  • From 10db8c1d9bc5b0e4050ac4cf0a673c6ced6f27ad Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 6 Oct 2015 22:55:48 +0200 Subject: [PATCH 147/151] update CHANGES.rst --- CHANGES.rst | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index b2c3af457..016a55348 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,41 @@ Borg Changelog ============== +Version 0.27.0 +-------------- + +New features: + +- "borg upgrade" command - attic -> borg one time converter / migration, #21 +- temporary hack to avoid using lots of disk space for chunks.archive.d, #235: + To use it: rm -rf chunks.archive.d ; touch chunks.archive.d +- respect XDG_CACHE_HOME, attic #181 +- add support for arbitrary SSH commands, attic #99 +- borg delete --cache-only REPO (only delete cache, not REPO), attic #123 + + +Bug fixes: + +- use Debian 7 (wheezy) to build pyinstaller borgbackup binaries, fixes slow + down observed when running the Centos6-built binary on Ubuntu, #222 +- do not crash on empty lock.roster, fixes #232 +- fix multiple issues with the cache config version check, #234 +- fix segment entry header size check, attic #352 + plus other error handling improvements / code deduplication there. 
+- always give segment and offset in repo IntegrityErrors + + +Other changes: + +- stop producing binary wheels, remove docs about it, #147 +- docs: + - add warning about prune + - generate usage include files only as needed + - development docs: add Vagrant section + - update / improve / reformat FAQ + - hint to single-file pyinstaller binaries from README + + Version 0.26.1 -------------- From 190eed6bb9b695aaab5c2d2e53a7f9cdb63e8a3d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 6 Oct 2015 23:28:56 +0200 Subject: [PATCH 148/151] Vagrant: check out pyinstaller code from master branch they just recently released 3.0 and that is in master now --- Vagrantfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index d179bdd6c..45a0e7e85 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -204,7 +204,7 @@ def install_pyinstaller(boxname) . borg-env/bin/activate git clone https://github.com/pyinstaller/pyinstaller.git cd pyinstaller - git checkout develop + git checkout master pip install -e . EOF end @@ -216,7 +216,7 @@ def install_pyinstaller_bootloader(boxname) . 
borg-env/bin/activate git clone https://github.com/pyinstaller/pyinstaller.git cd pyinstaller - git checkout python3 + git checkout master # build bootloader, if it is not included cd bootloader python ./waf all From a4967ec5829ee599482d89a9b9cfc227cf16997f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 7 Oct 2015 03:32:55 +0200 Subject: [PATCH 149/151] ssh_cmd: fix wrong caller, fixes #255 --- borg/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/remote.py b/borg/remote.py index 8001abe2c..b9847c7e4 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -134,7 +134,7 @@ class RemoteRepository: if location.host == '__testsuite__': args = [sys.executable, '-m', 'borg.archiver', 'serve' ] + self.extra_test_args else: # pragma: no cover - args = self.ssh_cmd() + args = self.ssh_cmd(location) self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE) self.stdin_fd = self.p.stdin.fileno() self.stdout_fd = self.p.stdout.fileno() From 81423071d7bb8abd198d6baa98edb0361d278769 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 7 Oct 2015 03:39:46 +0200 Subject: [PATCH 150/151] vagrant: llfuse install on darwin needs pkgconfig installed --- Vagrantfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Vagrantfile b/Vagrantfile index 45a0e7e85..72dfdfddd 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -62,6 +62,7 @@ def packages_darwin brew install lz4 brew install fakeroot brew install git + brew install pkgconfig touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile EOF end From 6299f2d02ce2eb8a463f14c6776933d295a6f662 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 7 Oct 2015 03:42:08 +0200 Subject: [PATCH 151/151] docs: pyinstaller 3.0 is released now this or any later 3.x or git master checkout should work. 
--- docs/development.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/development.rst b/docs/development.rst index 409a63bdf..9b4c0d893 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -129,7 +129,7 @@ Make sure you have everything built and installed (including llfuse and fuse). With virtual env activated:: - pip install pyinstaller==3.0.dev2 # or a later 3.x release or git checkout + pip install "pyinstaller>=3.0" # or git checkout master pyinstaller -F -n borg-PLATFORM --hidden-import=logging.config borg/__main__.py ls -l dist/*