From 7c6f3ece662709ab596469d173ec5f4826e601ce Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 20 Aug 2015 15:55:12 +0100 Subject: [PATCH 01/17] Initialize chunker fd to -1, so it's not equal to STDIN_FILENO (0) --- borg/_chunker.c | 1 + 1 file changed, 1 insertion(+) diff --git a/borg/_chunker.c b/borg/_chunker.c index 5e599ed89..9dbed1fa5 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -96,6 +96,7 @@ chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32 c->table = buzhash_init_table(seed); c->buf_size = max_size; c->data = malloc(c->buf_size); + c->fh = -1; return c; } From ce3e67cb96f5a189a2f93d5d4847d7dd4b5aea78 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 20 Aug 2015 17:19:48 +0100 Subject: [PATCH 02/17] chunker - fix 4GB files on 32-bit systems From code inspection - effect not actually tested. --- borg/_chunker.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 9dbed1fa5..8242a2243 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -83,7 +83,8 @@ typedef struct { PyObject *fd; int fh; int done, eof; - size_t remaining, bytes_read, bytes_yielded, position, last; + size_t remaining, position, last; + off_t bytes_read, bytes_yielded; } Chunker; static Chunker * From 59a44296e4e6aace3c0fe0154fc1a27a7a75bee6 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 20 Aug 2015 17:48:59 +0100 Subject: [PATCH 03/17] chunker - cast from size_t to off_t can now be removed Sorry, this should really have been part of the previous commit - it's why I noticed a problem. --- borg/_chunker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 8242a2243..23abf1e6c 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -163,7 +163,7 @@ chunker_fill(Chunker *c) // size limit) kick out data from the cache that might be still useful // for the OS or other processes. 
if (length > 0) { - posix_fadvise(c->fh, (off_t) offset, (off_t) length, POSIX_FADV_DONTNEED); + posix_fadvise(c->fh, offset, length, POSIX_FADV_DONTNEED); } #endif } From 0b1035746e3382663b24308ea2b53e8a3f426dc0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 00:29:46 +0200 Subject: [PATCH 04/17] read special files as if they were regular files, update docs, closes #79 do not use the files cache for such special files --- borg/archiver.py | 14 ++++++++++---- borg/cache.py | 5 +++-- docs/usage.rst | 10 ++++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 8cce07b8b..e0e7a94f6 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -145,7 +145,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") continue else: restrict_dev = None - self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev) + self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev, + read_special=args.read_special) archive.save(timestamp=args.timestamp) if args.progress: archive.stats.show_progress(final=True) @@ -163,7 +164,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") print('-' * 78) return self.exit_code - def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev): + def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev, + read_special=False): if exclude_path(path, excludes): return try: @@ -180,7 +182,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # Ignore if nodump flag is set if has_lchflags and (st.st_flags & stat.UF_NODUMP): return - if stat.S_ISREG(st.st_mode): + if (stat.S_ISREG(st.st_mode) or + read_special and not stat.S_ISDIR(st.st_mode)): try: status = archive.process_file(path, st, cache) except IOError as e: @@ -197,7 +200,7 @@ Type "Yes I am sure" if you understand 
this and want to continue.\n""") for filename in sorted(entries): entry_path = os.path.normpath(os.path.join(path, filename)) self._process(archive, cache, excludes, exclude_caches, skip_inodes, - entry_path, restrict_dev) + entry_path, restrict_dev, read_special=read_special) elif stat.S_ISLNK(st.st_mode): status = archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): @@ -687,6 +690,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") 'zlib,0 .. zlib,9 == zlib (with level 0..9), ' 'lzma == lzma (default level 6), ' 'lzma,0 .. lzma,9 == lzma (with level 0..9).') + subparser.add_argument('--read-special', dest='read_special', + action='store_true', default=False, + help='open and read special files as if they were regular files') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/cache.py b/borg/cache.py index 2391be275..639ffc279 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -3,6 +3,7 @@ from .remote import cache_if_remote import errno import msgpack import os +import stat import sys from binascii import hexlify import shutil @@ -381,7 +382,7 @@ class Cache: stats.update(-size, -csize, False) def file_known_and_unchanged(self, path_hash, st): - if not self.do_files: + if not (self.do_files and stat.S_ISREG(st.st_mode)): return None if self.files is None: self._read_files() @@ -398,7 +399,7 @@ class Cache: return None def memorize_file(self, path_hash, st, ids): - if not self.do_files: + if not (self.do_files and stat.S_ISREG(st.st_mode)): return # Entry: Age, inode, size, mtime, chunk ids mtime_ns = st_mtime_ns(st) diff --git a/docs/usage.rst b/docs/usage.rst index 8595ca7f8..27c258504 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -210,6 +210,11 @@ Examples # Even slower, even higher compression (N = 0..9) $ borg create --compression lzma,N /mnt/backup::repo ~ + # Backup some LV snapshots (you have to create the snapshots before this + 
# and remove them afterwards). We also backup the output of lvdisplay so + # we can see the LV sizes at restore time. See also "borg extract" examples. + $ lvdisplay > lvdisplay.txt + $ borg create --read-special /mnt/backup::repo lvdisplay.txt /dev/vg0/*-snapshot .. include:: usage/extract.rst.inc @@ -229,6 +234,11 @@ Examples # Extract the "src" directory but exclude object files $ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o' + # Restore LV snapshots (the target LVs /dev/vg0/* of correct size have + # to be already available and will be overwritten by this command!) + $ borg extract --stdout /mnt/backup::repo dev/vg0/root-snapshot > /dev/vg0/root + $ borg extract --stdout /mnt/backup::repo dev/vg0/home-snapshot > /dev/vg0/home + Note: currently, extract always writes into the current working directory ("."), so make sure you ``cd`` to the right place before calling ``borg extract``. From a912c027573ea031094de91827434e64bb0a3675 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 01:10:43 +0200 Subject: [PATCH 05/17] detect inconsistency / corruption / hash collision, closes #170 added a check that compares the size of the new chunk with the stored size of the already existing chunk in storage that has the same id_hash value. raise an exception if there is a size mismatch. 
this could happen if: - the stored size is somehow incorrect (corruption or software bug) - we found a hash collision for the id_hash (for sha256, this is very unlikely) --- borg/cache.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 2391be275..def181629 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -347,9 +347,9 @@ class Cache: def add_chunk(self, id, data, stats): if not self.txn_active: self.begin_txn() - if self.seen_chunk(id): - return self.chunk_incref(id, stats) size = len(data) + if self.seen_chunk(id, size): + return self.chunk_incref(id, stats) data = self.key.encrypt(data) csize = len(data) self.repository.put(id, data, wait=False) @@ -357,8 +357,14 @@ class Cache: stats.update(size, csize, True) return id, size, csize - def seen_chunk(self, id): - return self.chunks.get(id, (0, 0, 0))[0] + def seen_chunk(self, id, size=None): + refcount, stored_size, _ = self.chunks.get(id, (0, None, None)) + if size is not None and stored_size is not None and size != stored_size: + # we already have a chunk with that id, but different size. + # this is either a hash collision (unlikely) or corruption or a bug. + raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" % ( + id, stored_size, size)) + return refcount def chunk_incref(self, id, stats): if not self.txn_active: From b3f5231bac387741aad790b6840697a24695d61c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 18:18:24 +0200 Subject: [PATCH 06/17] BORG_REPO env var support sets the default repository to use, e.g. 
like: export BORG_REPO=/mnt/backup/repo borg init borg create ::archive borg list borg mount :: /mnt fusermount -u /mnt borg delete ::archive --- borg/archiver.py | 15 +++-- borg/helpers.py | 25 ++++++- borg/testsuite/helpers.py | 133 +++++++++++++++++++++++++++++--------- docs/usage.rst | 12 ++-- 4 files changed, 141 insertions(+), 44 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 8cce07b8b..6a1e40b7e 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -18,7 +18,7 @@ from .compress import Compressor, COMPR_BUFFER from .repository import Repository from .cache import Cache from .key import key_creator -from .helpers import Error, location_validator, format_time, format_file_size, \ +from .helpers import Error, location_validator, Location, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ @@ -556,7 +556,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") description=self.do_init.__doc__, epilog=init_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_init) - subparser.add_argument('repository', metavar='REPOSITORY', + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), type=location_validator(archive=False), help='repository to create') subparser.add_argument('-e', '--encryption', dest='encryption', @@ -604,7 +604,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=check_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_check) - subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', + subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default=Location(), 
type=location_validator(), help='repository or archive to check consistency of') subparser.add_argument('--repository-only', dest='repo_only', action='store_true', @@ -629,7 +629,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=change_passphrase_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_change_passphrase) - subparser.add_argument('repository', metavar='REPOSITORY', + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), type=location_validator(archive=False)) create_epilog = textwrap.dedent(""" @@ -760,7 +760,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('-s', '--stats', dest='stats', action='store_true', default=False, help='print statistics for the deleted archive') - subparser.add_argument('target', metavar='TARGET', + subparser.add_argument('target', metavar='TARGET', nargs='?', default=Location(), type=location_validator(), help='archive or repository to delete') @@ -775,7 +775,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('--short', dest='short', action='store_true', default=False, help='only print file/directory names, nothing else') - subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(), + subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default=Location(), + type=location_validator(), help='repository/archive to list contents of') mount_epilog = textwrap.dedent(""" This command mounts an archive as a FUSE filesystem. 
This can be useful for @@ -858,7 +859,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='number of yearly archives to keep') subparser.add_argument('-p', '--prefix', dest='prefix', type=str, help='only consider archive names starting with this prefix') - subparser.add_argument('repository', metavar='REPOSITORY', + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), type=location_validator(archive=False), help='repository to prune') diff --git a/borg/helpers.py b/borg/helpers.py index 6d2b81736..aa5bead0b 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -466,13 +466,34 @@ class Location: r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$') scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?' r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$') + # get the repo from BORG_REPO env and the optional archive from param. + # if the syntax requires giving REPOSITORY (see "borg mount"), + # use "::" to let it use the env var. + # if REPOSITORY argument is optional, it'll automatically use the env. 
+ env_re = re.compile(r'(?:::(?P<archive>.+)?)?$') - def __init__(self, text): + def __init__(self, text=''): self.orig = text - if not self.parse(text): + if not self.parse(self.orig): raise ValueError def parse(self, text): + valid = self._parse(text) + if valid: + return True + m = self.env_re.match(text) + if not m: + return False + repo = os.environ.get('BORG_REPO') + if repo is None: + return False + valid = self._parse(repo) + if not valid: + return False + self.archive = m.group('archive') + return True + + def _parse(self, text): m = self.ssh_re.match(text) if m: self.proto = m.group('proto') diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index b61a8268f..95531df83 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -23,42 +23,115 @@ class BigIntTestCase(BaseTestCase): self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70) -class LocationTestCase(BaseTestCase): +class TestLocationWithoutEnv: + def test_ssh(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('ssh://user@host:1234/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')" + assert repr(Location('ssh://user@host:1234/some/path')) == \ + "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)" - def test(self): - self.assert_equal( - repr(Location('ssh://user@host:1234/some/path::archive')), - "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')" - ) - self.assert_equal( - repr(Location('file:///some/path::archive')), - "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')" - ) - self.assert_equal( - repr(Location('user@host:/some/path::archive')), - "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')" - ) - self.assert_equal( - repr(Location('path::archive')), - 
"Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')" - ) - self.assert_equal( - repr(Location('/some/absolute/path::archive')), - "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')" - ) - self.assert_equal( - repr(Location('some/relative/path::archive')), - "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')" - ) - self.assert_raises(ValueError, lambda: Location('ssh://localhost:22/path:archive')) + def test_file(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('file:///some/path::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')" + assert repr(Location('file:///some/path')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)" - def test_canonical_path(self): + def test_scp(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('user@host:/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')" + assert repr(Location('user@host:/some/path')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)" + + def test_folder(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('path::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')" + assert repr(Location('path')) == \ + "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)" + + def test_abspath(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('/some/absolute/path::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')" + assert 
repr(Location('/some/absolute/path')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)" + + def test_relpath(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('some/relative/path::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')" + assert repr(Location('some/relative/path')) == \ + "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)" + + def test_underspecified(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + with pytest.raises(ValueError): + Location('::archive') + with pytest.raises(ValueError): + Location('::') + with pytest.raises(ValueError): + Location() + + def test_no_double_colon(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + with pytest.raises(ValueError): + Location('ssh://localhost:22/path:archive') + + def test_canonical_path(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) locations = ['some/path::archive', 'file://some/path::archive', 'host:some/path::archive', 'host:~user/some/path::archive', 'ssh://host/some/path::archive', 'ssh://user@host:1234/some/path::archive'] for location in locations: - self.assert_equal(Location(location).canonical_path(), - Location(Location(location).canonical_path()).canonical_path()) + assert Location(location).canonical_path() == \ + Location(Location(location).canonical_path()).canonical_path() + + +class TestLocationWithEnv: + def test_ssh(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', 'ssh://user@host:1234/some/path') + assert repr(Location('::archive')) == \ + "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)" + + def test_file(self, monkeypatch): + 
monkeypatch.setenv('BORG_REPO', 'file:///some/path') + assert repr(Location('::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)" + + def test_scp(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', 'user@host:/some/path') + assert repr(Location('::archive')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)" + + def test_folder(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', 'path') + assert repr(Location('::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)" + + def test_abspath(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', '/some/absolute/path') + assert repr(Location('::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)" + + def test_relpath(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', 'some/relative/path') + assert repr(Location('::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)" class FormatTimedeltaTestCase(BaseTestCase): diff --git a/docs/usage.rst b/docs/usage.rst index 8595ca7f8..882ba469b 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -41,9 +41,15 @@ Environment Variables |project_name| uses 
some environment variables for automation: -Specifying a passphrase: +General: + BORG_REPO + When set, use the value to give the default repository location. If a command needs an archive + parameter, you can abbreviate as `::archive`. If a command needs a repository parameter, you + can either leave it away or abbreviate as `::`, if a positional parameter is required. BORG_PASSPHRASE When set, use the value to answer the passphrase question for encrypted repositories. + TMPDIR + where temporary files are stored (might need a lot of temporary space for some operations) Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning): BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK @@ -64,10 +70,6 @@ Building: BORG_OPENSSL_PREFIX Adds given OpenSSL header file directory to the default locations (setup.py). -General: - TMPDIR - where temporary files are stored (might need a lot of temporary space for some operations) - Please note: From 817ce18bc6c1f9507e4ce70169d73bf7ae7769e2 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 20:19:28 +0200 Subject: [PATCH 07/17] fix repository arg default --- borg/archiver.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 6a1e40b7e..54389d5e7 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -18,7 +18,7 @@ from .compress import Compressor, COMPR_BUFFER from .repository import Repository from .cache import Cache from .key import key_creator -from .helpers import Error, location_validator, Location, format_time, format_file_size, \ +from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ @@ -556,7 +556,7 
@@ Type "Yes I am sure" if you understand this and want to continue.\n""") description=self.do_init.__doc__, epilog=init_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_init) - subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), help='repository to create') subparser.add_argument('-e', '--encryption', dest='encryption', @@ -604,7 +604,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=check_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_check) - subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default=Location(), + subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', type=location_validator(), help='repository or archive to check consistency of') subparser.add_argument('--repository-only', dest='repo_only', action='store_true', @@ -629,7 +629,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=change_passphrase_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_change_passphrase) - subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False)) create_epilog = textwrap.dedent(""" @@ -760,7 +760,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('-s', '--stats', dest='stats', action='store_true', default=False, help='print statistics for the deleted archive') - subparser.add_argument('target', metavar='TARGET', nargs='?', default=Location(), + subparser.add_argument('target', metavar='TARGET', nargs='?', default='', 
type=location_validator(), help='archive or repository to delete') @@ -775,7 +775,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('--short', dest='short', action='store_true', default=False, help='only print file/directory names, nothing else') - subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default=Location(), + subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', type=location_validator(), help='repository/archive to list contents of') mount_epilog = textwrap.dedent(""" @@ -859,7 +859,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='number of yearly archives to keep') subparser.add_argument('-p', '--prefix', dest='prefix', type=str, help='only consider archive names starting with this prefix') - subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), help='repository to prune') From f5069c4e812a874ef9e742a9c3712b7ba7ca15c7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 21:11:52 +0200 Subject: [PATCH 08/17] fix reaction to "no" answer at delete repo prompt, fixes #182 --- borg/archiver.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 8cce07b8b..0f931cd39 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -296,10 +296,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") print("You requested to completely DELETE the repository *including* all archives it contains:") for archive_info in manifest.list_archive_infos(sort_by='ts'): print(format_archive(archive_info)) - while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): + if not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): print("""Type "YES" if you understand this and want to 
continue.\n""") - if input('Do you want to continue? ') == 'YES': - break + if input('Do you want to continue? ') != 'YES': + self.exit_code = 1 + return self.exit_code repository.destroy() cache.destroy() print("Repository and corresponding cache were deleted.") From e244fe2f69288a01ae4aff2a5cef9ac6564ca21b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 22:06:52 +0200 Subject: [PATCH 09/17] change 2 more chunker vars to off_t so they get 64bit on 32bit platforms. --- borg/_chunker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 23abf1e6c..b817775b0 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -130,7 +130,7 @@ static int chunker_fill(Chunker *c) { ssize_t n; - size_t offset, length; + off_t offset, length; PyObject *data; memmove(c->data, c->data + c->last, c->position + c->remaining - c->last); c->position -= c->last; From 13f20647dcad97f2726fa2fac8ebcad92a9a19df Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 23:26:47 +0200 Subject: [PATCH 10/17] use absolute path, attic issue #200, attic issue #137 the daemonize code changes the cwd, thus a relative repo path can't work. 
borg mount repo mnt # did not work borg mount --foreground repo mnt # did work borg mount /abs/path/repo mnt # did work --- borg/repository.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index 559a87d87..f43161fb6 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -50,14 +50,14 @@ class Repository: """Object with key {} not found in repository {}.""" def __init__(self, path, create=False, exclusive=False): - self.path = path + self.path = os.path.abspath(path) self.io = None self.lock = None self.index = None self._active_txn = False if create: - self.create(path) - self.open(path, exclusive) + self.create(self.path) + self.open(self.path, exclusive) def __del__(self): self.close() From 16e5f241fca9d622ab38205f185906d1db4a871c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 23:51:03 +0200 Subject: [PATCH 11/17] update CHANGES --- CHANGES.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index d4da70e96..eb7b93667 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,27 @@ Borg Changelog ============== +Version 0.26.0 (not released yet) +--------------------------------- + +New features: + +- BORG_REPO env var to specify the default repo, #168 +- read special files as if they were regular files, #79 + +Bug fixes: + +- borg mount repo: use absolute path, attic #200, attic #137 +- chunker: use off_t to get 64bit on 32bit platform, #178 +- initialize chunker fd to -1, so it's not equal to STDIN_FILENO (0) +- fix reaction to "no" answer at delete repo prompt, #182 + +Other changes: + +- detect inconsistency / corruption / hash collision, #170 +- replace versioneer with setuptools_scm, #106 + + Version 0.25.0 -------------- From 1aacdda4a409da803c722234f96fcc3043b72aef Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 8 Sep 2015 03:12:45 +0200 Subject: [PATCH 12/17] implement borg create --dry-run, attic issue #267 
also: fix verbose mode display of stdin backup --- borg/archive.py | 1 + borg/archiver.py | 104 ++++++++++++++++++++++--------------- borg/testsuite/archiver.py | 8 +++ 3 files changed, 71 insertions(+), 42 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 18867dbd9..d6eff1ba9 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -455,6 +455,7 @@ class Archive: b'mtime': int_to_bigint(int(time.time()) * 1000000000) } self.add_item(item) + return 'i' # stdin def process_file(self, path, st, cache): status = None diff --git a/borg/archiver.py b/borg/archiver.py index 728b85482..fd6422781 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -102,17 +102,21 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") def do_create(self, args): """Create new archive""" + dry_run = args.dry_run t0 = datetime.now() - repository = self.open_repository(args.archive, exclusive=True) - manifest, key = Manifest.load(repository) - compr_args = dict(buffer=COMPR_BUFFER) - compr_args.update(args.compression) - key.compressor = Compressor(**compr_args) - cache = Cache(repository, key, manifest, do_files=args.cache_files) - archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, - create=True, checkpoint_interval=args.checkpoint_interval, - numeric_owner=args.numeric_owner, progress=args.progress, - chunker_params=args.chunker_params) + if not dry_run: + repository = self.open_repository(args.archive, exclusive=True) + manifest, key = Manifest.load(repository) + compr_args = dict(buffer=COMPR_BUFFER) + compr_args.update(args.compression) + key.compressor = Compressor(**compr_args) + cache = Cache(repository, key, manifest, do_files=args.cache_files) + archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, + create=True, checkpoint_interval=args.checkpoint_interval, + numeric_owner=args.numeric_owner, progress=args.progress, + chunker_params=args.chunker_params) + else: + archive = cache = None # Add 
cache dir to inode_skip list skip_inodes = set() try: @@ -130,11 +134,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") for path in args.paths: if path == '-': # stdin path = 'stdin' - self.print_verbose(path) - try: - archive.process_stdin(path, cache) - except IOError as e: - self.print_error('%s: %s', path, e) + if not dry_run: + try: + status = archive.process_stdin(path, cache) + except IOError as e: + self.print_error('%s: %s', path, e) + else: + status = '-' + self.print_verbose("%1s %s", status, path) continue path = os.path.normpath(path) if args.dontcross: @@ -146,26 +153,27 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") else: restrict_dev = None self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev, - read_special=args.read_special) - archive.save(timestamp=args.timestamp) - if args.progress: - archive.stats.show_progress(final=True) - if args.stats: - t = datetime.now() - diff = t - t0 - print('-' * 78) - print('Archive name: %s' % args.archive.archive) - print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii')) - print('Start time: %s' % t0.strftime('%c')) - print('End time: %s' % t.strftime('%c')) - print('Duration: %s' % format_timedelta(diff)) - print('Number of files: %d' % archive.stats.nfiles) - archive.stats.print_('This archive:', cache) - print('-' * 78) + read_special=args.read_special, dry_run=dry_run) + if not dry_run: + archive.save(timestamp=args.timestamp) + if args.progress: + archive.stats.show_progress(final=True) + if args.stats: + t = datetime.now() + diff = t - t0 + print('-' * 78) + print('Archive name: %s' % args.archive.archive) + print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii')) + print('Start time: %s' % t0.strftime('%c')) + print('End time: %s' % t.strftime('%c')) + print('Duration: %s' % format_timedelta(diff)) + print('Number of files: %d' % archive.stats.nfiles) + archive.stats.print_('This 
archive:', cache) + print('-' * 78) return self.exit_code def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev, - read_special=False): + read_special=False, dry_run=False): if exclude_path(path, excludes): return try: @@ -184,14 +192,16 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") return if (stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode)): - try: - status = archive.process_file(path, st, cache) - except IOError as e: - self.print_error('%s: %s', path, e) + if not dry_run: + try: + status = archive.process_file(path, st, cache) + except IOError as e: + self.print_error('%s: %s', path, e) elif stat.S_ISDIR(st.st_mode): if exclude_caches and is_cachedir(path): return - status = archive.process_dir(path, st) + if not dry_run: + status = archive.process_dir(path, st) try: entries = os.listdir(path) except OSError as e: @@ -200,13 +210,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") for filename in sorted(entries): entry_path = os.path.normpath(os.path.join(path, filename)) self._process(archive, cache, excludes, exclude_caches, skip_inodes, - entry_path, restrict_dev, read_special=read_special) + entry_path, restrict_dev, read_special=read_special, + dry_run=dry_run) elif stat.S_ISLNK(st.st_mode): - status = archive.process_symlink(path, st) + if not dry_run: + status = archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): - status = archive.process_fifo(path, st) + if not dry_run: + status = archive.process_fifo(path, st) elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode): - status = archive.process_dev(path, st) + if not dry_run: + status = archive.process_dev(path, st) elif stat.S_ISSOCK(st.st_mode): # Ignore unix sockets return @@ -222,7 +236,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # Note: A/M/U is relative to the "files" cache, not to the repo. 
# This would be an issue if the files cache is not used. if status is None: - status = '?' # need to add a status code somewhere + if not dry_run: + status = '?' # need to add a status code somewhere + else: + status = '-' # dry run, item was not backed up # output ALL the stuff - it can be easily filtered using grep. # even stuff considered unchanged might be interesting. self.print_verbose("%1s %s", status, remove_surrogates(path)) @@ -694,6 +711,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('--read-special', dest='read_special', action='store_true', default=False, help='open and read special files as if they were regular files') + subparser.add_argument('-n', '--dry-run', dest='dry_run', + action='store_true', default=False, + help='do not create a backup archive') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index e635d1b0c..95df90a0a 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -485,6 +485,14 @@ class ArchiverTestCase(ArchiverTestCaseBase): mode = os.stat(self.repository_path).st_mode self.assertEqual(stat.S_IMODE(mode), 0o700) + def test_create_dry_run(self): + self.cmd('init', self.repository_location) + self.cmd('create', '--dry-run', self.repository_location + '::test', 'input') + # Make sure no archive has been created + repository = Repository(self.repository_path) + manifest, key = Manifest.load(repository) + self.assert_equal(len(manifest.archives), 0) + def test_cmdline_compatibility(self): self.create_regular_file('file1', size=1024 * 80) self.cmd('init', self.repository_location) From d9fb1d2b03b58bccc1908c185b62346ee2677f79 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Tue, 8 Sep 2015 23:33:34 -0400 Subject: [PATCH 13/17] Normalize paths before pattern matching on OS X The OS X file system HFS+ stores path names as 
Unicode, and converts them to a variant of Unicode NFD for storage. Because path names will always be in this canonical form, it's not friendly to require users to match this form exactly. Convert paths from the repository and patterns from the command line to NFD before comparing them. Unix (and Windows, I think) file systems don't convert path names into a canonical form, so users will continue to have to exactly match the path name they want, because there could be two paths with the same character visually that are actually composed of different byte sequences. --- borg/helpers.py | 43 +++++++++++++++--- borg/testsuite/helpers.py | 96 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 7 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index aa5bead0b..ecf138125 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -7,6 +7,8 @@ import pwd import re import sys import time +import unicodedata + from datetime import datetime, timezone, timedelta from fnmatch import translate from operator import attrgetter @@ -220,6 +222,10 @@ def exclude_path(path, patterns): # unify the two cases, we add a path separator to the end of # the path before matching. +##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +##### For discussion only, don't merge this code! +##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + class IncludePattern: """Literal files or directories listed on the command line for some operations (e.g. extract, but not create). @@ -227,10 +233,22 @@ class IncludePattern: path match as well. A trailing slash makes no difference. 
""" def __init__(self, pattern): - self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep + def match(path): + return (path+os.path.sep).startswith(self.pattern) - def match(self, path): - return (path+os.path.sep).startswith(self.pattern) + # HFS+ converts paths to a canonical form, so users shouldn't be + # required to enter an exact match + if sys.platform in ('darwin',): + # repository paths will be mostly in NFD, as the OSX exception list + # to NFD is small, so normalize to that form for best performance + pattern = unicodedata.normalize("NFD", pattern) + self.match = lambda p: match(unicodedata.normalize("NFD", p)) + # Windows and Unix filesystems allow different forms, so users + # always have to enter an exact match + else: + self.match = match + + self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep def __repr__(self): return '%s(%s)' % (type(self), self.pattern) @@ -241,17 +259,30 @@ class ExcludePattern(IncludePattern): exclude the contents of a directory, but not the directory itself. """ def __init__(self, pattern): + def match(path): + return self.regex.match(path+os.path.sep) is not None + if pattern.endswith(os.path.sep): self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep else: self.pattern = os.path.normpath(pattern)+os.path.sep+'*' + + # HFS+ converts paths to a canonical form, so users shouldn't be + # required to enter an exact match + if sys.platform in ('darwin',): + # repository paths will be mostly in NFD, as the OSX exception list + # to NFD is small, so normalize to that form for best performance + self.pattern = unicodedata.normalize("NFD", self.pattern) + self.match = lambda p: match(unicodedata.normalize("NFD", p)) + # Windows and Unix filesystems allow different forms, so users + # always have to enter an exact match + else: + self.match = match + # fnmatch and re.match both cache compiled regular expressions. # Nevertheless, this is about 10 times faster. 
self.regex = re.compile(translate(self.pattern)) - def match(self, path): - return self.regex.match(path+os.path.sep) is not None - def __repr__(self): return '%s(%s)' % (type(self), self.pattern) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 95531df83..002033f57 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -3,9 +3,10 @@ from time import mktime, strptime from datetime import datetime, timezone, timedelta import pytest +import sys import msgpack -from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \ +from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \ prune_within, prune_split, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams from . import BaseTestCase @@ -178,6 +179,99 @@ class PatternTestCase(BaseTestCase): ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) +@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') +class IncludePatternNonAsciiTestCase(BaseTestCase): + def testComposedUnicode(self): + pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' + i = IncludePattern(pattern) + + assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testDecomposedUnicode(self): + pattern = 'ba\N{COMBINING ACUTE ACCENT}' + i = IncludePattern(pattern) + + assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testInvalidUnicode(self): + pattern = str(b'ba\x80', 'latin1') + i = IncludePattern(pattern) + + assert not i.match("ba/foo") + assert i.match(str(b"ba\x80/foo", 'latin1')) + + +@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') +class ExcludePatternNonAsciiTestCase(BaseTestCase): + def testComposedUnicode(self): + pattern = 
'b\N{LATIN SMALL LETTER A WITH ACUTE}' + e = ExcludePattern(pattern) + + assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testDecomposedUnicode(self): + pattern = 'ba\N{COMBINING ACUTE ACCENT}' + e = ExcludePattern(pattern) + + assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testInvalidUnicode(self): + pattern = str(b'ba\x80', 'latin1') + e = ExcludePattern(pattern) + + assert not e.match("ba/foo") + assert e.match(str(b"ba\x80/foo", 'latin1')) + +#@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='OS X only test') +class OSXPatternNormalizationTestCase(BaseTestCase): + # monkey patch sys.platform to allow testing on non-OSX during development + # remove and uncomment OSX-only decorator before push + def setUp(self): + self.oldplatform = sys.platform + sys.platform = 'darwin' + pass + + # monkey patch sys.platform to allow testing on non-OSX during development + # remove and uncomment OSX-only decorator before push + def tearDown(self): + sys.platform = self.oldplatform + pass + + def testComposedUnicode(self): + pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' + i = IncludePattern(pattern) + e = ExcludePattern(pattern) + + assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testDecomposedUnicode(self): + pattern = 'ba\N{COMBINING ACUTE ACCENT}' + i = IncludePattern(pattern) + e = ExcludePattern(pattern) + + assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testInvalidUnicode(self): + pattern = str(b'ba\x80', 'latin1') + i = 
IncludePattern(pattern) + e = ExcludePattern(pattern) + + assert not i.match("ba/foo") + assert i.match(str(b"ba\x80/foo", 'latin1')) + assert not e.match("ba/foo") + assert e.match(str(b"ba\x80/foo", 'latin1')) + + def test_compression_specs(): with pytest.raises(ValueError): CompressionSpec('') From d510ff7c63a4ad64f2c6a84e2af74092366136fa Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Wed, 9 Sep 2015 13:41:34 -0400 Subject: [PATCH 14/17] Merge non-ascii Include and ExcludePattern tests to parallel the OSX non-ascii tests --- borg/testsuite/helpers.py | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 002033f57..360695ba8 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -180,52 +180,38 @@ class PatternTestCase(BaseTestCase): @pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') -class IncludePatternNonAsciiTestCase(BaseTestCase): +class PatternNonAsciiTestCase(BaseTestCase): def testComposedUnicode(self): pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' i = IncludePattern(pattern) + e = ExcludePattern(pattern) assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - def testDecomposedUnicode(self): - pattern = 'ba\N{COMBINING ACUTE ACCENT}' - i = IncludePattern(pattern) - - assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - def testInvalidUnicode(self): - pattern = str(b'ba\x80', 'latin1') - i = IncludePattern(pattern) - - assert not i.match("ba/foo") - assert i.match(str(b"ba\x80/foo", 'latin1')) - - -@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') -class ExcludePatternNonAsciiTestCase(BaseTestCase): - def testComposedUnicode(self): - pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' - e = ExcludePattern(pattern) - assert 
e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo") def testDecomposedUnicode(self): pattern = 'ba\N{COMBINING ACUTE ACCENT}' + i = IncludePattern(pattern) e = ExcludePattern(pattern) + assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") def testInvalidUnicode(self): pattern = str(b'ba\x80', 'latin1') + i = IncludePattern(pattern) e = ExcludePattern(pattern) + assert not i.match("ba/foo") + assert i.match(str(b"ba\x80/foo", 'latin1')) assert not e.match("ba/foo") assert e.match(str(b"ba\x80/foo", 'latin1')) + #@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='OS X only test') class OSXPatternNormalizationTestCase(BaseTestCase): # monkey patch sys.platform to allow testing on non-OSX during development From cc13f3db979300ab1ebc982106e1ad8074133bb7 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Wed, 9 Sep 2015 13:48:46 -0400 Subject: [PATCH 15/17] Express non-ascii pattern platform skips better including correcting thinko in the commented-out OSX-only test --- borg/testsuite/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 360695ba8..077c171b2 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -179,7 +179,7 @@ class PatternTestCase(BaseTestCase): ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) -@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') +@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test') class PatternNonAsciiTestCase(BaseTestCase): def testComposedUnicode(self): pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' @@ -212,7 +212,7 @@ class PatternNonAsciiTestCase(BaseTestCase): assert e.match(str(b"ba\x80/foo", 'latin1')) 
-#@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='OS X only test') +#@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') class OSXPatternNormalizationTestCase(BaseTestCase): # monkey patch sys.platform to allow testing on non-OSX during development # remove and uncomment OSX-only decorator before push From 13ddfdf4a3b64b109dde3a7ba5333a32e14be758 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Wed, 9 Sep 2015 15:00:58 -0400 Subject: [PATCH 16/17] Move pattern normalization decision into decorator Using a decorator moves the duplicate code in the init methods into a single decorator method, while still retaining the same runtime overhead (zero for the non-OSX path, one extra function call plus the call to unicodedata.normalize for OSX). The pattern classes are much visually cleaner, and duplicate code limited to two lines normalizing the pattern on OSX. Because the decoration happens at class init time (vs instance init time for the previous approach), the OSX and non-OSX test cases can no longer be called in the same run, so I also removed the OSX test case monkey patching and uncommented the platform skipif decorator. --- borg/helpers.py | 52 +++++++++++++++++++-------------------- borg/testsuite/helpers.py | 15 +---------- 2 files changed, 26 insertions(+), 41 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index ecf138125..0da9918f8 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1,6 +1,7 @@ import argparse import binascii from collections import namedtuple +from functools import wraps import grp import os import pwd @@ -222,9 +223,22 @@ def exclude_path(path, patterns): # unify the two cases, we add a path separator to the end of # the path before matching. -##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -##### For discussion only, don't merge this code! -##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+def normalized(func): + """ Decorator for the Pattern match methods, returning a wrapper that + normalizes OSX paths to match the normalized pattern on OSX, and + returning the original method on other platforms""" + @wraps(func) + def normalize_wrapper(self, path): + return func(self, unicodedata.normalize("NFD", path)) + + if sys.platform in ('darwin',): + # HFS+ converts paths to a canonical form, so users shouldn't be + # required to enter an exact match + return normalize_wrapper + else: + # Windows and Unix filesystems allow different forms, so users + # always have to enter an exact match + return func class IncludePattern: """Literal files or directories listed on the command line @@ -233,23 +247,15 @@ class IncludePattern: path match as well. A trailing slash makes no difference. """ def __init__(self, pattern): - def match(path): - return (path+os.path.sep).startswith(self.pattern) - - # HFS+ converts paths to a canonical form, so users shouldn't be - # required to enter an exact match if sys.platform in ('darwin',): - # repository paths will be mostly in NFD, as the OSX exception list - # to NFD is small, so normalize to that form for best performance pattern = unicodedata.normalize("NFD", pattern) - self.match = lambda p: match(unicodedata.normalize("NFD", p)) - # Windows and Unix filesystems allow different forms, so users - # always have to enter an exact match - else: - self.match = match self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep + @normalized + def match(self, path): + return (path+os.path.sep).startswith(self.pattern) + def __repr__(self): return '%s(%s)' % (type(self), self.pattern) @@ -259,30 +265,22 @@ class ExcludePattern(IncludePattern): exclude the contents of a directory, but not the directory itself. 
""" def __init__(self, pattern): - def match(path): - return self.regex.match(path+os.path.sep) is not None - if pattern.endswith(os.path.sep): self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep else: self.pattern = os.path.normpath(pattern)+os.path.sep+'*' - # HFS+ converts paths to a canonical form, so users shouldn't be - # required to enter an exact match if sys.platform in ('darwin',): - # repository paths will be mostly in NFD, as the OSX exception list - # to NFD is small, so normalize to that form for best performance self.pattern = unicodedata.normalize("NFD", self.pattern) - self.match = lambda p: match(unicodedata.normalize("NFD", p)) - # Windows and Unix filesystems allow different forms, so users - # always have to enter an exact match - else: - self.match = match # fnmatch and re.match both cache compiled regular expressions. # Nevertheless, this is about 10 times faster. self.regex = re.compile(translate(self.pattern)) + @normalized + def match(self, path): + return self.regex.match(path+os.path.sep) is not None + def __repr__(self): return '%s(%s)' % (type(self), self.pattern) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 077c171b2..f755df22a 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -212,21 +212,8 @@ class PatternNonAsciiTestCase(BaseTestCase): assert e.match(str(b"ba\x80/foo", 'latin1')) -#@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') +@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') class OSXPatternNormalizationTestCase(BaseTestCase): - # monkey patch sys.platform to allow testing on non-OSX during development - # remove and uncomment OSX-only decorator before push - def setUp(self): - self.oldplatform = sys.platform - sys.platform = 'darwin' - pass - - # monkey patch sys.platform to allow testing on non-OSX during development - # remove and uncomment OSX-only decorator before push - def 
tearDown(self): - sys.platform = self.oldplatform - pass - def testComposedUnicode(self): pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' i = IncludePattern(pattern) From 1eecb020e88b635adbc7c2213430eed91b49bc5f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 10 Sep 2015 23:12:12 +0200 Subject: [PATCH 17/17] cython code: add some int types to get rid of unspecific python add / subtract operations they somehow pull in some floating point error code that led to an undefined symbol FPE_... when using the borgbackup wheel on some non-ubuntu/debian linux platform. --- borg/chunker.pyx | 2 +- borg/crypto.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/chunker.pyx b/borg/chunker.pyx index 1d4897db1..0faa06f38 100644 --- a/borg/chunker.pyx +++ b/borg/chunker.pyx @@ -20,7 +20,7 @@ cdef extern from "_chunker.c": cdef class Chunker: cdef _Chunker *chunker - def __cinit__(self, seed, chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size): + def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size): min_size = 1 << chunk_min_exp max_size = 1 << chunk_max_exp hash_mask = (1 << hash_mask_bits) - 1 diff --git a/borg/crypto.pyx b/borg/crypto.pyx index 61dbc42d5..d8143bdbc 100644 --- a/borg/crypto.pyx +++ b/borg/crypto.pyx @@ -52,7 +52,7 @@ bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0] long_to_bytes = lambda x: _long.pack(x) -def num_aes_blocks(length): +def num_aes_blocks(int length): """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data. Note: this is only correct for modes without padding, like AES-CTR. """