From 0fa663febe59bc3a8857300bdea6beb139a737df Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 6 May 2016 14:55:27 +0200 Subject: [PATCH 01/38] seek /opt/pkg (pkgsrc OS X) for openssl and lz4 Add /opt/pkg (used by pkgsrc on Mac OS X) to possible openssl and lz4 header path. from 2015Q2 onwards the prefix has changed to /opt/pkg in order to be compatible with El Capitan's "System Integrity Protection" feature. -- pkgsrc.joynet.com thanks to @weakish for finding this! --- setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 7223a0eb5..741070cc5 100644 --- a/setup.py +++ b/setup.py @@ -106,7 +106,8 @@ def detect_lz4(prefixes): include_dirs = [] library_dirs = [] -possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/borg', '/opt/local'] +possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', + '/usr/local/borg', '/opt/local', '/opt/pkg', ] if os.environ.get('BORG_OPENSSL_PREFIX'): possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX')) ssl_prefix = detect_openssl(possible_openssl_prefixes) @@ -116,7 +117,8 @@ include_dirs.append(os.path.join(ssl_prefix, 'include')) library_dirs.append(os.path.join(ssl_prefix, 'lib')) -possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4', '/usr/local/borg', '/opt/local'] +possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4', + '/usr/local/borg', '/opt/local', '/opt/pkg', ] if os.environ.get('BORG_LZ4_PREFIX'): possible_lz4_prefixes.insert(0, os.environ.get('BORG_LZ4_PREFIX')) lz4_prefix = detect_lz4(possible_lz4_prefixes) From ee8f7539463803604e5d4121739bc2e757d48a15 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 May 2016 17:02:09 +0200 Subject: [PATCH 02/38] load_excludes: reuse existing clean_lines function --- borg/helpers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index 999da93c5..7d8905942 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -282,8 +282,7 @@ def load_excludes(fh): """Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on both line ends are ignored. """ - patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#')) - return [parse_pattern(pattern) for pattern in patterns if pattern] + return [parse_pattern(pattern) for pattern in clean_lines(fh)] def update_excludes(args): From 61a73f5f917d991ed7fa3ec4937527bb2a891d07 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 May 2016 18:18:40 +0200 Subject: [PATCH 03/38] _chunker.c: remove tab characters always use 4 spaces for indenting. --- borg/_chunker.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 0f9494e79..7f772ca4b 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -184,9 +184,9 @@ chunker_fill(Chunker *c) length = c->bytes_read - offset; #if ( ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) && defined(POSIX_FADV_DONTNEED) ) - // Only do it once per run. - if (pagemask == 0) - pagemask = getpagesize() - 1; + // Only do it once per run. + if (pagemask == 0) + pagemask = getpagesize() - 1; // We tell the OS that we do not need the data that we just have read any // more (that it maybe has in the cache). This avoids that we spoil the @@ -207,7 +207,7 @@ chunker_fill(Chunker *c) // fadvise. This will cancel the final page and is not part // of the above workaround. overshoot = 0; - } + } posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED); #endif From d2988444c0682cfc5e05fd134ee247c42819402b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 May 2016 18:53:58 +0200 Subject: [PATCH 04/38] better help / docs for borg extract --dry-run, fixes #1022 --- borg/archiver.py | 4 ++++ docs/usage.rst | 3 +++ 2 files changed, 7 insertions(+) diff --git a/borg/archiver.py b/borg/archiver.py index 42c6e4212..42cb0ba08 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -1401,6 +1401,10 @@ class Archiver: be restricted by using the ``--exclude`` option. See the output of the "borg help patterns" command for more help on exclude patterns. + + By using ``--dry-run``, you can do all extraction steps except actually writing the + output data: reading metadata and data chunks from the repo, checking the hash/hmac, + decrypting, decompressing. """) subparser = subparsers.add_parser('extract', parents=[common_parser], add_help=False, description=self.do_extract.__doc__, diff --git a/docs/usage.rst b/docs/usage.rst index f4b0b52ae..600af4fea 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -327,6 +327,9 @@ Examples # Extract entire archive and list files while processing $ borg extract -v --list /path/to/repo::my-files + # Verify whether an archive could be successfully extracted, but do not write files to disk + $ borg extract --dry-run /path/to/repo::my-files + # Extract the "src" directory $ borg extract /path/to/repo::my-files home/USERNAME/src From 06caee2ed0f4816c22feea28d8e422bfd7fbec7b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 May 2016 19:22:48 +0200 Subject: [PATCH 05/38] improve backup example script, fixes #1020 --- docs/quickstart.rst | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 1d15f5d23..c8456b85d 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -105,23 +105,27 @@ server. The script also uses the :ref:`borg_prune` subcommand to maintain a certain number of old archives:: #!/bin/sh - REPOSITORY=username@remoteserver.com:backup - # Backup all of /home and /var/www except a few - # excluded directories - borg create -v --stats \ - $REPOSITORY::`hostname`-`date +%Y-%m-%d` \ - /home \ - /var/www \ - --exclude '/home/*/.cache' \ - --exclude /home/Ben/Music/Justin\ Bieber \ + # setting this, so the repo does not need to be given on the commandline: + export BORG_REPO=username@remoteserver.com:backup + + # setting this, so you won't be asked for your passphrase - make sure the + # script has appropriate owner/group and mode, e.g. root.root 600: + export BORG_PASSPHRASE=mysecret + + # Backup most important stuff: + borg create -v --stats -C lz4 ::`hostname`-`date +%Y-%m-%d` \ + /etc \ + /home \ + /var \ + --exclude '/home/*/.cache' \ --exclude '*.pyc' # Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly - # archives of THIS machine. --prefix `hostname`- is very important to + # archives of THIS machine. Using --prefix is very important to # limit prune's operation to this machine's archives and not apply to # other machine's archives also. - borg prune -v $REPOSITORY --prefix `hostname`- \ + borg prune -v --prefix `hostname`- \ --keep-daily=7 --keep-weekly=4 --keep-monthly=6 .. backup_compression: From a44e131661525c9f17eab7db046129073defb61c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 7 May 2016 21:03:31 +0200 Subject: [PATCH 06/38] prune: fix IndexError if giving --prefix resulted in an empty archives_checkpoints list, it crashed with an IndexError. --- borg/archiver.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 42c6e4212..448187e04 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -789,9 +789,8 @@ class Archiver: is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)] # keep the latest checkpoint, if there is no later non-checkpoint archive - latest_checkpoint = checkpoints[0] if checkpoints else None - if archives_checkpoints[0] is latest_checkpoint: - keep_checkpoints = [latest_checkpoint, ] + if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]: + keep_checkpoints = checkpoints[:1] else: keep_checkpoints = [] checkpoints = set(checkpoints) From 595e7c1dcd0f5a399c83a428554338a1342febdd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 9 May 2016 04:14:50 +0200 Subject: [PATCH 07/38] remove openssl RAND_bytes from crypto.pyx was forgotten there and not used any more since a while. also removed the hint about PyCrypto being an alternative to crypto.pyx, it was not updated since 2014, looks pretty stale. --- borg/crypto.pyx | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/borg/crypto.pyx b/borg/crypto.pyx index 16f1cda82..dd0fa14ac 100644 --- a/borg/crypto.pyx +++ b/borg/crypto.pyx @@ -1,17 +1,11 @@ -"""A thin OpenSSL wrapper +"""A thin OpenSSL wrapper""" -This could be replaced by PyCrypto maybe? -""" from libc.stdlib cimport malloc, free from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release API_VERSION = 3 -cdef extern from "openssl/rand.h": - int RAND_bytes(unsigned char *buf, int num) - - cdef extern from "openssl/evp.h": ctypedef struct EVP_MD: pass From 037d54e4421867ff8242f11984f9c29fd300f5df Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 18 Mar 2016 02:58:29 +0100 Subject: [PATCH 08/38] ArchiverTestCase, test_symlink_extract --- borg/testsuite/archiver.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 1b89515c5..ddd0c5f66 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -353,6 +353,14 @@ class ArchiverTestCase(ArchiverTestCaseBase): # the interesting parts of info_output2 and info_output should be same self.assert_equal(filter(info_output), filter(info_output2)) + def test_symlink_extract(self): + self.create_test_files() + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + with changedir('output'): + self.cmd('extract', self.repository_location + '::test') + assert os.readlink('input/link1') == 'somewhere' + def test_atime(self): self.create_test_files() atime, mtime = 123456780, 234567890 From dece06fba998a548424a22ed357a4b9933150345 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sat, 16 Apr 2016 00:20:55 +0200 Subject: [PATCH 09/38] KeyTestCase, test_decrypt_integrity --- borg/testsuite/__init__.py | 2 ++ borg/testsuite/key.py | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index cccf97a82..978c1ab42 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -45,6 +45,8 @@ class BaseTestCase(unittest.TestCase): assert_not_in = unittest.TestCase.assertNotIn assert_equal = unittest.TestCase.assertEqual assert_not_equal = unittest.TestCase.assertNotEqual + assert_raises = unittest.TestCase.assertRaises + assert_raises_regex = unittest.TestCase.assertRaisesRegex assert_true = unittest.TestCase.assertTrue if raises: diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py index 11eb35061..4f9ec7b4c 100644 --- a/borg/testsuite/key.py +++ b/borg/testsuite/key.py @@ -6,8 +6,8 @@ from binascii import hexlify, unhexlify from ..crypto import bytes_to_long, num_aes_blocks from ..key import PlaintextKey, PassphraseKey, KeyfileKey -from ..helpers import Location, Chunk, bin_to_hex -from . import BaseTestCase, environment_variable +from ..helpers import Location, IntegrityError, Chunk, bin_to_hex +from . import environment_variable class KeyTestCase(BaseTestCase): @@ -126,3 +126,32 @@ class KeyTestCase(BaseTestCase): chunk = Chunk(b'foo') self.assert_equal(hexlify(key.id_hash(chunk.data)), b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990') self.assert_equal(chunk, key2.decrypt(key2.id_hash(chunk.data), key.encrypt(chunk))) + + def test_decrypt_integrity(self): + with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd: + fd.write(self.keyfile2_key_file) + os.environ['BORG_PASSPHRASE'] = 'passphrase' + key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) + with self.assert_raises_regex(IntegrityError, 'Invalid encryption envelope'): + data = bytearray(self.keyfile2_cdata) + data[0] += 5 # wrong TYPE + key.decrypt("", data) + with self.assert_raises_regex(IntegrityError, 'Encryption envelope checksum mismatch'): + data = bytearray(self.keyfile2_cdata) + data[5] += 5 # corrupt HMAC + key.decrypt("", data) + with self.assert_raises_regex(IntegrityError, 'Encryption envelope checksum mismatch'): + data = bytearray(self.keyfile2_cdata) + id = key.id_hash(data) + data[36] += 123 # this in the IV/CTR + key.decrypt(id, data) + with self.assert_raises_regex(IntegrityError, 'Encryption envelope checksum mismatch'): + data = bytearray(self.keyfile2_cdata) + id = key.id_hash(data) + data[50] += 1 # corrupt data + key.decrypt(id, data) + with self.assert_raises_regex(IntegrityError, 'Chunk id verification failed'): + data = bytearray(self.keyfile2_cdata) + id = bytearray(key.id_hash(data)) # corrupt chunk id + id[12] = 0 + key.decrypt(id, data) From e5356b7f66f074026f195b70b325bc6c4dac97c9 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sat, 16 Apr 2016 18:45:53 +0200 Subject: [PATCH 10/38] testsuite/key: port to pytest, test_decrypt_integrity w/ 'carpet bombing' --- borg/testsuite/key.py | 153 +++++++++++++++++++++--------------------- 1 file changed, 78 insertions(+), 75 deletions(-) diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py index 4f9ec7b4c..8c57620a6 100644 --- a/borg/testsuite/key.py +++ b/borg/testsuite/key.py @@ -4,14 +4,20 @@ import shutil import tempfile from binascii import hexlify, unhexlify +import pytest from ..crypto import bytes_to_long, num_aes_blocks from ..key import PlaintextKey, PassphraseKey, KeyfileKey from ..helpers import Location, IntegrityError, Chunk, bin_to_hex from . import environment_variable -class KeyTestCase(BaseTestCase): +@pytest.fixture(autouse=True) +def clean_env(monkeypatch): + # Workaround for some tests (testsuite/archiver) polluting the environment + monkeypatch.delenv('BORG_PASSPHRASE', False) + +class TestKey: class MockArgs: location = Location(tempfile.mkstemp()[1]) @@ -31,14 +37,15 @@ class KeyTestCase(BaseTestCase): """)) keyfile2_id = unhexlify('c3fbf14bc001ebcc3cd86e696c13482ed071740927cd7cbe1b01b4bfcee49314') - def setUp(self): - self.tmppath = tempfile.mkdtemp() - os.environ['BORG_KEYS_DIR'] = self.tmppath - self.tmppath2 = tempfile.mkdtemp() + @pytest.fixture(autouse=True) + def keys_dir(self, request, monkeypatch): + tmp_keys_dir = tempfile.mkdtemp() - def tearDown(self): - shutil.rmtree(self.tmppath) - shutil.rmtree(self.tmppath2) + def finalize(): + shutil.rmtree(tmp_keys_dir) + + request.addfinalizer(finalize) + monkeypatch.setenv('BORG_KEYS_DIR', tmp_keys_dir) class MockRepository: class _Location: @@ -51,106 +58,102 @@ class KeyTestCase(BaseTestCase): def test_plaintext(self): key = PlaintextKey.create(None, None) chunk = Chunk(b'foo') - self.assert_equal(hexlify(key.id_hash(chunk.data)), b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae') - self.assert_equal(chunk, key.decrypt(key.id_hash(chunk.data), key.encrypt(chunk))) + assert hexlify(key.id_hash(chunk.data)) == b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae' + assert chunk == key.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)) - def test_keyfile(self): - os.environ['BORG_PASSPHRASE'] = 'test' + def test_keyfile(self, monkeypatch): + monkeypatch.setenv('BORG_PASSPHRASE', 'test') key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) - self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0) + assert bytes_to_long(key.enc_cipher.iv, 8) == 0 manifest = key.encrypt(Chunk(b'XXX')) - self.assert_equal(key.extract_nonce(manifest), 0) + assert key.extract_nonce(manifest) == 0 manifest2 = key.encrypt(Chunk(b'XXX')) - self.assert_not_equal(manifest, manifest2) - self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2)) - self.assert_equal(key.extract_nonce(manifest2), 1) + assert manifest != manifest2 + assert key.decrypt(None, manifest) == key.decrypt(None, manifest2) + assert key.extract_nonce(manifest2) == 1 iv = key.extract_nonce(manifest) key2 = KeyfileKey.detect(self.MockRepository(), manifest) - self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD)) + assert bytes_to_long(key2.enc_cipher.iv, 8) == iv + num_aes_blocks(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD) # Key data sanity check - self.assert_equal(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3) - self.assert_equal(key2.chunk_seed == 0, False) + assert len({key2.id_key, key2.enc_key, key2.enc_hmac_key}) == 3 + assert key2.chunk_seed != 0 + data = b'foo' chunk = Chunk(b'foo') - self.assert_equal(chunk, key2.decrypt(key.id_hash(chunk.data), key.encrypt(chunk))) + assert chunk == key2.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)) - def test_keyfile_kfenv(self): - keyfile = os.path.join(self.tmppath2, 'keyfile') - with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='testkf'): - assert not os.path.exists(keyfile) + def test_keyfile_kfenv(self, tmpdir): + keyfile = tmpdir.join('keyfile') + with environment_variable(BORG_KEY_FILE=str(keyfile), BORG_PASSPHRASE='testkf'): + assert not keyfile.exists() key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) - assert os.path.exists(keyfile) + assert keyfile.exists() chunk = Chunk(b'XXX') chunk_id = key.id_hash(chunk.data) chunk_cdata = key.encrypt(chunk) key = KeyfileKey.detect(self.MockRepository(), chunk_cdata) - self.assert_equal(chunk, key.decrypt(chunk_id, chunk_cdata)) - os.unlink(keyfile) - self.assert_raises(FileNotFoundError, KeyfileKey.detect, self.MockRepository(), chunk_cdata) + assert chunk == key.decrypt(chunk_id, chunk_cdata) + keyfile.unlink() + with pytest.raises(FileNotFoundError): + KeyfileKey.detect(self.MockRepository(), chunk_cdata) - def test_keyfile2(self): + def test_keyfile2(self, monkeypatch): with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd: fd.write(self.keyfile2_key_file) - os.environ['BORG_PASSPHRASE'] = 'passphrase' + monkeypatch.setenv('BORG_PASSPHRASE', 'passphrase') key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) - self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload') + assert key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data == b'payload' - def test_keyfile2_kfenv(self): - keyfile = os.path.join(self.tmppath2, 'keyfile') - with open(keyfile, 'w') as fd: + def test_keyfile2_kfenv(self, tmpdir): + keyfile = tmpdir.join('keyfile') + with keyfile.open('w') as fd: fd.write(self.keyfile2_key_file) with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='passphrase'): key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) - self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload') + assert key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data == b'payload' - def test_passphrase(self): - os.environ['BORG_PASSPHRASE'] = 'test' + def test_passphrase(self, keys_dir, monkeypatch): + monkeypatch.setenv('BORG_PASSPHRASE', 'test') key = PassphraseKey.create(self.MockRepository(), None) - self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0) - self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6') - self.assert_equal(hexlify(key.enc_hmac_key), b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901') - self.assert_equal(hexlify(key.enc_key), b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a') - self.assert_equal(key.chunk_seed, -775740477) + assert bytes_to_long(key.enc_cipher.iv, 8) == 0 + assert hexlify(key.id_key) == b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6' + assert hexlify(key.enc_hmac_key) == b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901' + assert hexlify(key.enc_key) == b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a' + assert key.chunk_seed == -775740477 manifest = key.encrypt(Chunk(b'XXX')) - self.assert_equal(key.extract_nonce(manifest), 0) + assert key.extract_nonce(manifest) == 0 manifest2 = key.encrypt(Chunk(b'XXX')) - self.assert_not_equal(manifest, manifest2) - self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2)) - self.assert_equal(key.extract_nonce(manifest2), 1) + assert manifest != manifest2 + assert key.decrypt(None, manifest) == key.decrypt(None, manifest2) + assert key.extract_nonce(manifest2) == 1 iv = key.extract_nonce(manifest) key2 = PassphraseKey.detect(self.MockRepository(), manifest) - self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - PassphraseKey.PAYLOAD_OVERHEAD)) - self.assert_equal(key.id_key, key2.id_key) - self.assert_equal(key.enc_hmac_key, key2.enc_hmac_key) - self.assert_equal(key.enc_key, key2.enc_key) - self.assert_equal(key.chunk_seed, key2.chunk_seed) + assert bytes_to_long(key2.enc_cipher.iv, 8) == iv + num_aes_blocks(len(manifest) - PassphraseKey.PAYLOAD_OVERHEAD) + assert key.id_key == key2.id_key + assert key.enc_hmac_key == key2.enc_hmac_key + assert key.enc_key == key2.enc_key + assert key.chunk_seed == key2.chunk_seed + data = b'foo' chunk = Chunk(b'foo') - self.assert_equal(hexlify(key.id_hash(chunk.data)), b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990') - self.assert_equal(chunk, key2.decrypt(key2.id_hash(chunk.data), key.encrypt(chunk))) + assert hexlify(key.id_hash(chunk.data)) == b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990' + assert chunk == key2.decrypt(key2.id_hash(chunk.data), key.encrypt(chunk)) - def test_decrypt_integrity(self): + def _corrupt_byte(self, key, data, offset): + data = bytearray(data) + data[offset] += 1 + with pytest.raises(IntegrityError): + key.decrypt("", data) + + def test_decrypt_integrity(self, monkeypatch): with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd: fd.write(self.keyfile2_key_file) - os.environ['BORG_PASSPHRASE'] = 'passphrase' + monkeypatch.setenv('BORG_PASSPHRASE', 'passphrase') key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) - with self.assert_raises_regex(IntegrityError, 'Invalid encryption envelope'): - data = bytearray(self.keyfile2_cdata) - data[0] += 5 # wrong TYPE - key.decrypt("", data) - with self.assert_raises_regex(IntegrityError, 'Encryption envelope checksum mismatch'): - data = bytearray(self.keyfile2_cdata) - data[5] += 5 # corrupt HMAC - key.decrypt("", data) - with self.assert_raises_regex(IntegrityError, 'Encryption envelope checksum mismatch'): - data = bytearray(self.keyfile2_cdata) - id = key.id_hash(data) - data[36] += 123 # this in the IV/CTR - key.decrypt(id, data) - with self.assert_raises_regex(IntegrityError, 'Encryption envelope checksum mismatch'): - data = bytearray(self.keyfile2_cdata) - id = key.id_hash(data) - data[50] += 1 # corrupt data - key.decrypt(id, data) - with self.assert_raises_regex(IntegrityError, 'Chunk id verification failed'): + + data = self.keyfile2_cdata + for i in range(len(data)): + self._corrupt_byte(key, data, i) + + with pytest.raises(IntegrityError): data = bytearray(self.keyfile2_cdata) id = bytearray(key.id_hash(data)) # corrupt chunk id id[12] = 0 From c6f7866e3530bdffb46c221676c6af95a8b6ef96 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 8 May 2016 23:13:39 +0200 Subject: [PATCH 11/38] testsuite/key: add TestPassphrase --- borg/testsuite/key.py | 53 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py index 8c57620a6..65ad9a741 100644 --- a/borg/testsuite/key.py +++ b/borg/testsuite/key.py @@ -1,3 +1,4 @@ +import getpass import os import re import shutil @@ -5,9 +6,10 @@ import tempfile from binascii import hexlify, unhexlify import pytest + from ..crypto import bytes_to_long, num_aes_blocks -from ..key import PlaintextKey, PassphraseKey, KeyfileKey -from ..helpers import Location, IntegrityError, Chunk, bin_to_hex +from ..key import PlaintextKey, PassphraseKey, KeyfileKey, Passphrase, PasswordRetriesExceeded, bin_to_hex +from ..helpers import Location, Chunk, IntegrityError from . import environment_variable @@ -77,7 +79,6 @@ class TestKey: # Key data sanity check assert len({key2.id_key, key2.enc_key, key2.enc_hmac_key}) == 3 assert key2.chunk_seed != 0 - data = b'foo' chunk = Chunk(b'foo') assert chunk == key2.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)) @@ -92,7 +93,7 @@ class TestKey: chunk_cdata = key.encrypt(chunk) key = KeyfileKey.detect(self.MockRepository(), chunk_cdata) assert chunk == key.decrypt(chunk_id, chunk_cdata) - keyfile.unlink() + keyfile.remove() with pytest.raises(FileNotFoundError): KeyfileKey.detect(self.MockRepository(), chunk_cdata) @@ -107,7 +108,7 @@ class TestKey: keyfile = tmpdir.join('keyfile') with keyfile.open('w') as fd: fd.write(self.keyfile2_key_file) - with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='passphrase'): + with environment_variable(BORG_KEY_FILE=str(keyfile), BORG_PASSPHRASE='passphrase'): key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) assert key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data == b'payload' @@ -132,7 +133,6 @@ class TestKey: assert key.enc_hmac_key == key2.enc_hmac_key assert key.enc_key == key2.enc_key assert key.chunk_seed == key2.chunk_seed - data = b'foo' chunk = Chunk(b'foo') assert hexlify(key.id_hash(chunk.data)) == b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990' assert chunk == key2.decrypt(key2.id_hash(chunk.data), key.encrypt(chunk)) @@ -158,3 +158,44 @@ class TestKey: id = bytearray(key.id_hash(data)) # corrupt chunk id id[12] = 0 key.decrypt(id, data) + + +class TestPassphrase: + def test_passphrase_new_verification(self, capsys, monkeypatch): + monkeypatch.setattr(getpass, 'getpass', lambda prompt: "12aöäü") + monkeypatch.setenv('BORG_DISPLAY_PASSPHRASE', 'no') + Passphrase.new() + out, err = capsys.readouterr() + assert "12" not in out + assert "12" not in err + + monkeypatch.setenv('BORG_DISPLAY_PASSPHRASE', 'yes') + passphrase = Passphrase.new() + out, err = capsys.readouterr() + assert "313261c3b6c3a4c3bc" not in out + assert "313261c3b6c3a4c3bc" in err + assert passphrase == "12aöäü" + + monkeypatch.setattr(getpass, 'getpass', lambda prompt: "1234/@=") + Passphrase.new() + out, err = capsys.readouterr() + assert "1234/@=" not in out + assert "1234/@=" in err + + def test_passphrase_new_empty(self, capsys, monkeypatch): + monkeypatch.delenv('BORG_PASSPHRASE', False) + monkeypatch.setattr(getpass, 'getpass', lambda prompt: "") + with pytest.raises(PasswordRetriesExceeded): + Passphrase.new(allow_empty=False) + out, err = capsys.readouterr() + assert "must not be blank" in err + + def test_passphrase_new_retries(self, monkeypatch): + monkeypatch.delenv('BORG_PASSPHRASE', False) + ascending_numbers = iter(range(20)) + monkeypatch.setattr(getpass, 'getpass', lambda prompt: str(next(ascending_numbers))) + with pytest.raises(PasswordRetriesExceeded): + Passphrase.new() + + def test_passphrase_repr(self): + assert "secret" not in repr(Passphrase("secret")) From 6a15e8d13886f6ab8d25d751338cbdfd866a9618 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Mon, 9 May 2016 12:02:16 +0200 Subject: [PATCH 12/38] testsuite/key: environment_variable -> monkeypatch.setenv --- borg/testsuite/__init__.py | 2 -- borg/testsuite/key.py | 37 +++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index 978c1ab42..cccf97a82 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -45,8 +45,6 @@ class BaseTestCase(unittest.TestCase): assert_not_in = unittest.TestCase.assertNotIn assert_equal = unittest.TestCase.assertEqual assert_not_equal = unittest.TestCase.assertNotEqual - assert_raises = unittest.TestCase.assertRaises - assert_raises_regex = unittest.TestCase.assertRaisesRegex assert_true = unittest.TestCase.assertTrue if raises: diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py index 65ad9a741..360b88a0c 100644 --- a/borg/testsuite/key.py +++ b/borg/testsuite/key.py @@ -10,7 +10,6 @@ import pytest from ..crypto import bytes_to_long, num_aes_blocks from ..key import PlaintextKey, PassphraseKey, KeyfileKey, Passphrase, PasswordRetriesExceeded, bin_to_hex from ..helpers import Location, Chunk, IntegrityError -from . import environment_variable @pytest.fixture(autouse=True) @@ -82,20 +81,21 @@ class TestKey: chunk = Chunk(b'foo') assert chunk == key2.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)) - def test_keyfile_kfenv(self, tmpdir): + def test_keyfile_kfenv(self, tmpdir, monkeypatch): keyfile = tmpdir.join('keyfile') - with environment_variable(BORG_KEY_FILE=str(keyfile), BORG_PASSPHRASE='testkf'): - assert not keyfile.exists() - key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) - assert keyfile.exists() - chunk = Chunk(b'XXX') - chunk_id = key.id_hash(chunk.data) - chunk_cdata = key.encrypt(chunk) - key = KeyfileKey.detect(self.MockRepository(), chunk_cdata) - assert chunk == key.decrypt(chunk_id, chunk_cdata) - keyfile.remove() - with pytest.raises(FileNotFoundError): - KeyfileKey.detect(self.MockRepository(), chunk_cdata) + monkeypatch.setenv('BORG_KEY_FILE', str(keyfile)) + monkeypatch.setenv('BORG_PASSPHRASE', 'testkf') + assert not keyfile.exists() + key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) + assert keyfile.exists() + chunk = Chunk(b'XXX') + chunk_id = key.id_hash(chunk.data) + chunk_cdata = key.encrypt(chunk) + key = KeyfileKey.detect(self.MockRepository(), chunk_cdata) + assert chunk == key.decrypt(chunk_id, chunk_cdata) + keyfile.remove() + with pytest.raises(FileNotFoundError): + KeyfileKey.detect(self.MockRepository(), chunk_cdata) def test_keyfile2(self, monkeypatch): with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd: @@ -104,13 +104,14 @@ class TestKey: key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) assert key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data == b'payload' - def test_keyfile2_kfenv(self, tmpdir): + def test_keyfile2_kfenv(self, tmpdir, monkeypatch): keyfile = tmpdir.join('keyfile') with keyfile.open('w') as fd: fd.write(self.keyfile2_key_file) - with environment_variable(BORG_KEY_FILE=str(keyfile), BORG_PASSPHRASE='passphrase'): - key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) - assert key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data == b'payload' + monkeypatch.setenv('BORG_KEY_FILE', str(keyfile)) + monkeypatch.setenv('BORG_PASSPHRASE', 'passphrase') + key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) + assert key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data == b'payload' def test_passphrase(self, keys_dir, monkeypatch): monkeypatch.setenv('BORG_PASSPHRASE', 'test') From ad5ae32cc404b3f4a2915ceb094db9827b754652 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 13 May 2016 22:06:38 +0200 Subject: [PATCH 13/38] testsuite/key: Use pytest built-in fixture --- borg/testsuite/key.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py index 360b88a0c..8a1642a1d 100644 --- a/borg/testsuite/key.py +++ b/borg/testsuite/key.py @@ -38,15 +38,10 @@ class TestKey: """)) keyfile2_id = unhexlify('c3fbf14bc001ebcc3cd86e696c13482ed071740927cd7cbe1b01b4bfcee49314') - @pytest.fixture(autouse=True) - def keys_dir(self, request, monkeypatch): - tmp_keys_dir = tempfile.mkdtemp() - - def finalize(): - shutil.rmtree(tmp_keys_dir) - - request.addfinalizer(finalize) - monkeypatch.setenv('BORG_KEYS_DIR', tmp_keys_dir) + @pytest.fixture + def keys_dir(self, request, monkeypatch, tmpdir): + monkeypatch.setenv('BORG_KEYS_DIR', tmpdir) + return tmpdir class MockRepository: class _Location: @@ -62,7 +57,7 @@ class TestKey: assert hexlify(key.id_hash(chunk.data)) == b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae' assert chunk == key.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)) - def test_keyfile(self, monkeypatch): + def test_keyfile(self, monkeypatch, keys_dir): monkeypatch.setenv('BORG_PASSPHRASE', 'test') key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) assert bytes_to_long(key.enc_cipher.iv, 8) == 0 @@ -97,8 +92,8 @@ class TestKey: with pytest.raises(FileNotFoundError): KeyfileKey.detect(self.MockRepository(), chunk_cdata) - def test_keyfile2(self, monkeypatch): - with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd: + def test_keyfile2(self, monkeypatch, keys_dir): + with keys_dir.join('keyfile').open('w') as fd: fd.write(self.keyfile2_key_file) monkeypatch.setenv('BORG_PASSPHRASE', 'passphrase') key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) @@ -144,8 +139,8 @@ class TestKey: with pytest.raises(IntegrityError): key.decrypt("", data) - def test_decrypt_integrity(self, monkeypatch): - with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd: + def test_decrypt_integrity(self, monkeypatch, keys_dir): + with keys_dir.join('keyfile').open('w') as fd: fd.write(self.keyfile2_key_file) monkeypatch.setenv('BORG_PASSPHRASE', 'passphrase') key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) From 0bceaf0736d503c7b5718275e340d2f0ab36185f Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 13 May 2016 22:50:34 +0200 Subject: [PATCH 14/38] check: support integrity verification with --verify-data --- borg/archive.py | 39 ++++++++++++++++++++++++++++++++++++-- borg/archiver.py | 22 ++++++++++++++++++++- borg/testsuite/archiver.py | 23 ++++++++++++++++++++++ 3 files changed, 81 insertions(+), 3 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index ef41c9900..0626acb04 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -22,7 +22,8 @@ from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \ Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \ ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \ PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \ - CompressionDecider1, CompressionDecider2, CompressionSpec + CompressionDecider1, CompressionDecider2, CompressionSpec, \ + IntegrityError from .repository import Repository from .platform import acl_get, acl_set from .chunker import Chunker @@ -698,7 +699,17 @@ class ArchiveChecker: self.error_found = False self.possibly_superseded = set() - def check(self, repository, repair=False, archive=None, last=None, prefix=None, save_space=False): + def check(self, repository, repair=False, archive=None, last=None, prefix=None, verify_data=False, + save_space=False): + """Perform a set of checks on 'repository' + + :param repair: enable repair mode, write updated or corrected data into repository + :param archive: only check this archive + :param last: only check this number of recent archives + :param prefix: only check archives with this prefix + :param verify_data: integrity verification of data referenced by archives + :param save_space: Repository.commit(save_space) + """ logger.info('Starting archive consistency check...') self.check_all = archive is None and last is None and prefix is None self.repair = repair @@ -712,6 +723,8 @@ class ArchiveChecker: else: self.manifest, _ = Manifest.load(repository, key=self.key) self.rebuild_refcounts(archive=archive, last=last, prefix=prefix) + if verify_data: + self.verify_data() self.orphan_chunks_check() self.finish(save_space=save_space) if self.error_found: @@ -741,6 +754,26 @@ class ArchiveChecker: cdata = repository.get(next(self.chunks.iteritems())[0]) return key_factory(repository, cdata) + def verify_data(self): + logger.info('Starting cryptographic data integrity verification...') + pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Verifying data %6.2f%%", step=0.01, same_line=True) + count = errors = 0 + for chunk_id, (refcount, *_) in self.chunks.iteritems(): + pi.show() + if not refcount: + continue + encrypted_data = self.repository.get(chunk_id) + try: + _, data = self.key.decrypt(chunk_id, encrypted_data) + except IntegrityError as integrity_error: + self.error_found = True + errors += 1 + logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error) + count += 1 + pi.finish() + log = logger.error if errors else logger.info + log('Finished cryptographic data integrity verification, verified %d chunks with %d integrity errors.', count, errors) + def rebuild_manifest(self): """Rebuild the manifest object if it is missing @@ -874,6 +907,8 @@ class ArchiveChecker: else: # we only want one specific archive archive_items = [item for item in self.manifest.archives.items() if item[0] == archive] + if not archive_items: + logger.error("Archive '%s' not found.", archive) num_archives = 1 end = 1 diff --git a/borg/archiver.py b/borg/archiver.py index 42c6e4212..02e774623 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -185,12 +185,16 @@ class Archiver: if not yes(msg, false_msg="Aborting.", truish=('YES', ), env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): return EXIT_ERROR + if args.repo_only and args.verify_data: + self.print_error("--repository-only and --verify-data contradict each other. Please select one.") + return EXIT_ERROR if not args.archives_only: if not repository.check(repair=args.repair, save_space=args.save_space): return EXIT_WARNING if not args.repo_only and not ArchiveChecker().check( repository, repair=args.repair, archive=args.location.archive, - last=args.last, prefix=args.prefix, save_space=args.save_space): + last=args.last, prefix=args.prefix, verify_data=args.verify_data, + save_space=args.save_space): return EXIT_WARNING return EXIT_SUCCESS @@ -1213,6 +1217,18 @@ class Archiver: required). - The archive checks can be time consuming, they can be skipped using the --repository-only option. + + The --verify-data option will perform a full integrity verification (as opposed to + checking the CRC32 of the segment) of data, which means reading the data from the + repository, decrypting and decompressing it. This is a cryptographic verification, + which will detect (accidental) corruption. For encrypted repositories it is + tamper-resistant as well, unless the attacker has access to the keys. + + It is also very slow. + + --verify-data only verifies data used by the archives specified with --last, + --prefix or an explicitly named archive. If none of these are passed, + all data in the repository is verified. """) subparser = subparsers.add_parser('check', parents=[common_parser], add_help=False, description=self.do_check.__doc__, @@ -1229,6 +1245,10 @@ class Archiver: subparser.add_argument('--archives-only', dest='archives_only', action='store_true', default=False, help='only perform archives checks') + subparser.add_argument('--verify-data', dest='verify_data', action='store_true', + default=False, + help='perform cryptographic archive data integrity verification ' + '(conflicts with --repository-only)') subparser.add_argument('--repair', dest='repair', action='store_true', default=False, help='attempt to repair any inconsistencies found') diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 1b89515c5..e03973a60 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -1590,6 +1590,29 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): self.cmd('check', self.repository_location, exit_code=0) self.cmd('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0) + def _test_verify_data(self, *init_args): + shutil.rmtree(self.repository_path) + self.cmd('init', self.repository_location, *init_args) + self.create_src_archive('archive1') + archive, repository = self.open_archive('archive1') + with repository: + for item in archive.iter_items(): + if item[b'path'].endswith('testsuite/archiver.py'): + chunk = item[b'chunks'][-1] + data = repository.get(chunk.id) + b'1234' + repository.put(chunk.id, data) + break + repository.commit() + self.cmd('check', self.repository_location, exit_code=0) + output = self.cmd('check', '--verify-data', self.repository_location, exit_code=1) + assert bin_to_hex(chunk.id) + ', integrity error' in output + + def test_verify_data(self): + self._test_verify_data('--encryption', 'repokey') + + def test_verify_data_unencrypted(self): + self._test_verify_data('--encryption', 'none') + class RemoteArchiverTestCase(ArchiverTestCase): prefix = '__testsuite__:' From c52861e0cafafa2988ae98c3e25918068463012d Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sat, 14 May 2016 22:46:41 +0200 Subject: [PATCH 15/38] Improve LoggedIO write performance, make commit mechanism more solid - Instead of very small (5 MB-ish) segment files, use larger ones - Request asynchronous write-out or write-through (TODO) where it is supported, to achieve a continuously high throughput for writes - Instead of depending on ordered writes (write data, commit tag, sync) for consistency, do a double-sync commit as more serious RDBMS also do i.e. write data, sync, write commit tag, sync Since commits are very expensive in Borg at the moment this makes no difference performance-wise. New platform APIs: SyncFile, sync_dir [x] Naive implementation (equivalent to what Borg did before) [x] Linux implementation [ ] Windows implementation [-] OSX implementation (F_FULLSYNC) --- borg/constants.py | 10 ++++- borg/helpers.py | 2 +- borg/platform.py | 12 ++---- borg/platform_base.py | 78 +++++++++++++++++++++++++++++++++++++++ borg/platform_darwin.pyx | 2 +- borg/platform_freebsd.pyx | 2 +- borg/platform_linux.pyx | 63 ++++++++++++++++++++++++++----- borg/repository.py | 38 ++++++++++--------- docs/usage.rst | 16 ++++++++ 9 files changed, 181 insertions(+), 42 deletions(-) create mode 100644 borg/platform_base.py diff --git a/borg/constants.py b/borg/constants.py index 95b16c47a..0f6d3ddc7 100644 --- a/borg/constants.py +++ b/borg/constants.py @@ -12,8 +12,14 @@ UMASK_DEFAULT = 0o077 CACHE_TAG_NAME = 'CACHEDIR.TAG' CACHE_TAG_CONTENTS = b'Signature: 8a477f597d28d172789f06886806bc55' -DEFAULT_MAX_SEGMENT_SIZE = 5 * 1024 * 1024 -DEFAULT_SEGMENTS_PER_DIR = 10000 +# A large, but not unreasonably large segment size. Always less than 2 GiB (for legacy file systems). We choose +# 500 MiB which means that no indirection from the inode is needed for typical Linux file systems. +# Note that this is a soft-limit and can be exceeded (worst case) by a full maximum chunk size and some metadata +# bytes. That's why it's 500 MiB instead of 512 MiB. +DEFAULT_MAX_SEGMENT_SIZE = 500 * 1024 * 1024 + +# A few hundred files per directory to go easy on filesystems which don't like too many files per dir (NTFS) +DEFAULT_SEGMENTS_PER_DIR = 500 CHUNK_MIN_EXP = 19 # 2**19 == 512kiB CHUNK_MAX_EXP = 23 # 2**23 == 8MiB diff --git a/borg/helpers.py b/borg/helpers.py index 7d8905942..c73553e94 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -82,7 +82,7 @@ def check_extension_modules(): raise ExtensionModuleError if crypto.API_VERSION != 3: raise ExtensionModuleError - if platform.API_VERSION != 2: + if platform.API_VERSION != 3: raise ExtensionModuleError diff --git a/borg/platform.py b/borg/platform.py index 1bc8ee5e4..9c1c4ebed 100644 --- a/borg/platform.py +++ b/borg/platform.py @@ -1,16 +1,10 @@ import sys +from .platform_base import acl_get, acl_set, SyncFile, sync_dir, API_VERSION + if sys.platform.startswith('linux'): # pragma: linux only - from .platform_linux import acl_get, acl_set, API_VERSION + from .platform_linux import acl_get, acl_set, SyncFile, API_VERSION elif sys.platform.startswith('freebsd'): # pragma: freebsd only from .platform_freebsd import acl_get, acl_set, API_VERSION elif sys.platform == 'darwin': # pragma: darwin only from .platform_darwin import acl_get, acl_set, API_VERSION -else: # pragma: unknown platform only - API_VERSION = 2 - - def acl_get(path, item, st, numeric_owner=False): - pass - - def acl_set(path, item, numeric_owner=False): - pass diff --git a/borg/platform_base.py b/borg/platform_base.py new file mode 100644 index 000000000..a897f9f10 --- /dev/null +++ b/borg/platform_base.py @@ -0,0 +1,78 @@ +import os + +API_VERSION = 3 + +fdatasync = getattr(os, 'fdatasync', os.fsync) + + +def acl_get(path, item, st, numeric_owner=False): + """ + Saves ACL Entries + + If `numeric_owner` is True the user/group field is not preserved only uid/gid + """ + + +def acl_set(path, item, numeric_owner=False): + """ + Restore ACL Entries + + If `numeric_owner` is True the stored uid/gid is used instead + of the user/group names + """ + + +def sync_dir(path): + fd = os.open(path, os.O_RDONLY) + try: + os.fsync(fd) + finally: + os.close(fd) + + +class SyncFile: + """ + A file class that is supposed to enable write ordering (one way or another) and data durability after close(). + + The degree to which either is possible varies with operating system, file system and hardware. + + This fallback implements a naive and slow way of doing this. On some operating systems it can't actually + guarantee any of the above, since fsync() doesn't guarantee it. Furthermore it may not be possible at all + to satisfy the above guarantees on some hardware or operating systems. In these cases we hope that the thorough + checksumming implemented catches any corrupted data due to misordered, delayed or partial writes. + + Note that POSIX doesn't specify *anything* about power failures (or similar failures). A system that + routinely loses files or corrupts file on power loss is POSIX compliant. + + TODO: Use F_FULLSYNC on OSX. + TODO: A Windows implementation should use CreateFile with FILE_FLAG_WRITE_THROUGH. + """ + + def __init__(self, path): + self.fd = open(path, 'wb') + self.fileno = self.fd.fileno() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def write(self, data): + self.fd.write(data) + + def sync(self): + """ + Synchronize file contents. Everything written prior to sync() must become durable before anything written + after sync(). + """ + self.fd.flush() + fdatasync(self.fileno) + if hasattr(os, 'posix_fadvise'): + os.posix_fadvise(self.fileno, 0, 0, os.POSIX_FADV_DONTNEED) + + def close(self): + """sync() and close.""" + self.sync() + self.fd.close() + sync_dir(os.path.dirname(self.fd.name)) diff --git a/borg/platform_darwin.pyx b/borg/platform_darwin.pyx index edb41f715..4dc25b83a 100644 --- a/borg/platform_darwin.pyx +++ b/borg/platform_darwin.pyx @@ -1,7 +1,7 @@ import os from .helpers import user2uid, group2gid, safe_decode, safe_encode -API_VERSION = 2 +API_VERSION = 3 cdef extern from "sys/acl.h": ctypedef struct _acl_t: diff --git a/borg/platform_freebsd.pyx b/borg/platform_freebsd.pyx index 27d636263..ae69af68a 100644 --- a/borg/platform_freebsd.pyx +++ b/borg/platform_freebsd.pyx @@ -1,7 +1,7 @@ import os from .helpers import posix_acl_use_stored_uid_gid, safe_encode, safe_decode -API_VERSION = 2 +API_VERSION = 3 cdef extern from "errno.h": int errno diff --git a/borg/platform_linux.pyx b/borg/platform_linux.pyx index f9ed42415..854904f3f 100644 --- a/borg/platform_linux.pyx +++ b/borg/platform_linux.pyx @@ -1,13 +1,17 @@ import os import re +import resource from stat import S_ISLNK from .helpers import posix_acl_use_stored_uid_gid, user2uid, group2gid, safe_decode, safe_encode +from .platform_base import SyncFile as BaseSyncFile +from libc cimport errno -API_VERSION = 2 +API_VERSION = 3 cdef extern from "sys/types.h": int ACL_TYPE_ACCESS int ACL_TYPE_DEFAULT + ctypedef off64_t cdef extern from "sys/acl.h": ctypedef struct _acl_t: @@ -23,6 +27,12 @@ cdef extern from "sys/acl.h": cdef extern from "acl/libacl.h": int acl_extended_file(const char *path) +cdef extern from "fcntl.h": + int sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags) + unsigned int SYNC_FILE_RANGE_WRITE + unsigned int SYNC_FILE_RANGE_WAIT_BEFORE + unsigned int SYNC_FILE_RANGE_WAIT_AFTER + _comment_re = re.compile(' *#.*', re.M) @@ -77,10 +87,6 @@ cdef acl_numeric_ids(acl): def acl_get(path, item, st, numeric_owner=False): - """Saves ACL Entries - - If `numeric_owner` is True the user/group field is not preserved only uid/gid - """ cdef acl_t default_acl = NULL cdef acl_t access_acl = NULL cdef char *default_text = NULL @@ -112,11 +118,6 @@ def acl_get(path, item, st, numeric_owner=False): def acl_set(path, item, numeric_owner=False): - """Restore ACL Entries - - If `numeric_owner` is True the stored uid/gid is used instead - of the user/group names - """ cdef acl_t access_acl = NULL cdef acl_t default_acl = NULL @@ -141,3 +142,45 @@ def acl_set(path, item, numeric_owner=False): acl_set_file(p, ACL_TYPE_DEFAULT, default_acl) finally: acl_free(default_acl) + +cdef _sync_file_range(fd, offset, length, flags): + assert offset & PAGE_MASK == 0, "offset %d not page-aligned" % offset + assert length & PAGE_MASK == 0, "length %d not page-aligned" % length + if sync_file_range(fd, offset, length, flags) != 0: + raise OSError(errno, os.strerror(errno)) + os.posix_fadvise(fd, offset, length, os.POSIX_FADV_DONTNEED) + +cdef unsigned PAGE_MASK = resource.getpagesize() - 1 + + +class SyncFile(BaseSyncFile): + """ + Implemented using sync_file_range for asynchronous write-out and fdatasync for actual durability. + + "write-out" means that dirty pages (= data that was written) are submitted to an I/O queue and will be send to + disk in the immediate future. + """ + + def __init__(self, path): + super().__init__(path) + self.offset = 0 + self.write_window = (16 * 1024 ** 2) & ~PAGE_MASK + self.last_sync = 0 + self.pending_sync = None + + def write(self, data): + self.offset += self.fd.write(data) + offset = self.offset & ~PAGE_MASK + if offset >= self.last_sync + self.write_window: + self.fd.flush() + _sync_file_range(self.fileno, self.last_sync, offset - self.last_sync, SYNC_FILE_RANGE_WRITE) + if self.pending_sync is not None: + _sync_file_range(self.fileno, self.pending_sync, self.last_sync - self.pending_sync, + SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WAIT_AFTER) + self.pending_sync = self.last_sync + self.last_sync = offset + + def sync(self): + self.fd.flush() + os.fdatasync(self.fileno) + os.posix_fadvise(self.fileno, 0, 0, os.POSIX_FADV_DONTNEED) diff --git a/borg/repository.py b/borg/repository.py index 3f8d5d68d..52937c76f 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -17,6 +17,7 @@ from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, Progre from .hashindex import NSIndex from .locking import UpgradableLock, LockError, LockErrorT from .lrucache import LRUCache +from .platform import SyncFile, sync_dir MAX_OBJECT_SIZE = 20 * 1024 * 1024 MAGIC = b'BORG_SEG' @@ -32,7 +33,7 @@ class Repository: On disk layout: dir/README dir/config - dir/data// + dir/data// dir/index.X dir/hints.X """ @@ -507,7 +508,7 @@ class LoggedIO: def __init__(self, path, limit, segments_per_dir, capacity=90): self.path = path self.fds = LRUCache(capacity, - dispose=lambda fd: fd.close()) + dispose=self.close_fd) self.segment = 0 self.limit = limit self.segments_per_dir = segments_per_dir @@ -519,6 +520,11 @@ class LoggedIO: self.fds.clear() self.fds = None # Just to make sure we're disabled + def close_fd(self, fd): + if hasattr(os, 'posix_fadvise'): # only on UNIX + os.posix_fadvise(fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) + fd.close() + def segment_iterator(self, reverse=False): data_path = os.path.join(self.path, 'data') dirs = sorted((dir for dir in os.listdir(data_path) if dir.isdigit()), key=int, reverse=reverse) @@ -535,7 +541,7 @@ class LoggedIO: return None def get_segments_transaction_id(self): - """Verify that the transaction id is consistent with the index transaction id + """Return the last committed segment. """ for segment, filename in self.segment_iterator(reverse=True): if self.is_committed_segment(filename): @@ -578,7 +584,8 @@ class LoggedIO: dirname = os.path.join(self.path, 'data', str(self.segment // self.segments_per_dir)) if not os.path.exists(dirname): os.mkdir(dirname) - self._write_fd = open(self.segment_filename(self.segment), 'ab') + sync_dir(os.path.join(self.path, 'data')) + self._write_fd = SyncFile(self.segment_filename(self.segment)) self._write_fd.write(MAGIC) self.offset = MAGIC_LEN return self._write_fd @@ -591,6 +598,13 @@ class LoggedIO: self.fds[segment] = fd return fd + def close_segment(self): + if self._write_fd: + self.segment += 1 + self.offset = 0 + self._write_fd.close() + self._write_fd = None + def delete_segment(self, segment): if segment in self.fds: del self.fds[segment] @@ -641,7 +655,7 @@ class LoggedIO: def read(self, segment, offset, id): if segment == self.segment and self._write_fd: - self._write_fd.flush() + self._write_fd.sync() fd = self.get_fd(segment) fd.seek(offset) header = fd.read(self.put_header_fmt.size) @@ -703,20 +717,8 @@ class LoggedIO: def write_commit(self): fd = self.get_write_fd(no_new=True) + fd.sync() header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT) crc = self.crc_fmt.pack(crc32(header) & 0xffffffff) fd.write(b''.join((crc, header))) self.close_segment() - - def close_segment(self): - if self._write_fd: - self.segment += 1 - self.offset = 0 - self._write_fd.flush() - os.fsync(self._write_fd.fileno()) - if hasattr(os, 'posix_fadvise'): # only on UNIX - # tell the OS that it does not need to cache what we just wrote, - # avoids spoiling the cache for the OS and other processes. - os.posix_fadvise(self._write_fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) - self._write_fd.close() - self._write_fd = None diff --git a/docs/usage.rst b/docs/usage.rst index 600af4fea..7473e8597 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -170,6 +170,22 @@ Network: In case you are interested in more details, please read the internals documentation. +File systems +~~~~~~~~~~~~ + +We strongly recommend against using Borg (or any other database-like +software) on non-journaling file systems like FAT, since it is not +possible to assume any consistency in case of power failures (or a +sudden disconnect of an external drive or similar failures). + +While Borg uses a data store that is resilient against these failures +when used on journaling file systems, it is not possible to guarantee +this with some hardware -- independent of the software used. We don't +know a list of affected hardware. + +If you are suspicious whether your Borg repository is still consistent +and readable after one of the failures mentioned above occured, run +``borg check --verify-data`` to make sure it is consistent. Units ~~~~~ From deab8ca7e230b004447441feca32ffc61c39c3c1 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Tue, 17 May 2016 01:00:46 +0200 Subject: [PATCH 16/38] init: move epilogue from docs/usage into help output Also tell which algos are used and that HW accel is supported. --- borg/archiver.py | 43 ++++++++++++++++++++++++++++++++++++++++++- docs/usage.rst | 40 ---------------------------------------- 2 files changed, 42 insertions(+), 41 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 497f5cf99..8c9d018db 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -1167,7 +1167,48 @@ class Archiver: init_epilog = textwrap.dedent(""" This command initializes an empty repository. A repository is a filesystem directory containing the deduplicated data from zero or more archives. - Encryption can be enabled at repository init time. + + Encryption can be enabled at repository init time (the default). + + It is not recommended to disable encryption. Repository encryption protects you + e.g. against the case that an attacker has access to your backup repository. + + But be careful with the key / the passphrase: + + If you want "passphrase-only" security, use the repokey mode. The key will + be stored inside the repository (in its "config" file). In above mentioned + attack scenario, the attacker will have the key (but not the passphrase). + + If you want "passphrase and having-the-key" security, use the keyfile mode. + The key will be stored in your home directory (in .config/borg/keys). In + the attack scenario, the attacker who has just access to your repo won't have + the key (and also not the passphrase). + + Make a backup copy of the key file (keyfile mode) or repo config file + (repokey mode) and keep it at a safe place, so you still have the key in + case it gets corrupted or lost. Also keep the passphrase at a safe place. + The backup that is encrypted with that key won't help you with that, of course. + + Make sure you use a good passphrase. Not too short, not too simple. The real + encryption / decryption key is encrypted with / locked by your passphrase. + If an attacker gets your key, he can't unlock and use it without knowing the + passphrase. + + Be careful with special or non-ascii characters in your passphrase: + + - Borg processes the passphrase as unicode (and encodes it as utf-8), + so it does not have problems dealing with even the strangest characters. + - BUT: that does not necessarily apply to your OS / VM / keyboard configuration. + + So better use a long passphrase made from simple ascii chars than one that + includes non-ascii stuff or characters that are hard/impossible to enter on + a different keyboard layout. + + You can change your passphrase for existing repos at any time, it won't affect + the encryption/decryption key or other secrets. + + When encrypting, AES-CTR-256 is used for encryption, and HMAC-SHA256 for + authentication. Hardware acceleration will be used automatically. """) subparser = subparsers.add_parser('init', parents=[common_parser], add_help=False, description=self.do_init.__doc__, epilog=init_epilog, diff --git a/docs/usage.rst b/docs/usage.rst index 7473e8597..d559c0bb0 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -236,46 +236,6 @@ Examples # Remote repository (store the key your home dir) $ borg init --encryption=keyfile user@hostname:backup -Important notes about encryption: - -It is not recommended to disable encryption. Repository encryption protects you -e.g. against the case that an attacker has access to your backup repository. - -But be careful with the key / the passphrase: - -If you want "passphrase-only" security, use the ``repokey`` mode. The key will -be stored inside the repository (in its "config" file). In above mentioned -attack scenario, the attacker will have the key (but not the passphrase). - -If you want "passphrase and having-the-key" security, use the ``keyfile`` mode. -The key will be stored in your home directory (in ``.config/borg/keys``). In -the attack scenario, the attacker who has just access to your repo won't have -the key (and also not the passphrase). - -Make a backup copy of the key file (``keyfile`` mode) or repo config file -(``repokey`` mode) and keep it at a safe place, so you still have the key in -case it gets corrupted or lost. Also keep the passphrase at a safe place. -The backup that is encrypted with that key won't help you with that, of course. - -Make sure you use a good passphrase. Not too short, not too simple. The real -encryption / decryption key is encrypted with / locked by your passphrase. -If an attacker gets your key, he can't unlock and use it without knowing the -passphrase. - -Be careful with special or non-ascii characters in your passphrase: - -- |project_name| processes the passphrase as unicode (and encodes it as utf-8), - so it does not have problems dealing with even the strangest characters. -- BUT: that does not necessarily apply to your OS / VM / keyboard configuration. - -So better use a long passphrase made from simple ascii chars than one that -includes non-ascii stuff or characters that are hard/impossible to enter on -a different keyboard layout. - -You can change your passphrase for existing repos at any time, it won't affect -the encryption/decryption key or other secrets. - - .. include:: usage/create.rst.inc Examples From 6cf61024d9bcc48768349d2b5523240fafdfcbc4 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Tue, 17 May 2016 18:57:53 +0200 Subject: [PATCH 17/38] Handle SIGTERM and make a clean exit --- borg/archiver.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/borg/archiver.py b/borg/archiver.py index ae2fd4645..f2825dfa1 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -1488,6 +1488,14 @@ def sig_info_handler(signum, stack): # pragma: no cover break +class SIGTERMReceived(BaseException): + pass + + +def sig_term_handler(signum, stack): + raise SIGTERMReceived + + def setup_signal_handlers(): # pragma: no cover sigs = [] if hasattr(signal, 'SIGUSR1'): @@ -1496,6 +1504,7 @@ def setup_signal_handlers(): # pragma: no cover sigs.append(signal.SIGINFO) # kill -INFO pid (or ctrl-t) for sig in sigs: signal.signal(sig, sig_info_handler) + signal.signal(signal.SIGTERM, sig_term_handler) def main(): # pragma: no cover @@ -1523,6 +1532,9 @@ def main(): # pragma: no cover except KeyboardInterrupt: msg = 'Keyboard interrupt.\n%s\n%s' % (traceback.format_exc(), sysinfo()) exit_code = EXIT_ERROR + except SIGTERMReceived: + msg = 'Received SIGTERM.' + exit_code = EXIT_ERROR if msg: logger.error(msg) if args.show_rc: From 805f63167c910167f3f38f9ddc1b5936706d2509 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Wed, 18 May 2016 00:22:49 +0200 Subject: [PATCH 18/38] Translate Linux fsflags to BSD flags and vice versa --- borg/archive.py | 13 ++++---- borg/archiver.py | 5 ++- borg/helpers.py | 3 +- borg/platform.py | 4 +-- borg/platform_base.py | 14 ++++++++ borg/platform_linux.pyx | 67 ++++++++++++++++++++++++++++++++++++-- borg/testsuite/__init__.py | 22 +++++++++++-- borg/testsuite/archiver.py | 10 +++--- borg/testsuite/conftest.py | 4 --- conftest.py | 18 ++++++++++ 10 files changed, 132 insertions(+), 28 deletions(-) delete mode 100644 borg/testsuite/conftest.py create mode 100644 conftest.py diff --git a/borg/archive.py b/borg/archive.py index 0626acb04..710b52fb8 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -25,14 +25,13 @@ from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \ CompressionDecider1, CompressionDecider2, CompressionSpec, \ IntegrityError from .repository import Repository -from .platform import acl_get, acl_set +from .platform import acl_get, acl_set, set_flags, get_flags from .chunker import Chunker from .hashindex import ChunkIndex, ChunkIndexEntry from .cache import ChunkListEntry import msgpack has_lchmod = hasattr(os, 'lchmod') -has_lchflags = hasattr(os, 'lchflags') flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0) flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0) @@ -435,10 +434,9 @@ Number of files: {0.stats.nfiles}'''.format( else: os.utime(path, None, ns=(atime, mtime), follow_symlinks=False) acl_set(path, item, self.numeric_owner) - # Only available on OS X and FreeBSD - if has_lchflags and b'bsdflags' in item: + if b'bsdflags' in item: try: - os.lchflags(path, item[b'bsdflags']) + set_flags(path, item[b'bsdflags'], fd=fd) except OSError: pass # chown removes Linux capabilities, so set the extended attributes at the end, after chown, since they include @@ -506,8 +504,9 @@ Number of files: {0.stats.nfiles}'''.format( xattrs = xattr.get_all(path, follow_symlinks=False) if xattrs: item[b'xattrs'] = StableDict(xattrs) - if has_lchflags and st.st_flags: - item[b'bsdflags'] = st.st_flags + bsdflags = get_flags(path, st) + if bsdflags: + item[b'bsdflags'] = bsdflags acl_get(path, item, st, self.numeric_owner) return item diff --git a/borg/archiver.py b/borg/archiver.py index 8c9d018db..bd45da2ba 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -38,8 +38,7 @@ from .archive import Archive, ArchiveChecker, ArchiveRecreater from .remote import RepositoryServer, RemoteRepository, cache_if_remote from .selftest import selftest from .hashindex import ChunkIndexEntry - -has_lchflags = hasattr(os, 'lchflags') +from .platform import get_flags def argument(args, str_or_bool): @@ -316,7 +315,7 @@ class Archiver: return status = None # Ignore if nodump flag is set - if has_lchflags and (st.st_flags & stat.UF_NODUMP): + if get_flags(path, st) & stat.UF_NODUMP: return if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode): if not dry_run: diff --git a/borg/helpers.py b/borg/helpers.py index c73553e94..7e783f3eb 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1166,7 +1166,7 @@ class ItemFormatter: 'NUL': 'NUL character for creating print0 / xargs -0 like ouput, see bpath', } KEY_GROUPS = ( - ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget'), + ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'), ('size', 'csize', 'num_chunks', 'unique_chunks'), ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'), tuple(sorted(hashlib.algorithms_guaranteed)), @@ -1259,6 +1259,7 @@ class ItemFormatter: item_data['source'] = source item_data['linktarget'] = source item_data['extra'] = extra + item_data['flags'] = item.get(b'bsdflags') for key in self.used_call_keys: item_data[key] = self.call_keys[key](item) return item_data diff --git a/borg/platform.py b/borg/platform.py index 9c1c4ebed..e57c08707 100644 --- a/borg/platform.py +++ b/borg/platform.py @@ -1,9 +1,9 @@ import sys -from .platform_base import acl_get, acl_set, SyncFile, sync_dir, API_VERSION +from .platform_base import acl_get, acl_set, SyncFile, sync_dir, set_flags, get_flags, API_VERSION if sys.platform.startswith('linux'): # pragma: linux only - from .platform_linux import acl_get, acl_set, SyncFile, API_VERSION + from .platform_linux import acl_get, acl_set, SyncFile, set_flags, get_flags, API_VERSION elif sys.platform.startswith('freebsd'): # pragma: freebsd only from .platform_freebsd import acl_get, acl_set, API_VERSION elif sys.platform == 'darwin': # pragma: darwin only diff --git a/borg/platform_base.py b/borg/platform_base.py index a897f9f10..675b5af9e 100644 --- a/borg/platform_base.py +++ b/borg/platform_base.py @@ -21,6 +21,20 @@ def acl_set(path, item, numeric_owner=False): of the user/group names """ +try: + from os import lchflags + + def set_flags(path, bsd_flags, fd=None): + lchflags(path, bsd_flags) +except ImportError: + def set_flags(path, bsd_flags, fd=None): + pass + + +def get_flags(path, st): + """Return BSD-style file flags for path or stat without following symlinks.""" + return getattr(st, 'st_flags', 0) + def sync_dir(path): fd = os.open(path, os.O_RDONLY) diff --git a/borg/platform_linux.pyx b/borg/platform_linux.pyx index 854904f3f..8eb2b56d9 100644 --- a/borg/platform_linux.pyx +++ b/borg/platform_linux.pyx @@ -1,7 +1,8 @@ import os import re import resource -from stat import S_ISLNK +import stat + from .helpers import posix_acl_use_stored_uid_gid, user2uid, group2gid, safe_decode, safe_encode from .platform_base import SyncFile as BaseSyncFile from libc cimport errno @@ -33,10 +34,72 @@ cdef extern from "fcntl.h": unsigned int SYNC_FILE_RANGE_WAIT_BEFORE unsigned int SYNC_FILE_RANGE_WAIT_AFTER +cdef extern from "linux/fs.h": + # ioctls + int FS_IOC_SETFLAGS + int FS_IOC_GETFLAGS + + # inode flags + int FS_NODUMP_FL + int FS_IMMUTABLE_FL + int FS_APPEND_FL + int FS_COMPR_FL + +cdef extern from "stropts.h": + int ioctl(int fildes, int request, ...) + +cdef extern from "errno.h": + int errno + +cdef extern from "string.h": + char *strerror(int errnum) _comment_re = re.compile(' *#.*', re.M) +BSD_TO_LINUX_FLAGS = { + stat.UF_NODUMP: FS_NODUMP_FL, + stat.UF_IMMUTABLE: FS_IMMUTABLE_FL, + stat.UF_APPEND: FS_APPEND_FL, + stat.UF_COMPRESSED: FS_COMPR_FL, +} + + +def set_flags(path, bsd_flags, fd=None): + if fd is None and stat.S_ISLNK(os.lstat(path).st_mode): + return + cdef int flags = 0 + for bsd_flag, linux_flag in BSD_TO_LINUX_FLAGS.items(): + if bsd_flags & bsd_flag: + flags |= linux_flag + open_fd = fd is None + if open_fd: + fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW) + try: + if ioctl(fd, FS_IOC_SETFLAGS, &flags) == -1: + raise OSError(errno, strerror(errno).decode(), path) + finally: + if open_fd: + os.close(fd) + + +def get_flags(path, st): + if stat.S_ISLNK(st.st_mode): + return 0 + cdef int linux_flags + fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW) + try: + if ioctl(fd, FS_IOC_GETFLAGS, &linux_flags) == -1: + return 0 + finally: + os.close(fd) + bsd_flags = 0 + for bsd_flag, linux_flag in BSD_TO_LINUX_FLAGS.items(): + if linux_flags & linux_flag: + bsd_flags |= bsd_flag + return bsd_flags + + def acl_use_local_uid_gid(acl): """Replace the user/group field with the local uid/gid if possible """ @@ -93,7 +156,7 @@ def acl_get(path, item, st, numeric_owner=False): cdef char *access_text = NULL p = os.fsencode(path) - if S_ISLNK(st.st_mode) or acl_extended_file(p) <= 0: + if stat.S_ISLNK(st.st_mode) or acl_extended_file(p) <= 0: return if numeric_owner: converter = acl_numeric_ids diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index cccf97a82..50d005397 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -5,9 +5,13 @@ import posix import stat import sys import sysconfig +import tempfile import time import unittest + from ..xattr import get_all +from ..platform import get_flags +from .. import platform # Note: this is used by borg.selftest, do not use or import py.test functionality here. @@ -23,8 +27,20 @@ try: except ImportError: raises = None -has_lchflags = hasattr(os, 'lchflags') +has_lchflags = hasattr(os, 'lchflags') or sys.platform.startswith('linux') +no_lchlfags_because = '' if has_lchflags else '(not supported on this platform)' +try: + with tempfile.NamedTemporaryFile() as file: + platform.set_flags(file.name, stat.UF_NODUMP) +except OSError: + has_lchflags = False + no_lchlfags_because = '(the file system at %s does not support flags)' % tempfile.gettempdir() +try: + import llfuse + has_llfuse = True or llfuse # avoids "unused import" +except ImportError: + has_llfuse = False # The mtime get/set precision varies on different OS and Python versions if 'HAVE_FUTIMENS' in getattr(posix, '_have_functions', []): @@ -75,13 +91,13 @@ class BaseTestCase(unittest.TestCase): # Assume path2 is on FUSE if st_dev is different fuse = s1.st_dev != s2.st_dev attrs = ['st_mode', 'st_uid', 'st_gid', 'st_rdev'] - if has_lchflags: - attrs.append('st_flags') if not fuse or not os.path.isdir(path1): # dir nlink is always 1 on our fuse filesystem attrs.append('st_nlink') d1 = [filename] + [getattr(s1, a) for a in attrs] d2 = [filename] + [getattr(s2, a) for a in attrs] + d1.append(get_flags(path1, s1)) + d2.append(get_flags(path2, s2)) # ignore st_rdev if file is not a block/char device, fixes #203 if not stat.S_ISCHR(d1[1]) and not stat.S_ISBLK(d1[1]): d1[4] = None diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 363cc19e5..75ee0c04f 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -16,7 +16,7 @@ from hashlib import sha256 import pytest -from .. import xattr, helpers +from .. import xattr, helpers, platform from ..archive import Archive, ChunkBuffer, ArchiveRecreater from ..archiver import Archiver from ..cache import Cache @@ -26,15 +26,13 @@ from ..helpers import Chunk, Manifest, EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, b from ..key import KeyfileKeyBase from ..remote import RemoteRepository, PathNotAllowed from ..repository import Repository +from . import has_lchflags, has_llfuse from . import BaseTestCase, changedir, environment_variable try: import llfuse - has_llfuse = True or llfuse # avoids "unused import" except ImportError: - has_llfuse = False - -has_lchflags = hasattr(os, 'lchflags') + pass src_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) @@ -280,7 +278,7 @@ class ArchiverTestCaseBase(BaseTestCase): # FIFO node os.mkfifo(os.path.join(self.input_path, 'fifo1')) if has_lchflags: - os.lchflags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP) + platform.set_flags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP) try: # Block device os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20)) diff --git a/borg/testsuite/conftest.py b/borg/testsuite/conftest.py deleted file mode 100644 index 0c350fb7f..000000000 --- a/borg/testsuite/conftest.py +++ /dev/null @@ -1,4 +0,0 @@ -from ..logger import setup_logging - -# Ensure that the loggers exist for all tests -setup_logging() diff --git a/conftest.py b/conftest.py new file mode 100644 index 000000000..44ff5cde9 --- /dev/null +++ b/conftest.py @@ -0,0 +1,18 @@ +from borg.logger import setup_logging + +# Ensure that the loggers exist for all tests +setup_logging() + +from borg.testsuite import has_lchflags, no_lchlfags_because, has_llfuse +from borg.testsuite.platform import fakeroot_detected +from borg import xattr + + +def pytest_report_header(config, startdir): + yesno = ['no', 'yes'] + flags = 'Testing BSD-style flags: %s %s' % (yesno[has_lchflags], no_lchlfags_because) + fakeroot = 'fakeroot: %s (>=1.20.2: %s)' % ( + yesno[fakeroot_detected()], + yesno[xattr.XATTR_FAKEROOT]) + llfuse = 'Testing fuse: %s' % yesno[has_llfuse] + return '\n'.join((flags, llfuse, fakeroot)) From 43258abf2117a44238044587501e5a1a1e383dca Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Wed, 18 May 2016 17:08:45 +0200 Subject: [PATCH 19/38] Fix tox --- conftest.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/conftest.py b/conftest.py index 44ff5cde9..6ca799a3e 100644 --- a/conftest.py +++ b/conftest.py @@ -1,3 +1,24 @@ +import sys + +# This is a hack to fix path problems because "borg" (the package) is in the source root. +# When importing the conftest an "import borg" can incorrectly import the borg from the +# source root instead of the one installed in the environment. +# The workaround is to remove entries pointing there from the path and check whether "borg" +# is still importable. If it is not, then it has not been installed in the environment +# and the entries are put back. +# +# TODO: After moving the package to src/: remove this. + +original_path = list(sys.path) +for entry in original_path: + if entry == '' or entry.endswith('/borg'): + sys.path.remove(entry) + +try: + import borg +except ImportError: + sys.path = original_path + from borg.logger import setup_logging # Ensure that the loggers exist for all tests From 24cd303ca97d750de55ca17e7bb9a8de5554d6d9 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Wed, 18 May 2016 17:37:47 +0200 Subject: [PATCH 20/38] When probing key files, do binary reads --- borg/key.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/borg/key.py b/borg/key.py index d52c1fd2d..be79dfc14 100644 --- a/borg/key.py +++ b/borg/key.py @@ -394,13 +394,15 @@ class KeyfileKey(KeyfileKeyBase): FILE_ID = 'BORG_KEY' def find_key(self): - id = hexlify(self.repository.id).decode('ascii') + file_id = self.FILE_ID.encode() + first_line = file_id + b' ' + hexlify(self.repository.id) keys_dir = get_keys_dir() for name in os.listdir(keys_dir): filename = os.path.join(keys_dir, name) - with open(filename, 'r') as fd: - line = fd.readline().strip() - if line.startswith(self.FILE_ID) and line[len(self.FILE_ID) + 1:] == id: + # we do the magic / id check in binary mode to avoid stumbling over + # decoding errors if somebody has binary files in the keys dir for some reason. + with open(filename, 'rb') as fd: + if fd.read(len(first_line)) == first_line: return filename raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir()) From b5404209bbf641127e46db14cce9b58d6ef047bb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 May 2016 04:26:43 +0200 Subject: [PATCH 21/38] add swidth call to determine string width on terminal on posix platforms, this is a small wrapper around wcswidth(3). --- borg/platform.py | 8 ++++---- borg/platform_base.py | 8 ++++++++ borg/platform_darwin.pyx | 6 ++++++ borg/platform_freebsd.pyx | 6 ++++++ borg/platform_linux.pyx | 6 ++++++ borg/testsuite/platform.py | 15 ++++++++++++++- 6 files changed, 44 insertions(+), 5 deletions(-) diff --git a/borg/platform.py b/borg/platform.py index e57c08707..d05b60fb9 100644 --- a/borg/platform.py +++ b/borg/platform.py @@ -1,10 +1,10 @@ import sys -from .platform_base import acl_get, acl_set, SyncFile, sync_dir, set_flags, get_flags, API_VERSION +from .platform_base import acl_get, acl_set, SyncFile, sync_dir, set_flags, get_flags, swidth, API_VERSION if sys.platform.startswith('linux'): # pragma: linux only - from .platform_linux import acl_get, acl_set, SyncFile, set_flags, get_flags, API_VERSION + from .platform_linux import acl_get, acl_set, SyncFile, set_flags, get_flags, swidth, API_VERSION elif sys.platform.startswith('freebsd'): # pragma: freebsd only - from .platform_freebsd import acl_get, acl_set, API_VERSION + from .platform_freebsd import acl_get, acl_set, swidth, API_VERSION elif sys.platform == 'darwin': # pragma: darwin only - from .platform_darwin import acl_get, acl_set, API_VERSION + from .platform_darwin import acl_get, acl_set, swidth, API_VERSION diff --git a/borg/platform_base.py b/borg/platform_base.py index 675b5af9e..da62260b6 100644 --- a/borg/platform_base.py +++ b/borg/platform_base.py @@ -90,3 +90,11 @@ class SyncFile: self.sync() self.fd.close() sync_dir(os.path.dirname(self.fd.name)) + + +def swidth(s): + """terminal output width of string + + For western scripts, this is just len(s), but for cjk glyphs, 2 cells are used. + """ + return len(s) diff --git a/borg/platform_darwin.pyx b/borg/platform_darwin.pyx index 4dc25b83a..fde6ca0f5 100644 --- a/borg/platform_darwin.pyx +++ b/borg/platform_darwin.pyx @@ -3,6 +3,12 @@ from .helpers import user2uid, group2gid, safe_decode, safe_encode API_VERSION = 3 +cdef extern from "wchar.h": + cdef int wcswidth(const Py_UNICODE *str, size_t n) + +def swidth(s): + return wcswidth(s, len(s)) + cdef extern from "sys/acl.h": ctypedef struct _acl_t: pass diff --git a/borg/platform_freebsd.pyx b/borg/platform_freebsd.pyx index ae69af68a..0d02cd06c 100644 --- a/borg/platform_freebsd.pyx +++ b/borg/platform_freebsd.pyx @@ -7,6 +7,12 @@ cdef extern from "errno.h": int errno int EINVAL +cdef extern from "wchar.h": + cdef int wcswidth(const Py_UNICODE *str, size_t n) + +def swidth(s): + return wcswidth(s, len(s)) + cdef extern from "sys/types.h": int ACL_TYPE_ACCESS int ACL_TYPE_DEFAULT diff --git a/borg/platform_linux.pyx b/borg/platform_linux.pyx index 8eb2b56d9..9a509efdb 100644 --- a/borg/platform_linux.pyx +++ b/borg/platform_linux.pyx @@ -9,6 +9,12 @@ from libc cimport errno API_VERSION = 3 +cdef extern from "wchar.h": + cdef int wcswidth(const Py_UNICODE *str, size_t n) + +def swidth(s): + return wcswidth(s, len(s)) + cdef extern from "sys/types.h": int ACL_TYPE_ACCESS int ACL_TYPE_DEFAULT diff --git a/borg/testsuite/platform.py b/borg/testsuite/platform.py index feb0685c5..9eaede988 100644 --- a/borg/testsuite/platform.py +++ b/borg/testsuite/platform.py @@ -4,7 +4,7 @@ import sys import tempfile import unittest -from ..platform import acl_get, acl_set +from ..platform import acl_get, acl_set, swidth from . import BaseTestCase @@ -138,3 +138,16 @@ class PlatformDarwinTestCase(BaseTestCase): self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True) self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended']) self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended']) + + +@unittest.skipUnless(sys.platform.startswith(('linux', 'freebsd', 'darwin')), 'POSIX only tests') +class PlatformPosixTestCase(BaseTestCase): + + def test_swidth_ascii(self): + self.assert_equal(swidth("borg"), 4) + + def test_swidth_cjk(self): + self.assert_equal(swidth("バックアップ"), 6 * 2) + + def test_swidth_mixed(self): + self.assert_equal(swidth("borgバックアップ"), 4 + 6 * 2) From d7500a119172a74aadd1ff46e936d5b71add719b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 May 2016 16:08:27 +0200 Subject: [PATCH 22/38] create new platform_posix module move common posix cython code there. --- borg/platform_darwin.pyx | 7 +------ borg/platform_freebsd.pyx | 7 +------ borg/platform_linux.pyx | 8 ++------ borg/platform_posix.pyx | 5 +++++ setup.py | 8 +++++++- 5 files changed, 16 insertions(+), 19 deletions(-) create mode 100644 borg/platform_posix.pyx diff --git a/borg/platform_darwin.pyx b/borg/platform_darwin.pyx index fde6ca0f5..fb94d9c84 100644 --- a/borg/platform_darwin.pyx +++ b/borg/platform_darwin.pyx @@ -1,14 +1,9 @@ import os from .helpers import user2uid, group2gid, safe_decode, safe_encode +from .platform_posix import swidth API_VERSION = 3 -cdef extern from "wchar.h": - cdef int wcswidth(const Py_UNICODE *str, size_t n) - -def swidth(s): - return wcswidth(s, len(s)) - cdef extern from "sys/acl.h": ctypedef struct _acl_t: pass diff --git a/borg/platform_freebsd.pyx b/borg/platform_freebsd.pyx index 0d02cd06c..eae730f49 100644 --- a/borg/platform_freebsd.pyx +++ b/borg/platform_freebsd.pyx @@ -1,5 +1,6 @@ import os from .helpers import posix_acl_use_stored_uid_gid, safe_encode, safe_decode +from .platform_posix import swidth API_VERSION = 3 @@ -7,12 +8,6 @@ cdef extern from "errno.h": int errno int EINVAL -cdef extern from "wchar.h": - cdef int wcswidth(const Py_UNICODE *str, size_t n) - -def swidth(s): - return wcswidth(s, len(s)) - cdef extern from "sys/types.h": int ACL_TYPE_ACCESS int ACL_TYPE_DEFAULT diff --git a/borg/platform_linux.pyx b/borg/platform_linux.pyx index 9a509efdb..142185f3b 100644 --- a/borg/platform_linux.pyx +++ b/borg/platform_linux.pyx @@ -5,16 +5,12 @@ import stat from .helpers import posix_acl_use_stored_uid_gid, user2uid, group2gid, safe_decode, safe_encode from .platform_base import SyncFile as BaseSyncFile +from .platform_posix import swidth + from libc cimport errno API_VERSION = 3 -cdef extern from "wchar.h": - cdef int wcswidth(const Py_UNICODE *str, size_t n) - -def swidth(s): - return wcswidth(s, len(s)) - cdef extern from "sys/types.h": int ACL_TYPE_ACCESS int ACL_TYPE_DEFAULT diff --git a/borg/platform_posix.pyx b/borg/platform_posix.pyx new file mode 100644 index 000000000..f2a8e1773 --- /dev/null +++ b/borg/platform_posix.pyx @@ -0,0 +1,5 @@ +cdef extern from "wchar.h": + cdef int wcswidth(const Py_UNICODE *str, size_t n) + +def swidth(s): + return wcswidth(s, len(s)) diff --git a/setup.py b/setup.py index 741070cc5..a11de388b 100644 --- a/setup.py +++ b/setup.py @@ -40,6 +40,7 @@ compress_source = 'borg/compress.pyx' crypto_source = 'borg/crypto.pyx' chunker_source = 'borg/chunker.pyx' hashindex_source = 'borg/hashindex.pyx' +platform_posix_source = 'borg/platform_posix.pyx' platform_linux_source = 'borg/platform_linux.pyx' platform_darwin_source = 'borg/platform_darwin.pyx' platform_freebsd_source = 'borg/platform_freebsd.pyx' @@ -60,6 +61,7 @@ try: 'borg/crypto.c', 'borg/chunker.c', 'borg/_chunker.c', 'borg/hashindex.c', 'borg/_hashindex.c', + 'borg/platform_posix.c', 'borg/platform_linux.c', 'borg/platform_freebsd.c', 'borg/platform_darwin.c', @@ -75,13 +77,14 @@ except ImportError: crypto_source = crypto_source.replace('.pyx', '.c') chunker_source = chunker_source.replace('.pyx', '.c') hashindex_source = hashindex_source.replace('.pyx', '.c') + platform_posix_source = platform_posix_source.replace('.pyx', '.c') platform_linux_source = platform_linux_source.replace('.pyx', '.c') platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c') platform_darwin_source = platform_darwin_source.replace('.pyx', '.c') from distutils.command.build_ext import build_ext if not on_rtd and not all(os.path.exists(path) for path in [ compress_source, crypto_source, chunker_source, hashindex_source, - platform_linux_source, platform_freebsd_source]): + platform_posix_source, platform_linux_source, platform_freebsd_source]): raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version.') @@ -286,6 +289,9 @@ if not on_rtd: Extension('borg.chunker', [chunker_source]), Extension('borg.hashindex', [hashindex_source]) ] + if sys.platform.startswith(('linux', 'freebsd', 'darwin')): + ext_modules.append(Extension('borg.platform_posix', [platform_posix_source])) + if sys.platform == 'linux': ext_modules.append(Extension('borg.platform_linux', [platform_linux_source], libraries=['acl'])) elif sys.platform.startswith('freebsd'): From 596a276a264e93687aa41783acb45536a0bebb32 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 May 2016 18:07:10 +0200 Subject: [PATCH 23/38] setup.py: add check for platform_darwin.c this was just forgotten... --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ff9899da5..e7abc79e0 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,7 @@ except ImportError: from distutils.command.build_ext import build_ext if not on_rtd and not all(os.path.exists(path) for path in [ compress_source, crypto_source, chunker_source, hashindex_source, - platform_linux_source, platform_freebsd_source]): + platform_linux_source, platform_freebsd_source, platform_darwin_source]): raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version.') From bf00eba1c1d2cbdcc5d512b5d9e17dc80f4189a2 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 May 2016 18:32:29 +0200 Subject: [PATCH 24/38] use swidth when doing terminal output width computations, fixes #1051 cjk glyphs occupy 2 cells' width on the terminal. --- borg/helpers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index 7e783f3eb..9582564a1 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -32,6 +32,7 @@ from . import chunker from .constants import * # NOQA from . import crypto from .compress import COMPR_BUFFER, get_compressor +from .platform import swidth from . import shellpattern import msgpack import msgpack.fallback @@ -214,9 +215,9 @@ class Statistics: if not final: msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self) path = remove_surrogates(item[b'path']) if item else '' - space = columns - len(msg) - if space < len('...') + len(path): - path = '%s...%s' % (path[:(space // 2) - len('...')], path[-space // 2:]) + space = columns - swidth(msg) + if space < swidth('...') + swidth(path): + path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:]) msg += "{0:<{space}}".format(path, space=space) else: msg = ' ' * columns From afb9dc9d7a44e8ed34a755bbf78aabc85bf59d17 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Tue, 17 May 2016 22:59:58 -0400 Subject: [PATCH 25/38] Update logging parser to allow remotes to pass logger name Parser now understands both old format messages (to keep talking to old server) and new format messages that pass a logger name. If logger name is passed, the message is directed to the same logger locally. This could be cherry-picked to 1.x-maint (and 0.x-maint?) to allow point releases to understand borg 1.1 server messages changed in the next commit. Worst case, currently existing 0.x and 1.0.x clients talking to a 1.1.x server will see messages like: borg.repository Remote: hi borg.archiver Remote: foo instead of Remote: hi Remote: foo --- borg/logger.py | 2 +- borg/remote.py | 8 +++- borg/testsuite/repository.py | 84 +++++++++++++++++++++++++++++++++++- 3 files changed, 91 insertions(+), 3 deletions(-) diff --git a/borg/logger.py b/borg/logger.py index f2350f8d7..c75aaef75 100644 --- a/borg/logger.py +++ b/borg/logger.py @@ -88,7 +88,7 @@ def setup_logging(stream=None, conf_fname=None, env_var='BORG_LOGGING_CONF', lev logger = logging.getLogger('') handler = logging.StreamHandler(stream) if is_serve: - fmt = '$LOG %(levelname)s Remote: %(message)s' + fmt = '$LOG %(levelname)s %(name)s Remote: %(message)s' else: fmt = '%(message)s' handler.setFormatter(logging.Formatter(fmt)) diff --git a/borg/remote.py b/borg/remote.py index 5444f05bf..72c26c5e1 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -301,7 +301,13 @@ class RemoteRepository: if line.startswith('$LOG '): _, level, msg = line.split(' ', 2) level = getattr(logging, level, logging.CRITICAL) # str -> int - logging.log(level, msg.rstrip()) + if msg.startswith('Remote:'): + # server format: '$LOG Remote: ' + logging.log(level, msg.rstrip()) + else: + # server format '$LOG Remote: ' + logname, msg = msg.split(' ', 1) + logging.getLogger(logname).log(level, msg.rstrip()) else: sys.stderr.write("Remote: " + line) if w: diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index aa935e322..2695a5cbe 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -1,3 +1,5 @@ +import io +import logging import os import shutil import sys @@ -7,7 +9,7 @@ from unittest.mock import patch from ..hashindex import NSIndex from ..helpers import Location, IntegrityError from ..locking import UpgradableLock, LockFailed -from ..remote import RemoteRepository, InvalidRPCMethod +from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint from ..repository import Repository from . import BaseTestCase @@ -389,3 +391,83 @@ class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase): def test_crash_before_compact(self): # skip this test, we can't mock-patch a Repository class in another process! pass + + +class RemoteRepositoryLoggingStub(RemoteRepository): + """ run a remote command that just prints a logging-formatted message to + stderr, and stub out enough of RemoteRepository to avoid the resulting + exceptions """ + def __init__(self, *args, **kw): + self.msg = kw.pop('msg') + super().__init__(*args, **kw) + + def borg_cmd(self, cmd, testing): + return [sys.executable, '-c', 'import sys; print("{}", file=sys.stderr)'.format(self.msg), ] + + def __del__(self): + # clean up from exception without triggering assert + if self.p: + self.close() + + +class RemoteRepositoryLoggerTestCase(RepositoryTestCaseBase): + def setUp(self): + self.location = Location('__testsuite__:/doesntexist/repo') + self.stream = io.StringIO() + self.handler = logging.StreamHandler(self.stream) + logging.getLogger().handlers[:] = [self.handler] + logging.getLogger('borg.repository').handlers[:] = [] + logging.getLogger('borg.repository.foo').handlers[:] = [] + + def tearDown(self): + pass + + def create_repository(self, msg): + try: + RemoteRepositoryLoggingStub(self.location, msg=msg) + except ConnectionClosedWithHint: + # stub is dumb, so this exception expected + pass + + def test_old_format_messages(self): + self.handler.setLevel(logging.DEBUG) + logging.getLogger().setLevel(logging.DEBUG) + + self.create_repository("$LOG INFO Remote: old format message") + self.assert_equal(self.stream.getvalue(), 'Remote: old format message\n') + + def test_new_format_messages(self): + self.handler.setLevel(logging.DEBUG) + logging.getLogger().setLevel(logging.DEBUG) + + self.create_repository("$LOG INFO borg.repository Remote: new format message") + self.assert_equal(self.stream.getvalue(), 'Remote: new format message\n') + + def test_remote_messages_screened(self): + # default borg config for root logger + self.handler.setLevel(logging.WARNING) + logging.getLogger().setLevel(logging.WARNING) + + self.create_repository("$LOG INFO borg.repository Remote: new format info message") + self.assert_equal(self.stream.getvalue(), '') + + def test_info_to_correct_local_child(self): + logging.getLogger('borg.repository').setLevel(logging.INFO) + logging.getLogger('borg.repository.foo').setLevel(logging.INFO) + # default borg config for root logger + self.handler.setLevel(logging.WARNING) + logging.getLogger().setLevel(logging.WARNING) + + child_stream = io.StringIO() + child_handler = logging.StreamHandler(child_stream) + child_handler.setLevel(logging.INFO) + logging.getLogger('borg.repository').handlers[:] = [child_handler] + foo_stream = io.StringIO() + foo_handler = logging.StreamHandler(foo_stream) + foo_handler.setLevel(logging.INFO) + logging.getLogger('borg.repository.foo').handlers[:] = [foo_handler] + + self.create_repository("$LOG INFO borg.repository Remote: new format child message") + self.assert_equal(foo_stream.getvalue(), '') + self.assert_equal(child_stream.getvalue(), 'Remote: new format child message\n') + self.assert_equal(self.stream.getvalue(), '') From 226e5519f33bfd5b14082808c6af72b24f0ee3a8 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Tue, 17 May 2016 22:59:58 -0400 Subject: [PATCH 26/38] Print implied output without --info/-v There are persistent questions why output from options like --list and --stats doesn't show up. Also, borg currently isn't able to show *just* the output for a given option (--list, --stats, --show-rc, --show-version, or --progress), without other INFO level messages. The solution is to use more granular loggers, so that messages specific to a given option goes to a logger designated for that option. That option-specific logger can then be configured separately from the regular loggers. Those option-specific loggers can also be used as a hook in a BORG_LOGGING_CONF config file to log the --list output to a separate file, or send --stats output to a network socket where some daemon could analyze it. Steps: - create an option-specific logger for each of the implied output options - modify the messages specific to each option to go to the correct logger - if an implied output option is passed, change the option-specific logger (only) to log at INFO level - test that root logger messages don't come through option-specific loggers They shouldn't, per https://docs.python.org/3/howto/logging.html#logging-flow but test just the same. Particularly test a message that can come from remote repositories. Fixes #526, #573, #665, #824 --- borg/archiver.py | 47 ++++++++++++----- borg/helpers.py | 32 +++++++++--- borg/testsuite/archiver.py | 102 ++++++++++++++++++++++++++++++------- borg/testsuite/helpers.py | 26 ++++++++-- docs/changes.rst | 6 ++- docs/development.rst | 6 ++- docs/quickstart.rst | 11 ++-- docs/usage.rst | 17 +++---- 8 files changed, 187 insertions(+), 60 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index bd45da2ba..59d7e3336 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -8,6 +8,7 @@ import functools import hashlib import inspect import io +import logging import os import re import shlex @@ -111,7 +112,7 @@ class Archiver: def print_file_status(self, status, path): if self.output_list and (self.output_filter is None or status in self.output_filter): - logger.info("%1s %s", status, remove_surrogates(path)) + logging.getLogger('borg.output.list').info("%1s %s", status, remove_surrogates(path)) @staticmethod def compare_chunk_contents(chunks1, chunks2): @@ -277,7 +278,7 @@ class Archiver: DASHES, str(archive.stats), str(cache), - DASHES) + DASHES, logger=logging.getLogger('borg.output.stats')) self.output_filter = args.output_filter self.output_list = args.output_list @@ -416,7 +417,7 @@ class Archiver: while dirs and not item[b'path'].startswith(dirs[-1][b'path']): archive.extract_item(dirs.pop(-1), stdout=stdout) if output_list: - logger.info(remove_surrogates(orig_path)) + logging.getLogger('borg.output.list').info(remove_surrogates(orig_path)) try: if dry_run: archive.extract_item(item, dry_run=True) @@ -673,7 +674,7 @@ class Archiver: log_multi(DASHES, stats.summary.format(label='Deleted data:', stats=stats), str(cache), - DASHES) + DASHES, logger=logging.getLogger('borg.output.stats')) else: if not args.cache_only: msg = [] @@ -820,18 +821,19 @@ class Archiver: to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints)) stats = Statistics() with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache: + list_logger = logging.getLogger('borg.output.list') for archive in archives_checkpoints: if archive in to_delete: if args.dry_run: if args.output_list: - logger.info('Would prune: %s' % format_archive(archive)) + list_logger.info('Would prune: %s' % format_archive(archive)) else: if args.output_list: - logger.info('Pruning archive: %s' % format_archive(archive)) + list_logger.info('Pruning archive: %s' % format_archive(archive)) Archive(repository, key, manifest, archive.name, cache).delete(stats) else: if args.output_list: - logger.info('Keeping archive: %s' % format_archive(archive)) + list_logger.info('Keeping archive: %s' % format_archive(archive)) if to_delete and not args.dry_run: manifest.write() repository.commit(save_space=args.save_space) @@ -840,7 +842,7 @@ class Archiver: log_multi(DASHES, stats.summary.format(label='Deleted data:', stats=stats), str(cache), - DASHES) + DASHES, logger=logging.getLogger('borg.output.stats')) return self.exit_code def do_upgrade(self, args): @@ -1299,6 +1301,9 @@ class Archiver: help='only check last N archives (Default: all)') subparser.add_argument('-P', '--prefix', dest='prefix', type=str, help='only consider archive names starting with this prefix') + subparser.add_argument('-p', '--progress', dest='progress', + action='store_true', default=False, + help="""show progress display while checking""") change_passphrase_epilog = textwrap.dedent(""" The key files used for repository encryption are optionally passphrase @@ -2065,12 +2070,27 @@ class Archiver: check_extension_modules() selftest(logger) + def _setup_implied_logging(self, args): + """ turn on INFO level logging for args that imply that they will produce output """ + # map of option name to name of logger for that option + option_logger = { + 'output_list': 'borg.output.list', + 'show_version': 'borg.output.show-version', + 'show_rc': 'borg.output.show-rc', + 'stats': 'borg.output.stats', + 'progress': 'borg.output.progress', + } + for option, logger_name in option_logger.items(): + if args.get(option, False): + logging.getLogger(logger_name).setLevel('INFO') + def run(self, args): os.umask(args.umask) # early, before opening files self.lock_wait = args.lock_wait setup_logging(level=args.log_level, is_serve=args.func == self.do_serve) # do not use loggers before this! + self._setup_implied_logging(vars(args)) if args.show_version: - logger.info('borgbackup version %s' % __version__) + logging.getLogger('borg.output.show-version').info('borgbackup version %s' % __version__) self.prerun_checks(logger) if is_slow_msgpack(): logger.warning("Using a pure-python msgpack! This will result in lower performance.") @@ -2142,15 +2162,16 @@ def main(): # pragma: no cover if msg: logger.error(msg) if args.show_rc: + rc_logger = logging.getLogger('borg.output.show-rc') exit_msg = 'terminating with %s status, rc %d' if exit_code == EXIT_SUCCESS: - logger.info(exit_msg % ('success', exit_code)) + rc_logger.info(exit_msg % ('success', exit_code)) elif exit_code == EXIT_WARNING: - logger.warning(exit_msg % ('warning', exit_code)) + rc_logger.warning(exit_msg % ('warning', exit_code)) elif exit_code == EXIT_ERROR: - logger.error(exit_msg % ('error', exit_code)) + rc_logger.error(exit_msg % ('error', exit_code)) else: - logger.error(exit_msg % ('abnormal', exit_code or 666)) + rc_logger.error(exit_msg % ('abnormal', exit_code or 666)) sys.exit(exit_code) diff --git a/borg/helpers.py b/borg/helpers.py index 7e783f3eb..d6fa4aba3 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1037,7 +1037,7 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None, class ProgressIndicatorPercent: - def __init__(self, total, step=5, start=0, same_line=False, msg="%3.0f%%", file=None): + def __init__(self, total, step=5, start=0, same_line=False, msg="%3.0f%%"): """ Percentage-based progress indicator @@ -1046,17 +1046,33 @@ class ProgressIndicatorPercent: :param start: at which percent value to start :param same_line: if True, emit output always on same line :param msg: output message, must contain one %f placeholder for the percentage - :param file: output file, default: sys.stderr """ self.counter = 0 # 0 .. (total-1) self.total = total self.trigger_at = start # output next percentage value when reaching (at least) this self.step = step - if file is None: - file = sys.stderr - self.file = file self.msg = msg self.same_line = same_line + self.handler = None + self.logger = logging.getLogger('borg.output.progress') + + # If there are no handlers, set one up explicitly because the + # terminator and propagation needs to be set. If there are, + # they must have been set up by BORG_LOGGING_CONF: skip setup. + if not self.logger.handlers: + self.handler = logging.StreamHandler(stream=sys.stderr) + self.handler.setLevel(logging.INFO) + self.handler.terminator = '\r' if self.same_line else '\n' + + self.logger.addHandler(self.handler) + if self.logger.level == logging.NOTSET: + self.logger.setLevel(logging.WARN) + self.logger.propagate = False + + def __del__(self): + if self.handler is not None: + self.logger.removeHandler(self.handler) + self.handler.close() def progress(self, current=None): if current is not None: @@ -1073,11 +1089,11 @@ class ProgressIndicatorPercent: return self.output(pct) def output(self, percent): - print(self.msg % percent, file=self.file, end='\r' if self.same_line else '\n', flush=True) + self.logger.info(self.msg % percent) def finish(self): if self.same_line: - print(" " * len(self.msg % 100.0), file=self.file, end='\r') + self.logger.info(" " * len(self.msg % 100.0)) class ProgressIndicatorEndless: @@ -1127,7 +1143,7 @@ def sysinfo(): return '\n'.join(info) -def log_multi(*msgs, level=logging.INFO): +def log_multi(*msgs, level=logging.INFO, logger=logger): """ log multiple lines of text, each line by a separate logging call for cosmetic reasons diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 75ee0c04f..5b8cf95af 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -3,6 +3,7 @@ import errno import os import inspect from io import StringIO +import logging import random import stat import subprocess @@ -297,9 +298,14 @@ class ArchiverTestCaseBase(BaseTestCase): class ArchiverTestCase(ArchiverTestCaseBase): def test_basic_functionality(self): have_root = self.create_test_files() - self.cmd('init', self.repository_location) + # fork required to test show-rc output + output = self.cmd('init', '--show-version', '--show-rc', self.repository_location, fork=True) + self.assert_in('borgbackup version', output) + self.assert_in('terminating with success status, rc 0', output) self.cmd('create', self.repository_location + '::test', 'input') - self.cmd('create', '--stats', self.repository_location + '::test.2', 'input') + output = self.cmd('create', '--stats', self.repository_location + '::test.2', 'input') + self.assert_in('Archive name: test.2', output) + self.assert_in('This archive: ', output) with changedir('output'): self.cmd('extract', self.repository_location + '::test') list_output = self.cmd('list', '--short', self.repository_location) @@ -645,6 +651,31 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd("extract", self.repository_location + "::test", "fm:input/file1", "fm:*file33*", "input/file2") self.assert_equal(sorted(os.listdir("output/input")), ["file1", "file2", "file333"]) + def test_extract_list_output(self): + self.cmd('init', self.repository_location) + self.create_regular_file('file', size=1024 * 80) + + self.cmd('create', self.repository_location + '::test', 'input') + + with changedir('output'): + output = self.cmd('extract', self.repository_location + '::test') + self.assert_not_in("input/file", output) + shutil.rmtree('output/input') + + with changedir('output'): + output = self.cmd('extract', '--info', self.repository_location + '::test') + self.assert_not_in("input/file", output) + shutil.rmtree('output/input') + + with changedir('output'): + output = self.cmd('extract', '--list', self.repository_location + '::test') + self.assert_in("input/file", output) + shutil.rmtree('output/input') + + with changedir('output'): + output = self.cmd('extract', '--list', '--info', self.repository_location + '::test') + self.assert_in("input/file", output) + def _create_test_caches(self): self.cmd('init', self.repository_location) self.create_regular_file('file1', size=1024 * 80) @@ -857,7 +888,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('extract', '--dry-run', self.repository_location + '::test.2') self.cmd('delete', self.repository_location + '::test') self.cmd('extract', '--dry-run', self.repository_location + '::test.2') - self.cmd('delete', '--stats', self.repository_location + '::test.2') + output = self.cmd('delete', '--stats', self.repository_location + '::test.2') + self.assert_in('Deleted data:', output) # Make sure all data except the manifest has been deleted with Repository(self.repository_path) as repository: self.assert_equal(len(repository), 1) @@ -880,12 +912,16 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('init', self.repository_location) self.create_src_archive('test') self.cmd('extract', '--dry-run', self.repository_location + '::test') - self.cmd('check', self.repository_location) + output = self.cmd('check', '--show-version', self.repository_location) + self.assert_in('borgbackup version', output) # implied output even without --info given + self.assert_not_in('Starting repository check', output) # --info not given for root logger + name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0] with open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+b') as fd: fd.seek(100) fd.write(b'XXXX') - self.cmd('check', self.repository_location, exit_code=1) + output = self.cmd('check', '--info', self.repository_location, exit_code=1) + self.assert_in('Starting repository check', output) # --info given for root logger # we currently need to be able to create a lock directory inside the repo: @pytest.mark.xfail(reason="we need to be able to create the lock directory inside the repo") @@ -933,11 +969,11 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.utime('input/file1', (now - 5, now - 5)) # 5 seconds ago self.create_regular_file('file2', size=1024 * 80) self.cmd('init', self.repository_location) - output = self.cmd('create', '-v', '--list', self.repository_location + '::test', 'input') + output = self.cmd('create', '--list', self.repository_location + '::test', 'input') self.assert_in("A input/file1", output) self.assert_in("A input/file2", output) # should find first file as unmodified - output = self.cmd('create', '-v', '--list', self.repository_location + '::test1', 'input') + output = self.cmd('create', '--list', self.repository_location + '::test1', 'input') self.assert_in("U input/file1", output) # this is expected, although surprising, for why, see: # https://borgbackup.readthedocs.org/en/latest/faq.html#i-am-seeing-a-added-status-for-a-unchanged-file @@ -951,11 +987,11 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.utime('input/file1', (now - 5, now - 5)) # 5 seconds ago self.create_regular_file('file2', size=1024 * 80) self.cmd('init', self.repository_location) - output = self.cmd('create', '-v', '--list', self.repository_location + '::test', 'input') + output = self.cmd('create', '--list', self.repository_location + '::test', 'input') self.assert_in("A input/file1", output) self.assert_in("A input/file2", output) # should find second file as excluded - output = self.cmd('create', '-v', '--list', self.repository_location + '::test1', 'input', '--exclude', '*/file2') + output = self.cmd('create', '--list', self.repository_location + '::test1', 'input', '--exclude', '*/file2') self.assert_in("U input/file1", output) self.assert_in("x input/file2", output) @@ -972,15 +1008,15 @@ class ArchiverTestCase(ArchiverTestCaseBase): output = self.cmd('create', self.repository_location + '::test0', 'input') self.assert_not_in('file1', output) # should list the file as unchanged - output = self.cmd('create', '-v', '--list', '--filter=U', self.repository_location + '::test1', 'input') + output = self.cmd('create', '--list', '--filter=U', self.repository_location + '::test1', 'input') self.assert_in('file1', output) # should *not* list the file as changed - output = self.cmd('create', '-v', '--filter=AM', self.repository_location + '::test2', 'input') + output = self.cmd('create', '--list', '--filter=AM', self.repository_location + '::test2', 'input') self.assert_not_in('file1', output) # change the file self.create_regular_file('file1', size=1024 * 100) # should list the file as changed - output = self.cmd('create', '-v', '--list', '--filter=AM', self.repository_location + '::test3', 'input') + output = self.cmd('create', '--list', '--filter=AM', self.repository_location + '::test3', 'input') self.assert_in('file1', output) # def test_cmdline_compatibility(self): @@ -998,7 +1034,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir) self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir) self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir) - output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2') + output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2') + self.assert_in('Keeping archive: test2', output) self.assert_in('Would prune: test1', output) # must keep the latest non-checkpoint archive: self.assert_in('Keeping archive: test2', output) @@ -1032,9 +1069,10 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('init', self.repository_location) self.cmd('create', self.repository_location + '::test1', src_dir) self.cmd('create', self.repository_location + '::test2', src_dir) - output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2') + output = self.cmd('prune', '--list', '--stats', '--dry-run', self.repository_location, '--keep-daily=2') self.assert_in('Keeping archive: test2', output) self.assert_in('Would prune: test1', output) + self.assert_in('Deleted data:', output) output = self.cmd('list', self.repository_location) self.assert_in('test1', output) self.assert_in('test2', output) @@ -1049,7 +1087,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::foo-2015-08-12-20:00', src_dir) self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir) self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir) - output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-') + output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-') self.assert_in('Keeping archive: foo-2015-08-12-20:00', output) self.assert_in('Would prune: foo-2015-08-12-10:00', output) output = self.cmd('list', self.repository_location) @@ -1401,7 +1439,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): if interrupt_early: process_files = 0 with patch.object(ArchiveRecreater, 'process_item', self._recreate_interrupt_patch(process_files)): - self.cmd('recreate', '-sv', '--list', self.repository_location, 'input/dir2') + self.cmd('recreate', self.repository_location, 'input/dir2') assert 'test.recreate' in self.cmd('list', self.repository_location) if change_args: with patch.object(sys, 'argv', sys.argv + ['non-forking tests don\'t use sys.argv']): @@ -1492,6 +1530,32 @@ class ArchiverTestCase(ArchiverTestCaseBase): cmd = 'python3', '-c', 'import os, sys; sys.exit(42 if os.path.exists("%s") else 23)' % lock_path self.cmd('with-lock', self.repository_location, *cmd, fork=True, exit_code=42) + def test_recreate_list_output(self): + self.cmd('init', self.repository_location) + self.create_regular_file('file1', size=0) + self.create_regular_file('file2', size=0) + self.create_regular_file('file3', size=0) + self.create_regular_file('file4', size=0) + self.create_regular_file('file5', size=0) + + self.cmd('create', self.repository_location + '::test', 'input') + + output = self.cmd('recreate', '--list', '--info', self.repository_location + '::test', '-e', 'input/file2') + self.assert_in("input/file1", output) + self.assert_in("x input/file2", output) + + output = self.cmd('recreate', '--list', self.repository_location + '::test', '-e', 'input/file3') + self.assert_in("input/file1", output) + self.assert_in("x input/file3", output) + + output = self.cmd('recreate', self.repository_location + '::test', '-e', 'input/file4') + self.assert_not_in("input/file1", output) + self.assert_not_in("x input/file4", output) + + output = self.cmd('recreate', '--info', self.repository_location + '::test', '-e', 'input/file5') + self.assert_not_in("input/file1", output) + self.assert_not_in("x input/file5", output) + @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available') class ArchiverTestCaseBinary(ArchiverTestCase): @@ -1532,12 +1596,16 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): return archive, repository def test_check_usage(self): - output = self.cmd('check', '-v', self.repository_location, exit_code=0) + output = self.cmd('check', '-v', '--progress', self.repository_location, exit_code=0) self.assert_in('Starting repository check', output) self.assert_in('Starting archive consistency check', output) + self.assert_in('Checking segments', output) + # reset logging to new process default to avoid need for fork=True on next check + logging.getLogger('borg.output.progress').setLevel(logging.NOTSET) output = self.cmd('check', '-v', '--repository-only', self.repository_location, exit_code=0) self.assert_in('Starting repository check', output) self.assert_not_in('Starting archive consistency check', output) + self.assert_not_in('Checking segments', output) output = self.cmd('check', '-v', '--archives-only', self.repository_location, exit_code=0) self.assert_not_in('Starting repository check', output) self.assert_in('Starting archive consistency check', output) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index c86a0b3b2..beb67fc23 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -1,4 +1,6 @@ import hashlib +import io +import logging from time import mktime, strptime from datetime import datetime, timezone, timedelta from io import StringIO @@ -826,7 +828,7 @@ def test_yes_output(capfd): def test_progress_percentage_multiline(capfd): - pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=False, msg="%3.0f%%", file=sys.stderr) + pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=False, msg="%3.0f%%") pi.show(0) out, err = capfd.readouterr() assert err == ' 0%\n' @@ -842,13 +844,14 @@ def test_progress_percentage_multiline(capfd): def test_progress_percentage_sameline(capfd): - pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=True, msg="%3.0f%%", file=sys.stderr) + pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=True, msg="%3.0f%%") pi.show(0) out, err = capfd.readouterr() assert err == ' 0%\r' pi.show(420) + pi.show(680) out, err = capfd.readouterr() - assert err == ' 42%\r' + assert err == ' 42%\r 68%\r' pi.show(1000) out, err = capfd.readouterr() assert err == '100%\r' @@ -858,7 +861,7 @@ def test_progress_percentage_sameline(capfd): def test_progress_percentage_step(capfd): - pi = ProgressIndicatorPercent(100, step=2, start=0, same_line=False, msg="%3.0f%%", file=sys.stderr) + pi = ProgressIndicatorPercent(100, step=2, start=0, same_line=False, msg="%3.0f%%") pi.show() out, err = capfd.readouterr() assert err == ' 0%\n' @@ -870,6 +873,21 @@ def test_progress_percentage_step(capfd): assert err == ' 2%\n' +def test_progress_percentage_quiet(capfd): + logging.getLogger('borg.output.progress').setLevel(logging.WARN) + + pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=False, msg="%3.0f%%") + pi.show(0) + out, err = capfd.readouterr() + assert err == '' + pi.show(1000) + out, err = capfd.readouterr() + assert err == '' + pi.finish() + out, err = capfd.readouterr() + assert err == '' + + def test_progress_endless(capfd): pi = ProgressIndicatorEndless(step=1, file=sys.stderr) pi.show() diff --git a/docs/changes.rst b/docs/changes.rst index 1e6d1a6f1..90c9f1eb8 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -6,6 +6,10 @@ Version 1.1.0 (not released yet) New features: +- borg check: will not produce the "Checking segments" output unless + new --progress option is passed, #824. +- options that imply output (--show-rc, --show-version, --list, --stats, + --progress) don't need -v/--info to have that output displayed, #865 - borg recreate: re-create existing archives, #787 #686 #630 #70, also see #757, #770. @@ -34,7 +38,7 @@ New features: - borg comment: add archive comments, #842 - provide "borgfs" wrapper for borg mount, enables usage via fstab, #743 - create: add 'x' status for excluded paths, #814 -- --show-version: shows/logs the borg version (use -v), #725 +- --show-version: shows/logs the borg version, #725 - borg list/prune/delete: also output archive id, #731 Bug fixes: diff --git a/docs/development.rst b/docs/development.rst index 7d6323dce..b94554462 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -27,8 +27,10 @@ errors, critical for critical errors/states). When directly talking to the user (e.g. Y/N questions), do not use logging, but directly output to stderr (not: stdout, it could be connected to a pipe). -To control the amount and kinds of messages output to stderr or emitted at -info level, use flags like ``--stats`` or ``--list``. +To control the amount and kinds of messages output emitted at info level, use +flags like ``--stats`` or ``--list``, then create a topic logger for messages +controlled by that flag. See ``_setup_implied_logging()`` in +``borg/archiver.py`` for the entry point to topic logging. Building a development environment ---------------------------------- diff --git a/docs/quickstart.rst b/docs/quickstart.rst index c8456b85d..f61656a3b 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -46,7 +46,7 @@ A step by step example 3. The next day create a new archive called *Tuesday*:: - $ borg create -v --stats /path/to/repo::Tuesday ~/src ~/Documents + $ borg create --stats /path/to/repo::Tuesday ~/src ~/Documents This backup will be a lot quicker and a lot smaller since only new never before seen data is stored. The ``--stats`` option causes |project_name| to @@ -93,9 +93,10 @@ A step by step example .. Note:: Borg is quiet by default (it works on WARNING log level). - Add the ``-v`` (or ``--verbose`` or ``--info``) option to adjust the log - level to INFO and also use options like ``--progress`` or ``--list`` to - get progress reporting during command execution. + You can use options like ``--progress`` or ``--list`` to get specific + reports during command execution. You can also add the ``-v`` (or + ``--verbose`` or ``--info``) option to adjust the log level to INFO to + get other informational messages. Automating backups ------------------ @@ -114,7 +115,7 @@ certain number of old archives:: export BORG_PASSPHRASE=mysecret # Backup most important stuff: - borg create -v --stats -C lz4 ::`hostname`-`date +%Y-%m-%d` \ + borg create --stats -C lz4 ::`hostname`-`date +%Y-%m-%d` \ /etc \ /home \ /var \ diff --git a/docs/usage.rst b/docs/usage.rst index d559c0bb0..b19234c6d 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -16,7 +16,8 @@ Type of log output The log level of the builtin logging configuration defaults to WARNING. This is because we want |project_name| to be mostly silent and only output -warnings, errors and critical messages. +warnings, errors and critical messages, unless output has been requested +by supplying an option that implies output (eg, --list or --progress). Log levels: DEBUG < INFO < WARNING < ERROR < CRITICAL @@ -41,10 +42,6 @@ give different output on different log levels - it's just a possibility. .. warning:: Options --critical and --error are provided for completeness, their usage is not recommended as you might miss important information. -.. warning:: While some options (like ``--stats`` or ``--list``) will emit more - informational messages, you have to use INFO (or lower) log level to make - them show up in log output. Use ``-v`` or a logging configuration. - Return codes ~~~~~~~~~~~~ @@ -245,8 +242,8 @@ Examples # Backup ~/Documents into an archive named "my-documents" $ borg create /path/to/repo::my-documents ~/Documents - # same, but verbosely list all files as we process them - $ borg create -v --list /path/to/repo::my-documents ~/Documents + # same, but list all files as we process them + $ borg create --list /path/to/repo::my-documents ~/Documents # Backup ~/Documents and ~/src but exclude pyc files $ borg create /path/to/repo::my-files \ @@ -301,7 +298,7 @@ Examples $ borg extract /path/to/repo::my-files # Extract entire archive and list files while processing - $ borg extract -v --list /path/to/repo::my-files + $ borg extract --list /path/to/repo::my-files # Verify whether an archive could be successfully extracted, but do not write files to disk $ borg extract --dry-run /path/to/repo::my-files @@ -480,7 +477,7 @@ Examples Username: root Time (start): Mon, 2016-02-15 19:36:29 Time (end): Mon, 2016-02-15 19:39:26 - Command line: /usr/local/bin/borg create -v --list -C zlib,6 /path/to/repo::root-2016-02-15 / --one-file-system + Command line: /usr/local/bin/borg create --list -C zlib,6 /path/to/repo::root-2016-02-15 / --one-file-system Number of files: 38100 Original size Compressed size Deduplicated size @@ -655,7 +652,7 @@ Here are misc. notes about topics that are maybe not covered in enough detail in Item flags ~~~~~~~~~~ -``borg create -v --list`` outputs a verbose list of all files, directories and other +``borg create --list`` outputs a list of all files, directories and other file system items it considered (no matter whether they had content changes or not). For each item, it prefixes a single-letter flag that indicates type and/or status of the item. From a8e457f93521c9c5a917412d96415007a4977bfb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 May 2016 23:59:47 +0200 Subject: [PATCH 27/38] move Statistics class to archive module, avoid cyclic import also move the tests accordingly. --- borg/archive.py | 57 +++++++++++++++++++++++++++++++++++++-- borg/archiver.py | 4 +-- borg/helpers.py | 53 ------------------------------------ borg/testsuite/archive.py | 52 ++++++++++++++++++++++++++++++++++- borg/testsuite/helpers.py | 50 +--------------------------------- 5 files changed, 109 insertions(+), 107 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 710b52fb8..e630202a6 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -9,6 +9,7 @@ from .key import key_factory from .remote import cache_if_remote import os +from shutil import get_terminal_size import socket import stat import sys @@ -19,13 +20,13 @@ from .compress import COMPR_BUFFER from .constants import * # NOQA from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \ parse_timestamp, to_localtime, format_time, format_timedelta, safe_encode, safe_decode, \ - Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \ + Manifest, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \ ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \ PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \ CompressionDecider1, CompressionDecider2, CompressionSpec, \ IntegrityError from .repository import Repository -from .platform import acl_get, acl_set, set_flags, get_flags +from .platform import acl_get, acl_set, set_flags, get_flags, swidth from .chunker import Chunker from .hashindex import ChunkIndex, ChunkIndexEntry from .cache import ChunkListEntry @@ -37,6 +38,58 @@ flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0) flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0) +class Statistics: + + def __init__(self): + self.osize = self.csize = self.usize = self.nfiles = 0 + self.last_progress = 0 # timestamp when last progress was shown + + def update(self, size, csize, unique): + self.osize += size + self.csize += csize + if unique: + self.usize += csize + + summary = """\ + Original size Compressed size Deduplicated size +{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}""" + + def __str__(self): + return self.summary.format(stats=self, label='This archive:') + + def __repr__(self): + return "<{cls} object at {hash:#x} ({self.osize}, {self.csize}, {self.usize})>".format( + cls=type(self).__name__, hash=id(self), self=self) + + @property + def osize_fmt(self): + return format_file_size(self.osize) + + @property + def usize_fmt(self): + return format_file_size(self.usize) + + @property + def csize_fmt(self): + return format_file_size(self.csize) + + def show_progress(self, item=None, final=False, stream=None, dt=None): + now = time.time() + if dt is None or now - self.last_progress > dt: + self.last_progress = now + columns, lines = get_terminal_size() + if not final: + msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self) + path = remove_surrogates(item[b'path']) if item else '' + space = columns - swidth(msg) + if space < swidth('...') + swidth(path): + path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:]) + msg += "{0:<{space}}".format(path, space=space) + else: + msg = ' ' * columns + print(msg, file=stream or sys.stderr, end="\r", flush=True) + + class DownloadPipeline: def __init__(self, repository, key): diff --git a/borg/archiver.py b/borg/archiver.py index bd45da2ba..91cf061c3 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -22,7 +22,7 @@ from . import __version__ from .helpers import Error, location_validator, archivename_validator, format_time, format_file_size, \ parse_pattern, PathPrefixPattern, to_localtime, timestamp, \ get_cache_dir, prune_within, prune_split, bin_to_hex, safe_encode, \ - Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ + Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, \ dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \ log_multi, PatternMatcher, ItemFormatter from .logger import create_logger, setup_logging @@ -34,7 +34,7 @@ from .repository import Repository from .cache import Cache from .constants import * # NOQA from .key import key_creator, RepoKey, PassphraseKey -from .archive import Archive, ArchiveChecker, ArchiveRecreater +from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics from .remote import RepositoryServer, RemoteRepository, cache_if_remote from .selftest import selftest from .hashindex import ChunkIndexEntry diff --git a/borg/helpers.py b/borg/helpers.py index 9582564a1..d2f9bab27 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -11,7 +11,6 @@ import stat import textwrap import pwd import re -from shutil import get_terminal_size import sys from string import Formatter import platform @@ -32,7 +31,6 @@ from . import chunker from .constants import * # NOQA from . import crypto from .compress import COMPR_BUFFER, get_compressor -from .platform import swidth from . import shellpattern import msgpack import msgpack.fallback @@ -173,57 +171,6 @@ def prune_split(archives, pattern, n, skip=[]): return keep -class Statistics: - - def __init__(self): - self.osize = self.csize = self.usize = self.nfiles = 0 - self.last_progress = 0 # timestamp when last progress was shown - - def update(self, size, csize, unique): - self.osize += size - self.csize += csize - if unique: - self.usize += csize - - summary = """\ - Original size Compressed size Deduplicated size -{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}""" - - def __str__(self): - return self.summary.format(stats=self, label='This archive:') - - def __repr__(self): - return "<{cls} object at {hash:#x} ({self.osize}, {self.csize}, {self.usize})>".format(cls=type(self).__name__, hash=id(self), self=self) - - @property - def osize_fmt(self): - return format_file_size(self.osize) - - @property - def usize_fmt(self): - return format_file_size(self.usize) - - @property - def csize_fmt(self): - return format_file_size(self.csize) - - def show_progress(self, item=None, final=False, stream=None, dt=None): - now = time.time() - if dt is None or now - self.last_progress > dt: - self.last_progress = now - columns, lines = get_terminal_size() - if not final: - msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self) - path = remove_surrogates(item[b'path']) if item else '' - space = columns - swidth(msg) - if space < swidth('...') + swidth(path): - path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:]) - msg += "{0:<{space}}".format(path, space=space) - else: - msg = ' ' * columns - print(msg, file=stream or sys.stderr, end="\r", flush=True) - - def get_home_dir(): """Get user's home directory while preferring a possibly set HOME environment variable diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index e9722cf22..2f276086b 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -1,14 +1,64 @@ +import os from datetime import datetime, timezone +from io import StringIO from unittest.mock import Mock +import pytest import msgpack -from ..archive import Archive, CacheChunkBuffer, RobustUnpacker +from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics from ..key import PlaintextKey from ..helpers import Manifest from . import BaseTestCase +@pytest.fixture() +def stats(): + stats = Statistics() + stats.update(20, 10, unique=True) + return stats + + +def test_stats_basic(stats): + assert stats.osize == 20 + assert stats.csize == stats.usize == 10 + stats.update(20, 10, unique=False) + assert stats.osize == 40 + assert stats.csize == 20 + assert stats.usize == 10 + + +def tests_stats_progress(stats, columns=80): + os.environ['COLUMNS'] = str(columns) + out = StringIO() + stats.show_progress(stream=out) + s = '20 B O 10 B C 10 B D 0 N ' + buf = ' ' * (columns - len(s)) + assert out.getvalue() == s + buf + "\r" + + out = StringIO() + stats.update(10**3, 0, unique=False) + stats.show_progress(item={b'path': 'foo'}, final=False, stream=out) + s = '1.02 kB O 10 B C 10 B D 0 N foo' + buf = ' ' * (columns - len(s)) + assert out.getvalue() == s + buf + "\r" + out = StringIO() + stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out) + s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo' + buf = ' ' * (columns - len(s)) + assert out.getvalue() == s + buf + "\r" + + +def test_stats_format(stats): + assert str(stats) == """\ + Original size Compressed size Deduplicated size +This archive: 20 B 10 B 10 B""" + s = "{0.osize_fmt}".format(stats) + assert s == "20 B" + # kind of redundant, but id is variable so we can't match reliably + assert repr(stats) == ''.format(id(stats)) + + class MockCache: def __init__(self): diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index c86a0b3b2..09c17403b 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -1,7 +1,6 @@ import hashlib from time import mktime, strptime from datetime import datetime, timezone, timedelta -from io import StringIO import os import pytest @@ -11,7 +10,7 @@ import msgpack.fallback import time from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, clean_lines, \ - prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \ + prune_within, prune_split, get_cache_dir, get_keys_dir, is_slow_msgpack, \ yes, TRUISH, FALSISH, DEFAULTISH, \ StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, ChunkerParams, Chunk, \ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \ @@ -629,53 +628,6 @@ def test_get_keys_dir(): os.environ['BORG_KEYS_DIR'] = old_env -@pytest.fixture() -def stats(): - stats = Statistics() - stats.update(20, 10, unique=True) - return stats - - -def test_stats_basic(stats): - assert stats.osize == 20 - assert stats.csize == stats.usize == 10 - stats.update(20, 10, unique=False) - assert stats.osize == 40 - assert stats.csize == 20 - assert stats.usize == 10 - - -def tests_stats_progress(stats, columns=80): - os.environ['COLUMNS'] = str(columns) - out = StringIO() - stats.show_progress(stream=out) - s = '20 B O 10 B C 10 B D 0 N ' - buf = ' ' * (columns - len(s)) - assert out.getvalue() == s + buf + "\r" - - out = StringIO() - stats.update(10**3, 0, unique=False) - stats.show_progress(item={b'path': 'foo'}, final=False, stream=out) - s = '1.02 kB O 10 B C 10 B D 0 N foo' - buf = ' ' * (columns - len(s)) - assert out.getvalue() == s + buf + "\r" - out = StringIO() - stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out) - s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo' - buf = ' ' * (columns - len(s)) - assert out.getvalue() == s + buf + "\r" - - -def test_stats_format(stats): - assert str(stats) == """\ - Original size Compressed size Deduplicated size -This archive: 20 B 10 B 10 B""" - s = "{0.osize_fmt}".format(stats) - assert s == "20 B" - # kind of redundant, but id is variable so we can't match reliably - assert repr(stats) == ''.format(id(stats)) - - def test_file_size(): """test the size formatting routines""" si_size_map = { From 714e93bf6452a6027c1222d4911c897fb7a09a52 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Thu, 19 May 2016 01:11:51 +0200 Subject: [PATCH 28/38] Repository: fix commit tags being seen in data --- borg/repository.py | 27 +++++++++++++++++++++++---- borg/testsuite/repository.py | 11 +++++++++-- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index 2c2ac8079..8c645eaac 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -536,7 +536,7 @@ class LoggedIO: """Verify that the transaction id is consistent with the index transaction id """ for segment, filename in self.segment_iterator(reverse=True): - if self.is_committed_segment(filename): + if self.is_committed_segment(segment): return segment return None @@ -550,10 +550,14 @@ class LoggedIO: else: break - def is_committed_segment(self, filename): + def is_committed_segment(self, segment): """Check if segment ends with a COMMIT_TAG tag """ - with open(filename, 'rb') as fd: + try: + iterator = self.iter_objects(segment) + except IntegrityError: + return False + with open(self.segment_filename(segment), 'rb') as fd: try: fd.seek(-self.header_fmt.size, os.SEEK_END) except OSError as e: @@ -561,7 +565,22 @@ class LoggedIO: if e.errno == errno.EINVAL: return False raise e - return fd.read(self.header_fmt.size) == self.COMMIT + if fd.read(self.header_fmt.size) != self.COMMIT: + return False + seen_commit = False + while True: + try: + tag, key, offset = next(iterator) + except IntegrityError: + return False + except StopIteration: + break + if tag == TAG_COMMIT: + seen_commit = True + continue + if seen_commit: + return False + return seen_commit def segment_filename(self, segment): return os.path.join(self.path, 'data', str(segment // self.segments_per_dir), str(segment)) diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index aa935e322..0f384380a 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -8,7 +8,7 @@ from ..hashindex import NSIndex from ..helpers import Location, IntegrityError from ..locking import UpgradableLock, LockFailed from ..remote import RemoteRepository, InvalidRPCMethod -from ..repository import Repository +from ..repository import Repository, LoggedIO, TAG_COMMIT from . import BaseTestCase @@ -192,6 +192,13 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): self.assert_equal(self.repository.check(), True) self.assert_equal(len(self.repository), 3) + def test_ignores_commit_tag_in_data(self): + self.repository.put(b'0' * 32, LoggedIO.COMMIT) + self.reopen() + with self.repository: + io = self.repository.io + assert not io.is_committed_segment(io.get_latest_segment()) + class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): def test_destroy_append_only(self): @@ -268,7 +275,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): return set(int(key) for key in self.repository.list()) def test_repair_corrupted_segment(self): - self.add_objects([[1, 2, 3], [4, 5, 6]]) + self.add_objects([[1, 2, 3], [4, 5], [6]]) self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) self.check(status=True) self.corrupt_object(5) From f7f95ae731e0b845758daf0a92f0aa033fcd6a5b Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 20 May 2016 02:01:07 +0200 Subject: [PATCH 29/38] RepositoryCache: don't cache large objects avoids excessive cache repository disk usage --- borg/remote.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/borg/remote.py b/borg/remote.py index 6848784f1..5662a8dbc 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -414,6 +414,9 @@ class RepositoryCache(RepositoryNoCache): Caches Repository GET operations using a local temporary Repository. """ + # maximum object size that will be cached, 64 kiB. + THRESHOLD = 2**16 + def __init__(self, repository): super().__init__(repository) tmppath = tempfile.mkdtemp(prefix='borg-tmp') @@ -434,7 +437,8 @@ class RepositoryCache(RepositoryNoCache): except Repository.ObjectNotFound: for key_, data in repository_iterator: if key_ == key: - self.caching_repo.put(key, data) + if len(data) <= self.THRESHOLD: + self.caching_repo.put(key, data) yield data break # Consume any pending requests From 1e061a2fa4dc0383a3a617a8145f076d665e6100 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 19 May 2016 17:52:02 +0200 Subject: [PATCH 30/38] update CHANGES --- docs/changes.rst | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 7adabb903..8935e4994 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -6,17 +6,27 @@ Version 1.0.3 (not released yet) Bug fixes: -- prune: ignore checkpoints, #997 -- prune: fix bad validator, #942 +- prune: ignore checkpoints (avoid that checkpoints are kept and completed + archives are deleted in a prune run), #997 +- prune: fix bad commandline argument validator, #942 - fix capabilities extraction on Linux (set xattrs last, after chown()) +- repository: fix commit tags being seen in data +- when probing key files, do binary reads. avoids crash when non-borg binary + files are located in borg's key files directory. +- handle SIGTERM and make a clean exit - avoids orphan lock files. +- repository cache: don't cache large objects (avoid using lots of temp. disk + space), #1063 Other changes: -- update readthedocs URLs, #991 -- add missing docs for "borg break-lock", #992 -- borg create help: add some words to about the archive name -- borg create help: document format tags, #894 - Vagrantfile: OS X: update osxfuse / install lzma package, #933 +- setup.py: add check for platform_darwin.c +- docs / help: + + - update readthedocs URLs, #991 + - add missing docs for "borg break-lock", #992 + - borg create help: add some words to about the archive name + - borg create help: document format tags, #894 Version 1.0.2 From 8d8374d23c13b531f7c6618d114b137c884430f6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 20 May 2016 14:34:48 +0200 Subject: [PATCH 31/38] ran build_api + build_usage --- docs/api.rst | 72 +++++++++++++++++++-------------------- docs/usage/create.rst.inc | 6 ++++ 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 7f56c4f77..628d21d13 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -2,31 +2,11 @@ API Documentation ================= -.. automodule:: borg.platform +.. automodule:: borg.archiver :members: :undoc-members: -.. automodule:: borg.shellpattern - :members: - :undoc-members: - -.. automodule:: borg.locking - :members: - :undoc-members: - -.. automodule:: borg.hash_sizes - :members: - :undoc-members: - -.. automodule:: borg.logger - :members: - :undoc-members: - -.. automodule:: borg.remote - :members: - :undoc-members: - -.. automodule:: borg.fuse +.. automodule:: borg.upgrader :members: :undoc-members: @@ -34,7 +14,23 @@ API Documentation :members: :undoc-members: -.. automodule:: borg.helpers +.. automodule:: borg.fuse + :members: + :undoc-members: + +.. automodule:: borg.platform + :members: + :undoc-members: + +.. automodule:: borg.locking + :members: + :undoc-members: + +.. automodule:: borg.shellpattern + :members: + :undoc-members: + +.. automodule:: borg.repository :members: :undoc-members: @@ -42,15 +38,19 @@ API Documentation :members: :undoc-members: +.. automodule:: borg.remote + :members: + :undoc-members: + +.. automodule:: borg.hash_sizes + :members: + :undoc-members: + .. automodule:: borg.xattr :members: :undoc-members: -.. automodule:: borg.archiver - :members: - :undoc-members: - -.. automodule:: borg.repository +.. automodule:: borg.helpers :members: :undoc-members: @@ -62,7 +62,7 @@ API Documentation :members: :undoc-members: -.. automodule:: borg.upgrader +.. automodule:: borg.logger :members: :undoc-members: @@ -70,15 +70,15 @@ API Documentation :members: :undoc-members: -.. automodule:: borg.compress - :members: - :undoc-members: - .. automodule:: borg.platform_linux :members: :undoc-members: -.. automodule:: borg.crypto +.. automodule:: borg.hashindex + :members: + :undoc-members: + +.. automodule:: borg.compress :members: :undoc-members: @@ -86,10 +86,10 @@ API Documentation :members: :undoc-members: -.. automodule:: borg.platform_freebsd +.. automodule:: borg.crypto :members: :undoc-members: -.. automodule:: borg.hashindex +.. automodule:: borg.platform_freebsd :members: :undoc-members: diff --git a/docs/usage/create.rst.inc b/docs/usage/create.rst.inc index ec9e7dc66..74c228b6c 100644 --- a/docs/usage/create.rst.inc +++ b/docs/usage/create.rst.inc @@ -82,6 +82,12 @@ This command creates a backup archive containing all files found while recursive traversing all paths specified. The archive will consume almost no disk space for files or parts of files that have already been stored in other archives. +The archive name needs to be unique. It must not end in '.checkpoint' or +'.checkpoint.N' (with N being a number), because these names are used for +checkpoints and treated in special ways. + +In the archive name, you may use the following format tags: +{now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid} To speed up pulling backups over sshfs and similar network file systems which do not provide correct inode information the --ignore-inode flag can be used. This From 5be71b506e87533ad122ab3251128b9a1478d6d7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 20 May 2016 14:38:09 +0200 Subject: [PATCH 32/38] update CHANGES --- docs/changes.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 8935e4994..23ff0bdad 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -1,15 +1,16 @@ Changelog ========= -Version 1.0.3 (not released yet) --------------------------------- +Version 1.0.3 +------------- Bug fixes: -- prune: ignore checkpoints (avoid that checkpoints are kept and completed - archives are deleted in a prune run), #997 -- prune: fix bad commandline argument validator, #942 -- fix capabilities extraction on Linux (set xattrs last, after chown()) +- prune: avoid that checkpoints are kept and completed archives are deleted in + a prune run), #997 +- prune: fix commandline argument validation - some valid command lines were + considered invalid (annoying, but harmless), #942 +- fix capabilities extraction on Linux (set xattrs last, after chown()), #1069 - repository: fix commit tags being seen in data - when probing key files, do binary reads. avoids crash when non-borg binary files are located in borg's key files directory. @@ -21,6 +22,7 @@ Other changes: - Vagrantfile: OS X: update osxfuse / install lzma package, #933 - setup.py: add check for platform_darwin.c +- setup.py: on freebsd, use a llfuse release that builds ok - docs / help: - update readthedocs URLs, #991 From 21a30269c2c8b967e0a01334f4540b762ce56255 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 20 May 2016 20:00:24 +0200 Subject: [PATCH 33/38] freebsd: use a older llfuse release that builds llfuse >0.41.1 <=1.0.0 does not build on freebsd. the issue is already fixed, but no new release yet. --- Vagrantfile | 4 ++-- setup.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 59ce9f5bb..c9306b54d 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -202,8 +202,8 @@ def install_borg(boxname) rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__ pip install -r requirements.d/development.txt - pip install -r requirements.d/fuse.txt - pip install -e . + # by using [fuse], setup.py can handle different fuse requirements: + pip install -e .[fuse] EOF end diff --git a/setup.py b/setup.py index e7abc79e0..94331086e 100644 --- a/setup.py +++ b/setup.py @@ -24,12 +24,18 @@ install_requires = ['msgpack-python>=0.4.6', ] extras_require = { # llfuse 0.40 (tested, proven, ok), needs FUSE version >= 2.8.0 # llfuse 0.41 (tested shortly, looks ok), needs FUSE version >= 2.8.0 + # llfuse 0.41.1 (tested shortly, looks ok), needs FUSE version >= 2.8.0 # llfuse 0.42 (tested shortly, looks ok), needs FUSE version >= 2.8.0 # llfuse 1.0 (tested shortly, looks ok), needs FUSE version >= 2.8.0 # llfuse 2.0 will break API 'fuse': ['llfuse<2.0', ], } +if sys.platform.startswith('freebsd'): + # while llfuse 1.0 is the latest llfuse release right now, + # llfuse 0.41.1 is the latest release that actually builds on freebsd: + extras_require['fuse'] = ['llfuse==0.41.1', ] + from setuptools import setup, Extension from setuptools.command.sdist import sdist From c6f59899057b8b496214d2ce65e3232e536eb7be Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 25 Apr 2016 05:57:30 +0200 Subject: [PATCH 34/38] Item implementation + tests --- borg/item.py | 129 +++++++++++++++++++++++++++++++++++++++++ borg/testsuite/item.py | 106 +++++++++++++++++++++++++++++++++ 2 files changed, 235 insertions(+) create mode 100644 borg/item.py create mode 100644 borg/testsuite/item.py diff --git a/borg/item.py b/borg/item.py new file mode 100644 index 000000000..ff0b71b55 --- /dev/null +++ b/borg/item.py @@ -0,0 +1,129 @@ +from .constants import ITEM_KEYS +from .helpers import safe_encode, safe_decode, StableDict + +# we want str keys for this code +ITEM_KEYS = set(key.decode() for key in ITEM_KEYS) + + +class Item: + """ + Item abstraction that deals with validation and the low-level details internally: + + - msgpack gives us a dict with bytes-typed keys - but we do not want to have the ugly + bytes-typed keys and the hard-to-type dict item access all over the place (like: item[b'keyname']), + so we define properties (and use it like: item.keyname) + - msgpack gives us byte-typed values for stuff that should be str, we need to decode/encode them here. + - we want to be safe against typos in keys and badly typed values, so we check them. + + Items are created either from msgpack unpacker output, from another dict or + built step-by-step by setting attributes. + + If an Item shall be serialized, give as_dict() method output to msgpack packer. + """ + + VALID_KEYS = ITEM_KEYS + + def __init__(self, data_dict=None, **kw): + if data_dict is None: + data = kw + elif not isinstance(data_dict, dict): + raise TypeError("data_dict must be dict") + else: + data = data_dict + # internally, we want an dict with only str-typed keys + _dict = {} + for k, v in data.items(): + if isinstance(k, bytes): + k = k.decode() + elif not isinstance(k, str): + raise TypeError("dict keys must be str or bytes, not %r" % k) + _dict[k] = v + unknown_keys = set(_dict) - self.VALID_KEYS + if unknown_keys: + raise ValueError("dict contains unknown keys %s" % ','.join(unknown_keys)) + self._dict = _dict + + def as_dict(self): + """return the internal dictionary""" + return self._dict # XXX use StableDict? + + def _check_key(self, key): + """make sure key is of type str and known""" + if not isinstance(key, str): + raise TypeError("key must be str") + if key not in self.VALID_KEYS: + raise ValueError("key '%s' unknown" % key) + return key + + def __contains__(self, key): + """do we have this key?""" + return self._check_key(key) in self._dict + + def get(self, key, default=None): + """get value for key, return default if key does not exist""" + return getattr(self, self._check_key(key), default) + + def _make_property(key, value_type, value_type_name=None, encode=None, decode=None): + """return a property that deals with self._dict[key]: + + - sets the value (checking type and optionally encoding it) + - gets the value (optionally decoding it) + - deletes the entry from the internal dict + - creates reasonable docstring and exceptions / exception messages + """ + assert isinstance(key, str) + if value_type_name is None: + value_type_name = value_type.__name__ + doc = "%s (%s)" % (key, value_type_name) + type_error_msg = "%s value must be %s" % (key, value_type_name) + attr_error_msg = "attribute %s not found" % key + + def _get(self): + try: + value = self._dict[key] + except KeyError: + raise AttributeError(attr_error_msg) from None + if decode is not None: + value = decode(value) + return value + + def _set(self, value): + if not isinstance(value, value_type): + raise TypeError(type_error_msg) + if encode is not None: + value = encode(value) + self._dict[key] = value + + def _del(self): + try: + del self._dict[key] + except KeyError: + raise AttributeError(attr_error_msg) from None + + return property(_get, _set, _del, doc=doc) + + # properties statically defined, so that IDEs can know their names: + + path = _make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + source = _make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + user = _make_property('user', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + group = _make_property('group', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_access = _make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_default = _make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_extended = _make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_nfs4 = _make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + + mode = _make_property('mode', int) + uid = _make_property('uid', int) + gid = _make_property('gid', int) + atime = _make_property('atime', int) + ctime = _make_property('ctime', int) + mtime = _make_property('mtime', int) + rdev = _make_property('rdev', int) + bsdflags = _make_property('bsdflags', int) + + hardlink_master = _make_property('hardlink_master', bool) + + chunks = _make_property('chunks', list) + + xattrs = _make_property('xattrs', StableDict) diff --git a/borg/testsuite/item.py b/borg/testsuite/item.py new file mode 100644 index 000000000..e41d6ae18 --- /dev/null +++ b/borg/testsuite/item.py @@ -0,0 +1,106 @@ +import pytest + +from ..item import Item +from ..helpers import StableDict + + +def test_item_empty(): + item = Item() + + assert item.as_dict() == {} + + assert 'path' not in item + with pytest.raises(ValueError): + 'invalid-key' in item + with pytest.raises(TypeError): + b'path' in item + with pytest.raises(TypeError): + 42 in item + + assert item.get('mode') is None + assert item.get('mode', 0o666) == 0o666 + with pytest.raises(ValueError): + item.get('invalid-key') + with pytest.raises(TypeError): + item.get(b'mode') + with pytest.raises(TypeError): + item.get(42) + + with pytest.raises(AttributeError): + item.path + + with pytest.raises(AttributeError): + del item.path + + +def test_item_from_dict(): + # does not matter whether we get str or bytes keys + item = Item({b'path': b'/a/b/c', b'mode': 0o666}) + assert item.path == '/a/b/c' + assert item.mode == 0o666 + assert 'path' in item + + item = Item({'path': b'/a/b/c', 'mode': 0o666}) + assert item.path == '/a/b/c' + assert item.mode == 0o666 + assert 'mode' in item + + +def test_item_from_kw(): + item = Item(path=b'/a/b/c', mode=0o666) + assert item.path == '/a/b/c' + assert item.mode == 0o666 + + +def test_item_int_property(): + item = Item() + item.mode = 0o666 + assert item.mode == 0o666 + assert item.as_dict() == {'mode': 0o666} + del item.mode + assert item.as_dict() == {} + with pytest.raises(TypeError): + item.mode = "invalid" + + +def test_item_se_str_property(): + # start simple + item = Item() + item.path = '/a/b/c' + assert item.path == '/a/b/c' + assert item.as_dict() == {'path': b'/a/b/c'} + del item.path + assert item.as_dict() == {} + with pytest.raises(TypeError): + item.path = 42 + + # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut + item = Item({'path': b'/a/\xfc/c'}) + assert item.path == '/a/\udcfc/c' # getting a surrogate-escaped representation + assert item.as_dict() == {'path': b'/a/\xfc/c'} + del item.path + assert 'path' not in item + item.path = '/a/\udcfc/c' # setting using a surrogate-escaped representation + assert item.as_dict() == {'path': b'/a/\xfc/c'} + + +def test_item_list_property(): + item = Item() + item.chunks = [] + assert item.chunks == [] + item.chunks.append(0) + assert item.chunks == [0] + item.chunks.append(1) + assert item.chunks == [0, 1] + assert item.as_dict() == {'chunks': [0, 1]} + + +def test_item_dict_property(): + item = Item() + item.xattrs = StableDict() + assert item.xattrs == StableDict() + item.xattrs['foo'] = 'bar' + assert item.xattrs['foo'] == 'bar' + item.xattrs['bar'] = 'baz' + assert item.xattrs == StableDict({'foo': 'bar', 'bar': 'baz'}) + assert item.as_dict() == {'xattrs': {'foo': 'bar', 'bar': 'baz'}} From c18209fc908f15eaa90f3b2f6ba5479670408f1f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 25 Apr 2016 07:39:17 +0200 Subject: [PATCH 35/38] split into generic PropDict and special Item class --- borg/item.py | 94 ++++++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 44 deletions(-) diff --git a/borg/item.py b/borg/item.py index ff0b71b55..2bf9c650b 100644 --- a/borg/item.py +++ b/borg/item.py @@ -1,27 +1,21 @@ from .constants import ITEM_KEYS from .helpers import safe_encode, safe_decode, StableDict -# we want str keys for this code -ITEM_KEYS = set(key.decode() for key in ITEM_KEYS) - -class Item: +class PropDict: """ - Item abstraction that deals with validation and the low-level details internally: + Manage a dictionary via properties. - - msgpack gives us a dict with bytes-typed keys - but we do not want to have the ugly - bytes-typed keys and the hard-to-type dict item access all over the place (like: item[b'keyname']), - so we define properties (and use it like: item.keyname) - - msgpack gives us byte-typed values for stuff that should be str, we need to decode/encode them here. - - we want to be safe against typos in keys and badly typed values, so we check them. - - Items are created either from msgpack unpacker output, from another dict or - built step-by-step by setting attributes. - - If an Item shall be serialized, give as_dict() method output to msgpack packer. + - initialization by giving a dict or kw args + - on initialization, normalize dict keys to be str type + - access dict via properties, like: x.key_name + - membership check via: 'key_name' in x + - optionally, encode when setting a value + - optionally, decode when getting a value + - be safe against typos in key names: check against VALID_KEYS + - when setting a value: check type of value """ - - VALID_KEYS = ITEM_KEYS + VALID_KEYS = None # override with in child class def __init__(self, data_dict=None, **kw): if data_dict is None: @@ -52,7 +46,7 @@ class Item: if not isinstance(key, str): raise TypeError("key must be str") if key not in self.VALID_KEYS: - raise ValueError("key '%s' unknown" % key) + raise ValueError("key '%s' is not a valid key" % key) return key def __contains__(self, key): @@ -63,14 +57,9 @@ class Item: """get value for key, return default if key does not exist""" return getattr(self, self._check_key(key), default) + @staticmethod def _make_property(key, value_type, value_type_name=None, encode=None, decode=None): - """return a property that deals with self._dict[key]: - - - sets the value (checking type and optionally encoding it) - - gets the value (optionally decoding it) - - deletes the entry from the internal dict - - creates reasonable docstring and exceptions / exception messages - """ + """return a property that deals with self._dict[key]""" assert isinstance(key, str) if value_type_name is None: value_type_name = value_type.__name__ @@ -102,28 +91,45 @@ class Item: return property(_get, _set, _del, doc=doc) + +class Item(PropDict): + """ + Item abstraction that deals with validation and the low-level details internally: + + Items are created either from msgpack unpacker output, from another dict, from kwargs or + built step-by-step by setting attributes. + + msgpack gives us a dict with bytes-typed keys, just give it to Item(d) and use item.key_name later. + msgpack gives us byte-typed values for stuff that should be str, we automatically decode when getting + such a property and encode when setting it. + + If an Item shall be serialized, give as_dict() method output to msgpack packer. + """ + + VALID_KEYS = set(key.decode() for key in ITEM_KEYS) # we want str-typed keys + # properties statically defined, so that IDEs can know their names: - path = _make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - source = _make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - user = _make_property('user', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - group = _make_property('group', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - acl_access = _make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - acl_default = _make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - acl_extended = _make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - acl_nfs4 = _make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + user = PropDict._make_property('user', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + group = PropDict._make_property('group', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_access = PropDict._make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_default = PropDict._make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_extended = PropDict._make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_nfs4 = PropDict._make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - mode = _make_property('mode', int) - uid = _make_property('uid', int) - gid = _make_property('gid', int) - atime = _make_property('atime', int) - ctime = _make_property('ctime', int) - mtime = _make_property('mtime', int) - rdev = _make_property('rdev', int) - bsdflags = _make_property('bsdflags', int) + mode = PropDict._make_property('mode', int) + uid = PropDict._make_property('uid', int) + gid = PropDict._make_property('gid', int) + atime = PropDict._make_property('atime', int) + ctime = PropDict._make_property('ctime', int) + mtime = PropDict._make_property('mtime', int) + rdev = PropDict._make_property('rdev', int) + bsdflags = PropDict._make_property('bsdflags', int) - hardlink_master = _make_property('hardlink_master', bool) + hardlink_master = PropDict._make_property('hardlink_master', bool) - chunks = _make_property('chunks', list) + chunks = PropDict._make_property('chunks', list) - xattrs = _make_property('xattrs', StableDict) + xattrs = PropDict._make_property('xattrs', StableDict) From e81407ff813137aa0bd48d33b270c9cefa33a555 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 25 Apr 2016 14:08:47 +0200 Subject: [PATCH 36/38] add bigint coding, allow None as user/group --- borg/helpers.py | 4 ++++ borg/item.py | 14 ++++++++------ borg/testsuite/item.py | 19 +++++++++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index b342c60e0..15c01bb7c 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -681,11 +681,15 @@ def posix_acl_use_stored_uid_gid(acl): def safe_decode(s, coding='utf-8', errors='surrogateescape'): """decode bytes to str, with round-tripping "invalid" bytes""" + if s is None: + return None return s.decode(coding, errors) def safe_encode(s, coding='utf-8', errors='surrogateescape'): """encode str to bytes, with round-tripping "invalid" bytes""" + if s is None: + return None return s.encode(coding, errors) diff --git a/borg/item.py b/borg/item.py index 2bf9c650b..b0cbf12a3 100644 --- a/borg/item.py +++ b/borg/item.py @@ -1,5 +1,5 @@ from .constants import ITEM_KEYS -from .helpers import safe_encode, safe_decode, StableDict +from .helpers import safe_encode, safe_decode, bigint_to_int, int_to_bigint, StableDict class PropDict: @@ -112,22 +112,24 @@ class Item(PropDict): path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - user = PropDict._make_property('user', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - group = PropDict._make_property('group', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_access = PropDict._make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_default = PropDict._make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_extended = PropDict._make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_nfs4 = PropDict._make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + mode = PropDict._make_property('mode', int) uid = PropDict._make_property('uid', int) gid = PropDict._make_property('gid', int) - atime = PropDict._make_property('atime', int) - ctime = PropDict._make_property('ctime', int) - mtime = PropDict._make_property('mtime', int) rdev = PropDict._make_property('rdev', int) bsdflags = PropDict._make_property('bsdflags', int) + atime = PropDict._make_property('atime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) + ctime = PropDict._make_property('ctime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) + mtime = PropDict._make_property('mtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) + hardlink_master = PropDict._make_property('hardlink_master', bool) chunks = PropDict._make_property('chunks', list) diff --git a/borg/testsuite/item.py b/borg/testsuite/item.py index e41d6ae18..16e887cbb 100644 --- a/borg/testsuite/item.py +++ b/borg/testsuite/item.py @@ -63,6 +63,25 @@ def test_item_int_property(): item.mode = "invalid" +def test_item_bigint_property(): + item = Item() + small, big = 42, 2 ** 65 + item.atime = small + assert item.atime == small + assert item.as_dict() == {'atime': small} + item.atime = big + assert item.atime == big + assert item.as_dict() == {'atime': b'\0' * 8 + b'\x02'} + + +def test_item_user_group_none(): + item = Item() + item.user = None + assert item.user is None + item.group = None + assert item.group is None + + def test_item_se_str_property(): # start simple item = Item() From 46362a1962d68f42f18d6d4fd6acf0d8ccc54f58 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 25 Apr 2016 14:12:39 +0200 Subject: [PATCH 37/38] add more tests --- borg/testsuite/item.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/borg/testsuite/item.py b/borg/testsuite/item.py index 16e887cbb..f8b6b5117 100644 --- a/borg/testsuite/item.py +++ b/borg/testsuite/item.py @@ -40,11 +40,24 @@ def test_item_from_dict(): assert item.mode == 0o666 assert 'path' in item + # does not matter whether we get str or bytes keys item = Item({'path': b'/a/b/c', 'mode': 0o666}) assert item.path == '/a/b/c' assert item.mode == 0o666 assert 'mode' in item + # invalid - no dict + with pytest.raises(TypeError): + Item(42) + + # invalid - no bytes/str key + with pytest.raises(TypeError): + Item({42: 23}) + + # invalid - unknown key + with pytest.raises(ValueError): + Item({'foobar': 'baz'}) + def test_item_from_kw(): item = Item(path=b'/a/b/c', mode=0o666) From e6cf4ee627d367cbd6cfc165f48285190b1c7ef7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 27 Apr 2016 23:17:10 +0200 Subject: [PATCH 38/38] disallow setting unknown attributes, use StableDict as .as_dict() result --- borg/item.py | 6 +++++- borg/testsuite/item.py | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/borg/item.py b/borg/item.py index b0cbf12a3..05ac47a2c 100644 --- a/borg/item.py +++ b/borg/item.py @@ -17,6 +17,8 @@ class PropDict: """ VALID_KEYS = None # override with in child class + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + def __init__(self, data_dict=None, **kw): if data_dict is None: data = kw @@ -39,7 +41,7 @@ class PropDict: def as_dict(self): """return the internal dictionary""" - return self._dict # XXX use StableDict? + return StableDict(self._dict) def _check_key(self, key): """make sure key is of type str and known""" @@ -108,6 +110,8 @@ class Item(PropDict): VALID_KEYS = set(key.decode() for key in ITEM_KEYS) # we want str-typed keys + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + # properties statically defined, so that IDEs can know their names: path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) diff --git a/borg/testsuite/item.py b/borg/testsuite/item.py index f8b6b5117..bd66e4831 100644 --- a/borg/testsuite/item.py +++ b/borg/testsuite/item.py @@ -136,3 +136,12 @@ def test_item_dict_property(): item.xattrs['bar'] = 'baz' assert item.xattrs == StableDict({'foo': 'bar', 'bar': 'baz'}) assert item.as_dict() == {'xattrs': {'foo': 'bar', 'bar': 'baz'}} + + +def test_unknown_property(): + # we do not want the user to be able to set unknown attributes - + # they won't get into the .as_dict() result dictionary. + # also they might be just typos of known attributes. + item = Item() + with pytest.raises(AttributeError): + item.unknown_attribute = None