From d27a3521ece0367ed32f68b2b5d7c6abe6a1ac81 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 21 May 2016 19:24:18 +0200 Subject: [PATCH 01/28] test on py36 --- .travis.yml | 5 +++++ tox.ini | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 0ec266edd..c9dfaea81 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,9 @@ matrix: - python: 3.5 os: linux env: TOXENV=py35 + - python: nightly + os: linux + env: TOXENV=py36 - python: 3.5 os: linux env: TOXENV=flake8 @@ -25,6 +28,8 @@ matrix: os: osx osx_image: xcode6.4 env: TOXENV=py35 + allow_failures: + - python: nightly install: - ./.travis/install.sh diff --git a/tox.ini b/tox.ini index 0473cb271..211ab98a8 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ # fakeroot -u tox --recreate [tox] -envlist = py{34,35},flake8 +envlist = py{34,35,36},flake8 [testenv] # Change dir to avoid import problem for cython code. The directory does From 99fa484726ee3a918c33ec3790edb876ac87f3e8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 21 May 2016 21:08:03 +0200 Subject: [PATCH 02/28] clarify comment about linux kernel versions currently, ALL linux kernels are affected. --- borg/_chunker.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 7f772ca4b..3705a3d69 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -196,10 +196,10 @@ chunker_fill(Chunker *c) // We rollback the initial offset back to the start of the page, // to avoid it not being truncated as a partial page request. if (length > 0) { - // Linux kernels prior to 4.7 have a bug where they truncate - // last partial page of POSIX_FADV_DONTNEED request, so we need - // to page-align it ourselves. 
We'll need the rest of this page - // on the next read (assuming this was not EOF) + // All Linux kernels (at least up to and including 4.6(.0)) have a bug where + // they truncate last partial page of POSIX_FADV_DONTNEED request, so we need + // to page-align it ourselves. We'll need the rest of this page on the next + // read (assuming this was not EOF). overshoot = (offset + length) & pagemask; } else { // For length == 0 we set overshoot 0, so the below From 8834f6fdbddb083dc60221fe9a49f437ee161cb5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 21 May 2016 23:16:18 +0200 Subject: [PATCH 03/28] chunker: do not buzhash if not needed, fixes #1021 For small remainders of files (last chunk), we do not need to buzhash if it is already clear that there is not enough left (we want at least min_size big chunks). Small files are handled by same code - as they only give 1 chunk, that is the last chunk (see above). See "Cases" considerations below. For big files, we do not need to buzhash the first min_size bytes of a chunk - we do not want to cut there anyway, so we can start buzhashing at offset min_size. Cases (before this change) -------------------------- - A) remaining <= window_size - would do 2 chunker_fill calls (both line 253) and trigger eof with the 2nd call - no buzhashing - result is 1 chunk of the remaining length - B) window_size < remaining <= min_size: - the chunker would do 1 chunker_fill call (line 253) that would read the entire remaining file (but not trigger eof yet) - would compute all possible remaining - window_size + 1 buzhashes, but without a chance for a cut, because there is also the n < min_size condition - would do another chunker_fill call (line 282), but not get more data, so loop ends - result is 1 chunk of the remaining length - C) file > min_size: - normal chunking Cases (after this change) ------------------------- - A) similar to above A), but up to remaining < min_size + window_size + 1, so it does not buzhash if there is no chance for a cut. 
- B) see C) above --- borg/_chunker.c | 12 ++++++++++-- borg/chunker.pyx | 2 ++ borg/testsuite/archiver.py | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 7f772ca4b..2d1a03629 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -249,11 +249,12 @@ chunker_process(Chunker *c) PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch"); return NULL; } - while(c->remaining <= window_size && !c->eof) { + while(c->remaining < min_size + window_size + 1 && !c->eof) { /* see assert in Chunker init */ if(!chunker_fill(c)) { return NULL; } } + /* here we either are at eof ... */ if(c->eof) { c->done = 1; if(c->remaining) { @@ -268,8 +269,15 @@ chunker_process(Chunker *c) return NULL; } } + /* ... or we have at least min_size + window_size + 1 bytes remaining. + * We do not want to "cut" a chunk smaller than min_size and the hash + * window starts at the potential cutting place. + */ + c->position += min_size; + c->remaining -= min_size; + n += min_size; sum = buzhash(c->data + c->position, window_size, c->table); - while(c->remaining > c->window_size && ((sum & chunk_mask) || n < min_size)) { + while(c->remaining > c->window_size && (sum & chunk_mask)) { sum = buzhash_update(sum, c->data[c->position], c->data[c->position + window_size], window_size, c->table); diff --git a/borg/chunker.pyx b/borg/chunker.pyx index 0faa06f38..560e14c82 100644 --- a/borg/chunker.pyx +++ b/borg/chunker.pyx @@ -23,6 +23,8 @@ cdef class Chunker: def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size): min_size = 1 << chunk_min_exp max_size = 1 << chunk_max_exp + # see chunker_process, first while loop condition, first term must be able to get True: + assert hash_window_size + min_size + 1 <= max_size, "too small max_size" hash_mask = (1 << hash_mask_bits) - 1 self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff) diff --git 
a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 5b8cf95af..7f4719d74 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -1491,9 +1491,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::test', 'input') archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}') with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()): - self.cmd('recreate', '-pv', '--chunker-params', '10,12,11,4095', self.repository_location) + self.cmd('recreate', '-pv', '--chunker-params', '10,13,11,4095', self.repository_location) assert 'test.recreate' in self.cmd('list', self.repository_location) - output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,12,11,4095', self.repository_location) + output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,13,11,4095', self.repository_location) assert 'Found test.recreate, will resume' in output assert 'Copied 1 chunks from a partially processed item' in output archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}') From 96a798debb3cf487ec659540ee26d2127402afcf Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 22 May 2016 01:22:52 +0200 Subject: [PATCH 04/28] chunker: add a comment about a potential speedup --- borg/_chunker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 2d1a03629..9ce5699f6 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -96,7 +96,7 @@ buzhash(const unsigned char *data, size_t len, const uint32_t *h) static uint32_t buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t len, const uint32_t *h) { - uint32_t lenmod = len & 0x1f; + uint32_t lenmod = len & 0x1f; /* Note: replace by constant to get small speedup */ return BARREL_SHIFT(sum, 1) ^ BARREL_SHIFT(h[remove], lenmod) ^ h[add]; } From 
ce6b838da8049f81887acc70264711bfb225eceb Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 22 May 2016 11:09:07 +0200 Subject: [PATCH 05/28] Fix crash regression for UDS introduced in 805f631 --- borg/platform_linux.pyx | 7 ++++--- borg/testsuite/archiver.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/borg/platform_linux.pyx b/borg/platform_linux.pyx index 142185f3b..e60768aaa 100644 --- a/borg/platform_linux.pyx +++ b/borg/platform_linux.pyx @@ -86,10 +86,11 @@ def set_flags(path, bsd_flags, fd=None): def get_flags(path, st): - if stat.S_ISLNK(st.st_mode): - return 0 cdef int linux_flags - fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW) + try: + fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW) + except OSError: + return 0 try: if ioctl(fd, FS_IOC_GETFLAGS, &linux_flags) == -1: return 0 diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 5b8cf95af..173a01e18 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -5,6 +5,7 @@ import inspect from io import StringIO import logging import random +import socket import stat import subprocess import sys @@ -357,6 +358,16 @@ class ArchiverTestCase(ArchiverTestCaseBase): # the interesting parts of info_output2 and info_output should be same self.assert_equal(filter(info_output), filter(info_output2)) + def test_unix_socket(self): + self.cmd('init', self.repository_location) + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.bind(os.path.join(self.input_path, 'unix-socket')) + self.cmd('create', self.repository_location + '::test', 'input') + sock.close() + with changedir('output'): + self.cmd('extract', self.repository_location + '::test') + assert not os.path.exists('input/unix-socket') + def test_symlink_extract(self): self.create_test_files() self.cmd('init', self.repository_location) From e7523b7d4652ebf088ea12a0c3b124c8711bc4cc Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 22 May 
2016 11:29:44 +0200 Subject: [PATCH 06/28] Fix fuse tests when flags are available e.g. fuse installed, TMPDIR != tmpfs --- borg/testsuite/archiver.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 173a01e18..e4c830dac 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -1255,6 +1255,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): try: self.cmd('mount', self.repository_location, mountpoint, fork=True) self.wait_for_mount(mountpoint) + if has_lchflags: + # remove the file we did not backup, so input and output become equal + os.remove(os.path.join('input', 'flagfile')) self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input')) self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input')) finally: @@ -1276,6 +1279,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): try: self.cmd('mount', self.repository_location + '::archive', mountpoint, fork=True) self.wait_for_mount(mountpoint) + if has_lchflags: + # remove the file we did not backup, so input and output become equal + os.remove(os.path.join('input', 'flagfile')) self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input')) finally: if sys.platform.startswith('linux'): From ffa78161cdc8e471211c623b2c6515a7a9abe382 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 22 May 2016 15:50:24 +0200 Subject: [PATCH 07/28] List files excluded via UF_NODUMP --- borg/archiver.py | 1 + borg/testsuite/archiver.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/borg/archiver.py b/borg/archiver.py index 5abdd1962..96ac6bee0 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -317,6 +317,7 @@ class Archiver: status = None # Ignore if nodump flag is set if get_flags(path, st) & stat.UF_NODUMP: + self.print_file_status('x', path) return if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode): if not dry_run: diff --git 
a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index e4c830dac..675c81deb 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -997,14 +997,21 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.create_regular_file('file1', size=1024 * 80) os.utime('input/file1', (now - 5, now - 5)) # 5 seconds ago self.create_regular_file('file2', size=1024 * 80) + if has_lchflags: + self.create_regular_file('file3', size=1024 * 80) + platform.set_flags(os.path.join(self.input_path, 'file3'), stat.UF_NODUMP) self.cmd('init', self.repository_location) output = self.cmd('create', '--list', self.repository_location + '::test', 'input') self.assert_in("A input/file1", output) self.assert_in("A input/file2", output) + if has_lchflags: + self.assert_in("x input/file3", output) # should find second file as excluded output = self.cmd('create', '--list', self.repository_location + '::test1', 'input', '--exclude', '*/file2') self.assert_in("U input/file1", output) self.assert_in("x input/file2", output) + if has_lchflags: + self.assert_in("x input/file3", output) def test_create_topical(self): now = time.time() From ed6f6b9aac681b46d7642bf70545f2ebdd50ff92 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 22 May 2016 17:34:58 +0200 Subject: [PATCH 08/28] platform_linux.set_flags: don't raise on EOPNOTSUPP --- borg/platform_linux.pyx | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/borg/platform_linux.pyx b/borg/platform_linux.pyx index e60768aaa..55806fa89 100644 --- a/borg/platform_linux.pyx +++ b/borg/platform_linux.pyx @@ -50,9 +50,6 @@ cdef extern from "linux/fs.h": cdef extern from "stropts.h": int ioctl(int fildes, int request, ...) 
-cdef extern from "errno.h": - int errno - cdef extern from "string.h": char *strerror(int errnum) @@ -79,7 +76,8 @@ def set_flags(path, bsd_flags, fd=None): fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW) try: if ioctl(fd, FS_IOC_SETFLAGS, &flags) == -1: - raise OSError(errno, strerror(errno).decode(), path) + if errno.errno != errno.EOPNOTSUPP: + raise OSError(errno, strerror(errno).decode(), path) finally: if open_fd: os.close(fd) From f27f0e1ea23253880f18a0ad699394dcb1a05353 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 22 May 2016 19:15:21 +0200 Subject: [PATCH 09/28] Fix bug on powerpc linux --- borg/platform_linux.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/platform_linux.pyx b/borg/platform_linux.pyx index e60768aaa..4306a5fcd 100644 --- a/borg/platform_linux.pyx +++ b/borg/platform_linux.pyx @@ -8,13 +8,13 @@ from .platform_base import SyncFile as BaseSyncFile from .platform_posix import swidth from libc cimport errno +from libc.stdint cimport int64_t API_VERSION = 3 cdef extern from "sys/types.h": int ACL_TYPE_ACCESS int ACL_TYPE_DEFAULT - ctypedef off64_t cdef extern from "sys/acl.h": ctypedef struct _acl_t: @@ -31,7 +31,7 @@ cdef extern from "acl/libacl.h": int acl_extended_file(const char *path) cdef extern from "fcntl.h": - int sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags) + int sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags) unsigned int SYNC_FILE_RANGE_WRITE unsigned int SYNC_FILE_RANGE_WAIT_BEFORE unsigned int SYNC_FILE_RANGE_WAIT_AFTER From cdb8cc5490b017080be0678c10f37af4a68223d8 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 22 May 2016 19:54:08 +0200 Subject: [PATCH 10/28] Fix referencing error in platform_linux.set_flags --- borg/platform_linux.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/borg/platform_linux.pyx b/borg/platform_linux.pyx index 707766b1a..a1c549338 100644 --- 
a/borg/platform_linux.pyx +++ b/borg/platform_linux.pyx @@ -76,8 +76,9 @@ def set_flags(path, bsd_flags, fd=None): fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW) try: if ioctl(fd, FS_IOC_SETFLAGS, &flags) == -1: - if errno.errno != errno.EOPNOTSUPP: - raise OSError(errno, strerror(errno).decode(), path) + error_number = errno.errno + if error_number != errno.EOPNOTSUPP: + raise OSError(error_number, strerror(error_number).decode(), path) finally: if open_fd: os.close(fd) From 7a569bc0377be1f823f8b0502d333e8073142649 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Mon, 23 May 2016 00:42:31 +0200 Subject: [PATCH 11/28] Repository: avoid reading large segments for commit state check --- borg/repository.py | 4 ++-- borg/testsuite/archiver.py | 2 +- borg/testsuite/repository.py | 24 ++++++++++++------------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index 10190e03a..c94e5decf 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -735,8 +735,8 @@ class LoggedIO: return self.segment def write_commit(self): - fd = self.get_write_fd(no_new=True) - fd.sync() + self.close_segment() + fd = self.get_write_fd() header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT) crc = self.crc_fmt.pack(crc32(header) & 0xffffffff) fd.write(b''.join((crc, header))) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 675c81deb..f37ca177e 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -927,7 +927,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_in('borgbackup version', output) # implied output even without --info given self.assert_not_in('Starting repository check', output) # --info not given for root logger - name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0] + name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[1] with 
open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+b') as fd: fd.seek(100) fd.write(b'XXXX') diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 69c946564..71743bfb0 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -216,18 +216,18 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): self.repository.commit() self.repository.append_only = False - assert segments_in_repository() == 1 + assert segments_in_repository() == 2 self.repository.put(b'00000000000000000000000000000000', b'foo') self.repository.commit() # normal: compact squashes the data together, only one segment - assert segments_in_repository() == 1 + assert segments_in_repository() == 4 self.repository.append_only = True - assert segments_in_repository() == 1 + assert segments_in_repository() == 4 self.repository.put(b'00000000000000000000000000000000', b'foo') self.repository.commit() # append only: does not compact, only new segments written - assert segments_in_repository() == 2 + assert segments_in_repository() == 6 class RepositoryCheckTestCase(RepositoryTestCaseBase): @@ -296,20 +296,20 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): self.add_objects([[1, 2, 3], [4, 5, 6]]) self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) self.check(status=True) - self.delete_segment(1) + self.delete_segment(2) self.repository.rollback() self.check(repair=True, status=True) self.assert_equal(set([1, 2, 3]), self.list_objects()) def test_repair_missing_commit_segment(self): self.add_objects([[1, 2, 3], [4, 5, 6]]) - self.delete_segment(1) + self.delete_segment(3) self.assert_raises(Repository.ObjectNotFound, lambda: self.get_objects(4)) self.assert_equal(set([1, 2, 3]), self.list_objects()) def test_repair_corrupted_commit_segment(self): self.add_objects([[1, 2, 3], [4, 5, 6]]) - with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'r+b') as fd: + with open(os.path.join(self.tmppath, 
'repository', 'data', '0', '3'), 'r+b') as fd: fd.seek(-1, os.SEEK_END) fd.write(b'X') self.assert_raises(Repository.ObjectNotFound, lambda: self.get_objects(4)) @@ -319,15 +319,15 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): def test_repair_no_commits(self): self.add_objects([[1, 2, 3]]) - with open(os.path.join(self.tmppath, 'repository', 'data', '0', '0'), 'r+b') as fd: + with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'r+b') as fd: fd.seek(-1, os.SEEK_END) fd.write(b'X') self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4)) self.check(status=False) self.check(status=False) - self.assert_equal(self.list_indices(), ['index.0']) - self.check(repair=True, status=True) self.assert_equal(self.list_indices(), ['index.1']) + self.check(repair=True, status=True) + self.assert_equal(self.list_indices(), ['index.3']) self.check(status=True) self.get_objects(3) self.assert_equal(set([1, 2, 3]), self.list_objects()) @@ -341,10 +341,10 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): def test_repair_index_too_new(self): self.add_objects([[1, 2, 3], [4, 5, 6]]) - self.assert_equal(self.list_indices(), ['index.1']) + self.assert_equal(self.list_indices(), ['index.3']) self.rename_index('index.100') self.check(status=True) - self.assert_equal(self.list_indices(), ['index.1']) + self.assert_equal(self.list_indices(), ['index.3']) self.get_objects(4) self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects()) From 4f1157c3a49ee68c350dc8b8aa68e9dbc7f20093 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Tue, 24 May 2016 20:57:52 +0200 Subject: [PATCH 12/28] fix tox build for environment-python != containing-python in yet-another instance this instance: the repository worktree is *not* named borg. 
--- conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conftest.py b/conftest.py index 6ca799a3e..450ff10a7 100644 --- a/conftest.py +++ b/conftest.py @@ -1,3 +1,4 @@ +import os.path import sys # This is a hack to fix path problems because "borg" (the package) is in the source root. @@ -11,7 +12,7 @@ import sys original_path = list(sys.path) for entry in original_path: - if entry == '' or entry.endswith('/borg'): + if entry == '' or entry == os.path.dirname(__file__): sys.path.remove(entry) try: From c4c11d75a14cf8c67f3e141ca925b6890bf86674 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 24 May 2016 22:16:47 +0200 Subject: [PATCH 13/28] fall back to len() if wcswidth returns neg. value, fixes #1090 --- borg/platform_posix.pyx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/borg/platform_posix.pyx b/borg/platform_posix.pyx index f2a8e1773..8d74f19e8 100644 --- a/borg/platform_posix.pyx +++ b/borg/platform_posix.pyx @@ -2,4 +2,9 @@ cdef extern from "wchar.h": cdef int wcswidth(const Py_UNICODE *str, size_t n) def swidth(s): - return wcswidth(s, len(s)) + str_len = len(s) + terminal_width = wcswidth(s, str_len) + if terminal_width >= 0: + return terminal_width + else: + return str_len From 9fff2af1340654706c4243d581a3e4df82af3fba Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 24 May 2016 22:40:20 +0200 Subject: [PATCH 14/28] fix crashing borg extract --stdout, fixes #1064 --- borg/archive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/archive.py b/borg/archive.py index e630202a6..905cbff22 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -371,7 +371,7 @@ Number of files: {0.stats.nfiles}'''.format( """ if dry_run or stdout: if b'chunks' in item: - for data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True): + for _, data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True): if stdout: 
sys.stdout.buffer.write(data) if stdout: From f99ad4ca6fc3db1987834f5b7e770bcf89d49fed Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sat, 21 May 2016 18:45:47 +0200 Subject: [PATCH 15/28] Repository: compact v2 - Track free space information for each sparse segment - Don't compact large segments with little free space --- borg/repository.py | 144 ++++++++++++++++++++++++++++++--------------- 1 file changed, 96 insertions(+), 48 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index c94e5decf..7d4e972d1 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -9,6 +9,8 @@ logger = logging.getLogger(__name__) import os import shutil import struct +from collections import defaultdict +from functools import partial from zlib import crc32 import msgpack @@ -26,6 +28,8 @@ TAG_PUT = 0 TAG_DELETE = 1 TAG_COMMIT = 2 +FreeSpace = partial(defaultdict, int) + class Repository: """Filesystem based transactional key value store @@ -210,21 +214,30 @@ class Repository: self.index = self.open_index(transaction_id) if transaction_id is None: self.segments = {} # XXX bad name: usage_count_of_segment_x = self.segments[x] - self.compact = set() # XXX bad name: segments_needing_compaction = self.compact + self.compact = FreeSpace() # XXX bad name: freeable_space_of_segment_x = self.compact[x] else: if do_cleanup: self.io.cleanup(transaction_id) with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd: hints = msgpack.unpack(fd) - if hints[b'version'] != 1: - raise ValueError('Unknown hints file version: %d' % hints['version']) - self.segments = hints[b'segments'] - self.compact = set(hints[b'compact']) + if hints[b'version'] == 1: + logger.debug('Upgrading from v1 hints.%d', transaction_id) + self.segments = hints[b'segments'] + self.compact = FreeSpace() + for segment in sorted(hints[b'compact']): + logger.debug('Rebuilding sparse info for segment %d', segment) + self._rebuild_sparse(segment) + logger.debug('Upgrade to v2 hints complete') + 
elif hints[b'version'] != 2: + raise ValueError('Unknown hints file version: %d' % hints[b'version']) + else: + self.segments = hints[b'segments'] + self.compact = FreeSpace(hints[b'compact']) def write_index(self): - hints = {b'version': 1, + hints = {b'version': 2, b'segments': self.segments, - b'compact': list(self.compact)} + b'compact': self.compact} transaction_id = self.io.get_segments_transaction_id() hints_file = os.path.join(self.path, 'hints.%d' % transaction_id) with open(hints_file + '.tmp', 'wb') as fd: @@ -238,10 +251,10 @@ class Repository: if self.append_only: with open(os.path.join(self.path, 'transactions'), 'a') as log: print('transaction %d, UTC time %s' % (transaction_id, datetime.utcnow().isoformat()), file=log) - # Remove old indices + # Remove old auxiliary files current = '.%d' % transaction_id for name in os.listdir(self.path): - if not name.startswith('index.') and not name.startswith('hints.'): + if not name.startswith(('index.', 'hints.')): continue if name.endswith(current): continue @@ -267,32 +280,40 @@ class Repository: for segment in unused: assert self.segments.pop(segment) == 0 self.io.delete_segment(segment) + del self.compact[segment] unused = [] - for segment in sorted(self.compact): - if self.io.segment_exists(segment): - for tag, key, offset, data in self.io.iter_objects(segment, include_data=True): - if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset): + for segment, freeable_space in sorted(self.compact.items()): + if not self.io.segment_exists(segment): + del self.compact[segment] + continue + segment_size = self.io.segment_size(segment) + if segment_size > 0.2 * self.max_segment_size and freeable_space < 0.15 * segment_size: + logger.debug('not compacting segment %d for later (only %d bytes are sparse)', + segment, freeable_space) + continue + segments.setdefault(segment, 0) + for tag, key, offset, data in self.io.iter_objects(segment, include_data=True): + if tag == TAG_PUT and self.index.get(key, 
(-1, -1)) == (segment, offset): + try: + new_segment, offset = self.io.write_put(key, data, raise_full=save_space) + except LoggedIO.SegmentFull: + complete_xfer() + new_segment, offset = self.io.write_put(key, data) + self.index[key] = new_segment, offset + segments.setdefault(new_segment, 0) + segments[new_segment] += 1 + segments[segment] -= 1 + elif tag == TAG_DELETE: + if index_transaction_id is None or segment > index_transaction_id: try: - new_segment, offset = self.io.write_put(key, data, raise_full=save_space) + self.io.write_delete(key, raise_full=save_space) except LoggedIO.SegmentFull: complete_xfer() - new_segment, offset = self.io.write_put(key, data) - self.index[key] = new_segment, offset - segments.setdefault(new_segment, 0) - segments[new_segment] += 1 - segments[segment] -= 1 - elif tag == TAG_DELETE: - if index_transaction_id is None or segment > index_transaction_id: - try: - self.io.write_delete(key, raise_full=save_space) - except LoggedIO.SegmentFull: - complete_xfer() - self.io.write_delete(key) - assert segments[segment] == 0 - unused.append(segment) + self.io.write_delete(key) + assert segments[segment] == 0 + unused.append(segment) complete_xfer() - self.compact = set() def replay_segments(self, index_transaction_id, segments_transaction_id): self.prepare_txn(index_transaction_id, do_cleanup=False) @@ -315,11 +336,12 @@ class Repository: def _update_index(self, segment, objects, report=None): """some code shared between replay_segments and check""" self.segments[segment] = 0 - for tag, key, offset in objects: + for tag, key, offset, size in objects: if tag == TAG_PUT: try: + # If this PUT supersedes an older PUT, mark the old segment for compaction and count the free space s, _ = self.index[key] - self.compact.add(s) + self.compact[s] += size self.segments[s] -= 1 except KeyError: pass @@ -327,12 +349,17 @@ class Repository: self.segments[segment] += 1 elif tag == TAG_DELETE: try: - s, _ = self.index.pop(key) - self.segments[s] -= 1 - 
self.compact.add(s) + # if the deleted PUT is not in the index, there is nothing to clean up + s, offset = self.index.pop(key) except KeyError: pass - self.compact.add(segment) + else: + if self.io.segment_exists(s): + # the old index is not necessarily valid for this transaction (e.g. compaction); if the segment + # is already gone, then it was already compacted. + self.segments[s] -= 1 + size = len(self.io.read(s, offset, key)) + self.compact[s] += size elif tag == TAG_COMMIT: continue else: @@ -342,7 +369,22 @@ class Repository: else: report(msg) if self.segments[segment] == 0: - self.compact.add(segment) + self.compact[segment] += self.io.segment_size(segment) + + def _rebuild_sparse(self, segment): + """Rebuild sparse bytes count for a single segment relative to the current index.""" + self.compact[segment] = 0 + if self.segments[segment] == 0: + self.compact[segment] += self.io.segment_size(segment) + return + for tag, key, offset, size in self.io.iter_objects(segment): + if tag == TAG_PUT: + if self.index.get(key, (-1, -1)) != (segment, offset): + # This PUT is superseded later + self.compact[segment] += size + elif tag == TAG_DELETE: + # The outcome of the DELETE has been recorded in the PUT branch already + self.compact[segment] += size def check(self, repair=False, save_space=False): """Check repository consistency @@ -457,14 +499,16 @@ class Repository: if not self._active_txn: self.prepare_txn(self.get_transaction_id()) try: - segment, _ = self.index[id] - self.segments[segment] -= 1 - self.compact.add(segment) - segment = self.io.write_delete(id) - self.segments.setdefault(segment, 0) - self.compact.add(segment) + segment, offset = self.index[id] except KeyError: pass + else: + self.segments[segment] -= 1 + size = len(self.io.read(segment, offset, id)) + self.compact[segment] += size + segment, size = self.io.write_delete(id) + self.compact[segment] += size + self.segments.setdefault(segment, 0) segment, offset = self.io.write_put(id, data) 
self.segments.setdefault(segment, 0) self.segments[segment] += 1 @@ -478,9 +522,10 @@ class Repository: except KeyError: raise self.ObjectNotFound(id, self.path) from None self.segments[segment] -= 1 - self.compact.add(segment) - segment = self.io.write_delete(id) - self.compact.add(segment) + size = len(self.io.read(segment, offset, id)) + self.compact[segment] += size + segment, size = self.io.write_delete(id) + self.compact[segment] += size self.segments.setdefault(segment, 0) def preload(self, ids): @@ -578,7 +623,7 @@ class LoggedIO: seen_commit = False while True: try: - tag, key, offset = next(iterator) + tag, key, offset, _ = next(iterator) except IntegrityError: return False except StopIteration: @@ -635,6 +680,9 @@ class LoggedIO: def segment_exists(self, segment): return os.path.exists(self.segment_filename(segment)) + def segment_size(self, segment): + return os.path.getsize(self.segment_filename(segment)) + def iter_objects(self, segment, include_data=False): fd = self.get_fd(segment) fd.seek(0) @@ -648,7 +696,7 @@ class LoggedIO: if include_data: yield tag, key, offset, data else: - yield tag, key, offset + yield tag, key, offset, size offset += size header = fd.read(self.header_fmt.size) @@ -732,7 +780,7 @@ class LoggedIO: crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff) fd.write(b''.join((crc, header, id))) self.offset += self.put_header_fmt.size - return self.segment + return self.segment, self.put_header_fmt.size def write_commit(self): self.close_segment() From b8d1bc1ca891a65a7276d33488791a4878731a94 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sat, 21 May 2016 19:17:14 +0200 Subject: [PATCH 16/28] Repository: don't read+verify old entries for size retrieval --- borg/repository.py | 67 +++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index 7d4e972d1..439c06511 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ 
-358,7 +358,7 @@ class Repository: # the old index is not necessarily valid for this transaction (e.g. compaction); if the segment # is already gone, then it was already compacted. self.segments[s] -= 1 - size = len(self.io.read(s, offset, key)) + size = self.io.read(s, offset, key, read_data=False) self.compact[s] += size elif tag == TAG_COMMIT: continue @@ -377,7 +377,7 @@ class Repository: if self.segments[segment] == 0: self.compact[segment] += self.io.segment_size(segment) return - for tag, key, offset, size in self.io.iter_objects(segment): + for tag, key, offset, size in self.io.iter_objects(segment, read_data=False): if tag == TAG_PUT: if self.index.get(key, (-1, -1)) != (segment, offset): # This PUT is superseded later @@ -504,7 +504,7 @@ class Repository: pass else: self.segments[segment] -= 1 - size = len(self.io.read(segment, offset, id)) + size = self.io.read(segment, offset, id, read_data=False) self.compact[segment] += size segment, size = self.io.write_delete(id) self.compact[segment] += size @@ -522,7 +522,7 @@ class Repository: except KeyError: raise self.ObjectNotFound(id, self.path) from None self.segments[segment] -= 1 - size = len(self.io.read(segment, offset, id)) + size = self.io.read(segment, offset, id, read_data=False) self.compact[segment] += size segment, size = self.io.write_delete(id) self.compact[segment] += size @@ -683,7 +683,15 @@ class LoggedIO: def segment_size(self, segment): return os.path.getsize(self.segment_filename(segment)) - def iter_objects(self, segment, include_data=False): + def iter_objects(self, segment, include_data=False, read_data=True): + """ + Return object iterator for *segment*. + + If read_data is False then include_data must be False as well. + Integrity checks are skipped: all data obtained from the iterator must be considered informational. + + The iterator returns four-tuples of (tag, key, offset, data|size). 
+ """ fd = self.get_fd(segment) fd.seek(0) if fd.read(MAGIC_LEN) != MAGIC: @@ -692,7 +700,8 @@ class LoggedIO: header = fd.read(self.header_fmt.size) while header: size, tag, key, data = self._read(fd, self.header_fmt, header, segment, offset, - (TAG_PUT, TAG_DELETE, TAG_COMMIT)) + (TAG_PUT, TAG_DELETE, TAG_COMMIT), + read_data=read_data) if include_data: yield tag, key, offset, data else: @@ -720,19 +729,25 @@ class LoggedIO: fd.write(data[:size]) data = data[size:] - def read(self, segment, offset, id): + def read(self, segment, offset, id, read_data=True): + """ + Read entry from *segment* at *offset* with *id*. + + If read_data is False the size of the entry is returned instead and integrity checks are skipped. + The return value should thus be considered informational. + """ if segment == self.segment and self._write_fd: self._write_fd.sync() fd = self.get_fd(segment) fd.seek(offset) header = fd.read(self.put_header_fmt.size) - size, tag, key, data = self._read(fd, self.put_header_fmt, header, segment, offset, (TAG_PUT, )) + size, tag, key, data = self._read(fd, self.put_header_fmt, header, segment, offset, (TAG_PUT, ), read_data) if id != key: raise IntegrityError('Invalid segment entry header, is not for wanted id [segment {}, offset {}]'.format( segment, offset)) - return data + return data if read_data else size - def _read(self, fd, fmt, header, segment, offset, acceptable_tags): + def _read(self, fd, fmt, header, segment, offset, acceptable_tags, read_data=True): # some code shared by read() and iter_objects() try: hdr_tuple = fmt.unpack(header) @@ -750,18 +765,32 @@ class LoggedIO: raise IntegrityError('Invalid segment entry size [segment {}, offset {}]'.format( segment, offset)) length = size - fmt.size - data = fd.read(length) - if len(data) != length: - raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format( - segment, offset, length, len(data))) - if crc32(data, crc32(memoryview(header)[4:])) & 
0xffffffff != crc: - raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format( - segment, offset)) + if read_data: + data = fd.read(length) + if len(data) != length: + raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format( + segment, offset, length, len(data))) + if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc: + raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format( + segment, offset)) + if key is None and tag in (TAG_PUT, TAG_DELETE): + key, data = data[:32], data[32:] + else: + if key is None and tag in (TAG_PUT, TAG_DELETE): + key = fd.read(32) + length -= 32 + if len(key) != 32: + raise IntegrityError('Segment entry key short read [segment {}, offset {}]: expected {}, got {} bytes'.format( + segment, offset, 32, len(key))) + oldpos = fd.tell() + seeked = fd.seek(length, os.SEEK_CUR) - oldpos + data = None + if seeked != length: + raise IntegrityError('Segment entry data short seek [segment {}, offset {}]: expected {}, got {} bytes'.format( + segment, offset, length, seeked)) if tag not in acceptable_tags: raise IntegrityError('Invalid segment entry header, did not get acceptable tag [segment {}, offset {}]'.format( segment, offset)) - if key is None and tag in (TAG_PUT, TAG_DELETE): - key, data = data[:32], data[32:] return size, tag, key, data def write_put(self, id, data, raise_full=False): From 4619f781d8f0fba80d618ac28a3d1b8cc031fc05 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Mon, 16 May 2016 21:15:07 +0200 Subject: [PATCH 17/28] Terms, principles of Repository --- borg/repository.py | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/borg/repository.py b/borg/repository.py index 439c06511..1620c8278 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -32,15 +32,51 @@ FreeSpace = partial(defaultdict, int) class Repository: - """Filesystem based 
transactional key value store + """ + Filesystem based transactional key value store + + Transactionality is achieved by using a log (aka journal) to record changes. The log is a series of numbered files + called segments. Each segment is a series of log entries. The segment number together with the offset of each + entry relative to its segment start establishes an ordering of the log entries. This is the "definition" of + time for the purposes of the log. + + Log entries are either PUT, DELETE or COMMIT. + + A COMMIT is always the final log entry in a segment and marks all data from the beginning of the log until the + segment ending with the COMMIT as committed and consistent. The segment number of a segment ending with a COMMIT + is called the transaction ID of that commit, and a segment ending with a COMMIT is called committed. + + When reading from a repository it is first checked whether the last segment is committed. If it is not, then + all segments after the last committed segment are deleted; they contain log entries whose consistency is not + established by a COMMIT. + + Note that the COMMIT can't establish consistency by itself, but only manages to do so with proper support from + the platform (including the hardware). See platform_base.SyncFile for details. + + A PUT inserts a key-value pair. The value is stored in the log entry, hence the repository implements + full data logging, meaning that all data is consistent, not just metadata (which is common in file systems). + + A DELETE marks a key as deleted. + + For a given key only the last entry regarding the key, which is called current (all other entries are called + superseded), is relevant: If there is no entry or the last entry is a DELETE then the key does not exist. + Otherwise the last PUT defines the value of the key. + + By superseding a PUT (with either another PUT or a DELETE) the log entry becomes obsolete. 
A segment containing + such obsolete entries is called sparse, while a segment containing no such entries is called compact. + + Sparse segments can be compacted and thereby disk space freed. This destroys the transaction for which the + superseded entries were current. On disk layout: + dir/README dir/config dir/data// dir/index.X dir/hints.X """ + class DoesNotExist(Error): """Repository {} does not exist.""" From 2806133902476cbb678bb2cddcffdf32f87e1d5f Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Wed, 25 May 2016 12:15:46 +0200 Subject: [PATCH 18/28] testsuite/repository: test .compact, _build_sparse --- borg/testsuite/repository.py | 42 +++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 71743bfb0..85f4af457 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -10,7 +10,7 @@ from ..hashindex import NSIndex from ..helpers import Location, IntegrityError from ..locking import UpgradableLock, LockFailed from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint -from ..repository import Repository, LoggedIO +from ..repository import Repository, LoggedIO, MAGIC from . import BaseTestCase @@ -125,6 +125,46 @@ class RepositoryTestCase(RepositoryTestCaseBase): self.assert_equal(len(self.repository.list(limit=50)), 50) +class LocalRepositoryTestCase(RepositoryTestCaseBase): + # test case that doesn't work with remote repositories + + def _assert_sparse(self): + # The superseded 123456... PUT
PUT + assert self.repository.compact[0] == 41 + 9 + # The DELETE issued by the superseding PUT (or issued directly) + assert self.repository.compact[2] == 41 + self.repository._rebuild_sparse(0) + assert self.repository.compact[0] == 41 + 9 + + def test_sparse1(self): + self.repository.put(b'00000000000000000000000000000000', b'foo') + self.repository.put(b'00000000000000000000000000000001', b'123456789') + self.repository.commit() + self.repository.put(b'00000000000000000000000000000001', b'bar') + self._assert_sparse() + + def test_sparse2(self): + self.repository.put(b'00000000000000000000000000000000', b'foo') + self.repository.put(b'00000000000000000000000000000001', b'123456789') + self.repository.commit() + self.repository.delete(b'00000000000000000000000000000001') + self._assert_sparse() + + def test_sparse_delete(self): + self.repository.put(b'00000000000000000000000000000000', b'1245') + self.repository.delete(b'00000000000000000000000000000000') + self.repository.io._write_fd.sync() + + # The on-line tracking works on a per-object basis... 
+ assert self.repository.compact[0] == 41 + 41 + 4 + self.repository._rebuild_sparse(0) + # ...while _rebuild_sparse can mark whole segments as completely sparse (which then includes the segment magic) + assert self.repository.compact[0] == 41 + 41 + 4 + len(MAGIC) + + self.repository.commit() + assert 0 not in [segment for segment, _ in self.repository.io.segment_iterator()] + + class RepositoryCommitTestCase(RepositoryTestCaseBase): def add_keys(self): From 0e3bba7b640269595b2ed791fa2b9b18dd006bb7 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Wed, 25 May 2016 22:01:38 +0200 Subject: [PATCH 19/28] Don't rebuild command line parser for each invocation Makes tests faster --- borg/archiver.py | 8 ++++---- setup.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 96ac6bee0..d9eebeebb 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -96,9 +96,10 @@ def with_archive(method): class Archiver: - def __init__(self, lock_wait=None): + def __init__(self, lock_wait=None, prog=None): self.exit_code = EXIT_SUCCESS self.lock_wait = lock_wait + self.parser = self.build_parser(prog) def print_error(self, msg, *args): msg = args and msg % args or msg @@ -1118,7 +1119,7 @@ class Archiver: self.print_warning(warning) return args - def build_parser(self, args=None, prog=None): + def build_parser(self, prog=None): common_parser = argparse.ArgumentParser(add_help=False, prog=prog) common_group = common_parser.add_argument_group('Common options') @@ -2062,8 +2063,7 @@ class Archiver: # We can't use argparse for "serve" since we don't want it to show up in "Available commands" if args: args = self.preprocess_args(args) - parser = self.build_parser(args) - args = parser.parse_args(args or ['-h']) + args = self.parser.parse_args(args or ['-h']) update_excludes(args) return args diff --git a/setup.py b/setup.py index 2ded99080..4f0159acc 100644 --- a/setup.py +++ b/setup.py @@ -159,7 +159,7 @@ class 
build_usage(Command): print('generating usage docs') # allows us to build docs without the C modules fully loaded during help generation from borg.archiver import Archiver - parser = Archiver().build_parser(prog='borg') + parser = Archiver(prog='borg').parser choices = {} for action in parser._actions: if action.choices is not None: From 478a6f29b6abbb024d55f00c637473be20ab2632 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Wed, 25 May 2016 22:29:38 +0200 Subject: [PATCH 20/28] Lower PBKDF2 iteration count for the tests This cuts testing time to about one third for me. --- conftest.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/conftest.py b/conftest.py index 450ff10a7..95bca39fc 100644 --- a/conftest.py +++ b/conftest.py @@ -27,7 +27,12 @@ setup_logging() from borg.testsuite import has_lchflags, no_lchlfags_because, has_llfuse from borg.testsuite.platform import fakeroot_detected -from borg import xattr +from borg import xattr, constants + + +def pytest_configure(config): + # no fixture-based monkey-patching since star-imports are used for the constants module + constants.PBKDF2_ITERATIONS = 1 def pytest_report_header(config, startdir): From 731f6241faa254c379a9bc25dd21133d3604d32a Mon Sep 17 00:00:00 2001 From: Andrew Skalski Date: Wed, 25 May 2016 18:14:27 -0400 Subject: [PATCH 21/28] RemoteRepository: Fix busy wait in call_many, fixes #940 --- borg/remote.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/borg/remote.py b/borg/remote.py index 099026628..a86664d23 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -19,6 +19,8 @@ RPC_PROTOCOL_VERSION = 2 BUFSIZE = 10 * 1024 * 1024 +MAX_INFLIGHT = 100 + class ConnectionClosed(Error): """Connection closed by remote host""" @@ -246,7 +248,6 @@ class RemoteRepository: calls = list(calls) waiting_for = [] - w_fds = [self.stdin_fd] while wait or calls: while waiting_for: try: @@ -275,6 +276,10 @@ class RemoteRepository: return except KeyError: break + if 
self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT): + w_fds = [self.stdin_fd] + else: + w_fds = [] r, w, x = select.select(self.r_fds, w_fds, self.x_fds, 1) if x: raise Exception('FD exception occurred') @@ -311,7 +316,7 @@ class RemoteRepository: else: sys.stderr.write("Remote: " + line) if w: - while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < 100: + while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT: if calls: if is_preloaded: if calls[0] in self.cache: @@ -338,8 +343,6 @@ class RemoteRepository: # that the fd should be writable if e.errno != errno.EAGAIN: raise - if not self.to_send and not (calls or self.preload_ids): - w_fds = [] self.ignore_responses |= set(waiting_for) def check(self, repair=False, save_space=False): From 252c1b9802e26be76350dc106cbf36746693c313 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 8 Apr 2016 15:38:13 +0200 Subject: [PATCH 22/28] Auto-recover from corrupted index/hints file(s) And don't swallow all OSErrors when creating archives. We need to work on that on a more general level... 
--- borg/hashindex.pyx | 2 +- borg/helpers.py | 4 +++ borg/repository.py | 47 ++++++++++++++++++++++++++++++------ borg/testsuite/repository.py | 42 +++++++++++++++++++++++++++++++- 4 files changed, 86 insertions(+), 9 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index a99c0f602..459eed7b0 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -63,7 +63,7 @@ cdef class IndexBase: path = os.fsencode(path) self.index = hashindex_read(path) if not self.index: - raise Exception('hashindex_read failed') + raise RuntimeError('hashindex_read failed') else: self.index = hashindex_init(capacity, self.key_size, self.value_size) if not self.index: diff --git a/borg/helpers.py b/borg/helpers.py index 15c01bb7c..4fa4e4575 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -65,6 +65,10 @@ class ErrorWithTraceback(Error): traceback = True +class InternalOSError(ErrorWithTraceback): + """Error while accessing repository / cache files""" + + class IntegrityError(ErrorWithTraceback): """Data integrity error""" diff --git a/borg/repository.py b/borg/repository.py index 1620c8278..d59466358 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -15,7 +15,8 @@ from zlib import crc32 import msgpack from .constants import * # NOQA -from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex +from .helpers import Error, ErrorWithTraceback, IntegrityError, InternalOSError, Location, ProgressIndicatorPercent, \ + bin_to_hex from .hashindex import NSIndex from .locking import UpgradableLock, LockError, LockErrorT from .lrucache import LRUCache @@ -178,7 +179,7 @@ class Repository: else: return None - def get_transaction_id(self): + def check_transaction(self): index_transaction_id = self.get_index_transaction_id() segments_transaction_id = self.io.get_segments_transaction_id() if index_transaction_id is not None and segments_transaction_id is None: @@ -191,6 +192,9 @@ class Repository: else: replay_from = 
index_transaction_id self.replay_segments(replay_from, segments_transaction_id) + + def get_transaction_id(self): + self.check_transaction() return self.get_index_transaction_id() def break_lock(self): @@ -231,10 +235,23 @@ class Repository: self.write_index() self.rollback() - def open_index(self, transaction_id): + def open_index(self, transaction_id, auto_recover=True): if transaction_id is None: return NSIndex() - return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8')) + index_path = (os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8') + try: + return NSIndex.read(index_path) + except RuntimeError as re: + assert str(re) == 'hashindex_read failed' # everything else means we're in *deep* trouble + # corrupted index file, need to replay segments + os.unlink(os.path.join(self.path, 'hints.%d' % transaction_id)) + os.unlink(os.path.join(self.path, 'index.%d' % transaction_id)) + if not auto_recover: + raise + self.prepare_txn(self.get_transaction_id()) + # don't leave an open transaction around + self.commit() + return self.open_index(self.get_transaction_id()) def prepare_txn(self, transaction_id, do_cleanup=True): self._active_txn = True @@ -247,15 +264,31 @@ class Repository: self._active_txn = False raise if not self.index or transaction_id is None: - self.index = self.open_index(transaction_id) + try: + self.index = self.open_index(transaction_id, False) + except RuntimeError: + self.check_transaction() + self.index = self.open_index(transaction_id, False) if transaction_id is None: self.segments = {} # XXX bad name: usage_count_of_segment_x = self.segments[x] self.compact = FreeSpace() # XXX bad name: freeable_space_of_segment_x = self.compact[x] else: if do_cleanup: self.io.cleanup(transaction_id) - with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd: - hints = msgpack.unpack(fd) + try: + with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd: + hints = 
msgpack.unpack(fd) + except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError) as e: + # corrupted or deleted hints file, need to replay segments + if not isinstance(e, FileNotFoundError): + os.unlink(os.path.join(self.path, 'hints.%d' % transaction_id)) + # index must exist at this point + os.unlink(os.path.join(self.path, 'index.%d' % transaction_id)) + self.check_transaction() + self.prepare_txn(transaction_id) + return + except OSError as os_error: + raise InternalOSError from os_error if hints[b'version'] == 1: logger.debug('Upgrading from v1 hints.%d', transaction_id) self.segments = hints[b'segments'] diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 85f4af457..346711424 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -7,7 +7,7 @@ import tempfile from unittest.mock import patch from ..hashindex import NSIndex -from ..helpers import Location, IntegrityError +from ..helpers import Location, IntegrityError, InternalOSError from ..locking import UpgradableLock, LockFailed from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint from ..repository import Repository, LoggedIO, MAGIC @@ -270,6 +270,46 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): assert segments_in_repository() == 6 +class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): + def setUp(self): + super().setUp() + self.repository.put(b'00000000000000000000000000000000', b'foo') + self.repository.commit() + self.repository.close() + + def do_commit(self): + with self.repository: + self.repository.put(b'00000000000000000000000000000000', b'fox') + self.repository.commit() + + def test_corrupted_hints(self): + with open(os.path.join(self.repository.path, 'hints.0'), 'ab') as fp: + fp.write(b'123456789') + self.do_commit() + + def test_deleted_hints(self): + os.unlink(os.path.join(self.repository.path, 'hints.0')) + self.do_commit() + + def test_unreadable_hints(self): + hints = 
os.path.join(self.repository.path, 'hints.0') + os.unlink(hints) + os.mkdir(hints) + with self.assert_raises(InternalOSError): + self.do_commit() + + def test_index(self): + with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fp: + fp.write(b'123456789') + self.do_commit() + + def test_index_outside_transaction(self): + with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fp: + fp.write(b'123456789') + with self.repository: + assert len(self.repository) == 1 + + class RepositoryCheckTestCase(RepositoryTestCaseBase): def list_indices(self): From d979a84f3724522a80a7f0b19582ec40c0b8efbe Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 22 Apr 2016 11:40:16 +0200 Subject: [PATCH 23/28] Handle permission and similar errors on the index --- borg/hashindex.pyx | 10 ++++++++++ borg/repository.py | 22 ++++++++++++++-------- borg/testsuite/repository.py | 23 +++++++++++++++++------ 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index 459eed7b0..e55de7fe7 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -27,6 +27,14 @@ cdef extern from "_hashindex.c": uint32_t _le32toh(uint32_t v) +cdef extern from "errno.h": + int errno + + +cdef extern from "string.h": + char *strerror(int errnum) + + cdef _NoDefault = object() """ @@ -63,6 +71,8 @@ cdef class IndexBase: path = os.fsencode(path) self.index = hashindex_read(path) if not self.index: + if errno: + raise OSError(errno, strerror(errno), path) raise RuntimeError('hashindex_read failed') else: self.index = hashindex_init(capacity, self.key_size, self.value_size) diff --git a/borg/repository.py b/borg/repository.py index d59466358..05c0aa6f8 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -238,20 +238,24 @@ class Repository: def open_index(self, transaction_id, auto_recover=True): if transaction_id is None: return NSIndex() - index_path = (os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8') + 
index_path = os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8') try: return NSIndex.read(index_path) - except RuntimeError as re: - assert str(re) == 'hashindex_read failed' # everything else means we're in *deep* trouble + except RuntimeError as error: + assert str(error) == 'hashindex_read failed' # everything else means we're in *deep* trouble # corrupted index file, need to replay segments - os.unlink(os.path.join(self.path, 'hints.%d' % transaction_id)) - os.unlink(os.path.join(self.path, 'index.%d' % transaction_id)) + try: + os.unlink(index_path) + except OSError as e: + raise InternalOSError from e if not auto_recover: raise self.prepare_txn(self.get_transaction_id()) # don't leave an open transaction around self.commit() return self.open_index(self.get_transaction_id()) + except OSError as e: + raise InternalOSError from e def prepare_txn(self, transaction_id, do_cleanup=True): self._active_txn = True @@ -275,15 +279,17 @@ class Repository: else: if do_cleanup: self.io.cleanup(transaction_id) + hints_path = os.path.join(self.path, 'hints.%d' % transaction_id) + index_path = os.path.join(self.path, 'index.%d' % transaction_id) try: - with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd: + with open(hints_path, 'rb') as fd: hints = msgpack.unpack(fd) except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError) as e: # corrupted or deleted hints file, need to replay segments if not isinstance(e, FileNotFoundError): - os.unlink(os.path.join(self.path, 'hints.%d' % transaction_id)) + os.unlink(hints_path) # index must exist at this point - os.unlink(os.path.join(self.path, 'index.%d' % transaction_id)) + os.unlink(index_path) self.check_transaction() self.prepare_txn(transaction_id) return diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 346711424..6b758fb78 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -283,14 +283,18 @@ class 
RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): self.repository.commit() def test_corrupted_hints(self): - with open(os.path.join(self.repository.path, 'hints.0'), 'ab') as fp: - fp.write(b'123456789') + with open(os.path.join(self.repository.path, 'hints.0'), 'ab') as fd: + fd.write(b'123456789') self.do_commit() def test_deleted_hints(self): os.unlink(os.path.join(self.repository.path, 'hints.0')) self.do_commit() + def test_deleted_index(self): + os.unlink(os.path.join(self.repository.path, 'index.0')) + self.do_commit() + def test_unreadable_hints(self): hints = os.path.join(self.repository.path, 'hints.0') os.unlink(hints) @@ -299,16 +303,23 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): self.do_commit() def test_index(self): - with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fp: - fp.write(b'123456789') + with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fd: + fd.write(b'123456789') self.do_commit() def test_index_outside_transaction(self): - with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fp: - fp.write(b'123456789') + with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fd: + fd.write(b'123456789') with self.repository: assert len(self.repository) == 1 + def test_unreadable_index(self): + index = os.path.join(self.repository.path, 'index.0') + os.unlink(index) + os.mkdir(index) + with self.assert_raises(InternalOSError): + self.do_commit() + class RepositoryCheckTestCase(RepositoryTestCaseBase): From 1f33861fd634f5f470947bb531f6471eb92dc025 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 24 Apr 2016 23:42:24 +0200 Subject: [PATCH 24/28] Repository: better error reporting for index/hints failures --- borg/hashindex.pyx | 8 +++++++- borg/helpers.py | 12 ++++++++++-- borg/repository.py | 10 +++++----- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index e55de7fe7..83b53807c 100644 --- 
a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from collections import namedtuple +import locale import os cimport cython @@ -60,6 +61,11 @@ MAX_VALUE = _MAX_VALUE assert _MAX_VALUE % 2 == 1 + +def decoded_strerror(errno): + return strerror(errno).decode(locale.getpreferredencoding(), 'surrogateescape') + + @cython.internal cdef class IndexBase: cdef HashIndex *index @@ -72,7 +78,7 @@ cdef class IndexBase: self.index = hashindex_read(path) if not self.index: if errno: - raise OSError(errno, strerror(errno), path) + raise OSError(errno, decoded_strerror(errno), os.fsdecode(path)) raise RuntimeError('hashindex_read failed') else: self.index = hashindex_init(capacity, self.key_size, self.value_size) diff --git a/borg/helpers.py b/borg/helpers.py index 4fa4e4575..d93a1c3e7 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -65,8 +65,16 @@ class ErrorWithTraceback(Error): traceback = True -class InternalOSError(ErrorWithTraceback): - """Error while accessing repository / cache files""" +class InternalOSError(Error): + """Error while accessing repository: [Errno {}] {}: {}""" + + def __init__(self, os_error): + self.errno = os_error.errno + self.strerror = os_error.strerror + self.filename = os_error.filename + + def get_message(self): + return self.__doc__.format(self.errno, self.strerror, self.filename) class IntegrityError(ErrorWithTraceback): diff --git a/borg/repository.py b/borg/repository.py index 05c0aa6f8..eab6e1343 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -243,11 +243,11 @@ class Repository: return NSIndex.read(index_path) except RuntimeError as error: assert str(error) == 'hashindex_read failed' # everything else means we're in *deep* trouble - # corrupted index file, need to replay segments + logger.warning('Repository index missing or corrupted, trying to recover') try: os.unlink(index_path) except OSError as e: - raise InternalOSError from e + raise InternalOSError(e) from None if not 
auto_recover: raise self.prepare_txn(self.get_transaction_id()) @@ -255,7 +255,7 @@ class Repository: self.commit() return self.open_index(self.get_transaction_id()) except OSError as e: - raise InternalOSError from e + raise InternalOSError(e) from None def prepare_txn(self, transaction_id, do_cleanup=True): self._active_txn = True @@ -285,7 +285,7 @@ class Repository: with open(hints_path, 'rb') as fd: hints = msgpack.unpack(fd) except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError) as e: - # corrupted or deleted hints file, need to replay segments + logger.warning('Repository hints file missing or corrupted, trying to recover') if not isinstance(e, FileNotFoundError): os.unlink(hints_path) # index must exist at this point @@ -294,7 +294,7 @@ class Repository: self.prepare_txn(transaction_id) return except OSError as os_error: - raise InternalOSError from os_error + raise InternalOSError(os_error) from None if hints[b'version'] == 1: logger.debug('Upgrading from v1 hints.%d', transaction_id) self.segments = hints[b'segments'] From d42f1e691ae20307eb2dfc4383bce8efcba81bf4 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Fri, 27 May 2016 17:16:31 -0400 Subject: [PATCH 25/28] Break out remote log and stderr handling code to simplify testing May fix issue 1081, but that issue is hard to reproduce. 
--- borg/remote.py | 28 +++++++++++--------- borg/testsuite/repository.py | 51 ++++++++++++++---------------------- 2 files changed, 36 insertions(+), 43 deletions(-) diff --git a/borg/remote.py b/borg/remote.py index 099026628..c3f439639 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -298,18 +298,7 @@ class RemoteRepository: raise ConnectionClosed() data = data.decode('utf-8') for line in data.splitlines(keepends=True): - if line.startswith('$LOG '): - _, level, msg = line.split(' ', 2) - level = getattr(logging, level, logging.CRITICAL) # str -> int - if msg.startswith('Remote:'): - # server format: '$LOG <level> Remote: <msg>' - logging.log(level, msg.rstrip()) - else: - # server format '$LOG <level> <logname> Remote: <msg>' - logname, msg = msg.split(' ', 1) - logging.getLogger(logname).log(level, msg.rstrip()) - else: - sys.stderr.write("Remote: " + line) + handle_remote_line(line) if w: while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < 100: if calls: @@ -394,6 +383,21 @@ class RemoteRepository: self.preload_ids += ids +def handle_remote_line(line): + if line.startswith('$LOG '): + _, level, msg = line.split(' ', 2) + level = getattr(logging, level, logging.CRITICAL) # str -> int + if msg.startswith('Remote:'): + # server format: '$LOG <level> Remote: <msg>' + logging.log(level, msg.rstrip()) + else: + # server format '$LOG <level> <logname> Remote: <msg>' + logname, msg = msg.split(' ', 1) + logging.getLogger(logname).log(level, msg.rstrip()) + else: + sys.stderr.write("Remote: " + line) + + class RepositoryNoCache: """A not caching Repository wrapper, passes through to repository. 
diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 85f4af457..df6f1372e 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -9,7 +9,7 @@ from unittest.mock import patch from ..hashindex import NSIndex from ..helpers import Location, IntegrityError from ..locking import UpgradableLock, LockFailed -from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint +from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint, handle_remote_line from ..repository import Repository, LoggedIO, MAGIC from . import BaseTestCase @@ -440,63 +440,51 @@ class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase): pass -class RemoteRepositoryLoggingStub(RemoteRepository): - """ run a remote command that just prints a logging-formatted message to - stderr, and stub out enough of RemoteRepository to avoid the resulting - exceptions """ - def __init__(self, *args, **kw): - self.msg = kw.pop('msg') - super().__init__(*args, **kw) - - def borg_cmd(self, cmd, testing): - return [sys.executable, '-c', 'import sys; print("{}", file=sys.stderr)'.format(self.msg), ] - - def __del__(self): - # clean up from exception without triggering assert - if self.p: - self.close() - - -class RemoteRepositoryLoggerTestCase(RepositoryTestCaseBase): +class RemoteLoggerTestCase(BaseTestCase): def setUp(self): - self.location = Location('__testsuite__:/doesntexist/repo') self.stream = io.StringIO() self.handler = logging.StreamHandler(self.stream) logging.getLogger().handlers[:] = [self.handler] logging.getLogger('borg.repository').handlers[:] = [] logging.getLogger('borg.repository.foo').handlers[:] = [] + # capture stderr + sys.stderr.flush() + self.old_stderr = sys.stderr + self.stderr = sys.stderr = io.StringIO() def tearDown(self): - pass + sys.stderr = self.old_stderr - def create_repository(self, msg): - try: - RemoteRepositoryLoggingStub(self.location, msg=msg) - except ConnectionClosedWithHint: - # stub 
is dumb, so this exception expected - pass + def test_stderr_messages(self): + handle_remote_line("unstructured stderr message") + self.assert_equal(self.stream.getvalue(), '') + # stderr messages don't get an implicit newline + self.assert_equal(self.stderr.getvalue(), 'Remote: unstructured stderr message') def test_old_format_messages(self): self.handler.setLevel(logging.DEBUG) logging.getLogger().setLevel(logging.DEBUG) - self.create_repository("$LOG INFO Remote: old format message") + handle_remote_line("$LOG INFO Remote: old format message") self.assert_equal(self.stream.getvalue(), 'Remote: old format message\n') + self.assert_equal(self.stderr.getvalue(), '') def test_new_format_messages(self): self.handler.setLevel(logging.DEBUG) logging.getLogger().setLevel(logging.DEBUG) - self.create_repository("$LOG INFO borg.repository Remote: new format message") + handle_remote_line("$LOG INFO borg.repository Remote: new format message") self.assert_equal(self.stream.getvalue(), 'Remote: new format message\n') + self.assert_equal(self.stderr.getvalue(), '') def test_remote_messages_screened(self): # default borg config for root logger self.handler.setLevel(logging.WARNING) logging.getLogger().setLevel(logging.WARNING) - self.create_repository("$LOG INFO borg.repository Remote: new format info message") + handle_remote_line("$LOG INFO borg.repository Remote: new format info message") self.assert_equal(self.stream.getvalue(), '') + self.assert_equal(self.stderr.getvalue(), '') def test_info_to_correct_local_child(self): logging.getLogger('borg.repository').setLevel(logging.INFO) @@ -514,7 +502,8 @@ class RemoteRepositoryLoggerTestCase(RepositoryTestCaseBase): foo_handler.setLevel(logging.INFO) logging.getLogger('borg.repository.foo').handlers[:] = [foo_handler] - self.create_repository("$LOG INFO borg.repository Remote: new format child message") + handle_remote_line("$LOG INFO borg.repository Remote: new format child message") self.assert_equal(foo_stream.getvalue(), 
'') self.assert_equal(child_stream.getvalue(), 'Remote: new format child message\n') self.assert_equal(self.stream.getvalue(), '') + self.assert_equal(self.stderr.getvalue(), '') From 7b564e518d0821a61a26505708a260ad01b974fc Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Sat, 28 May 2016 23:51:13 +0200 Subject: [PATCH 26/28] obtain ioctl from sys/ioctl.h as per manpage fixed build failure on fedora --- borg/platform_linux.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/platform_linux.pyx b/borg/platform_linux.pyx index a1c549338..2d958e281 100644 --- a/borg/platform_linux.pyx +++ b/borg/platform_linux.pyx @@ -47,7 +47,7 @@ cdef extern from "linux/fs.h": int FS_APPEND_FL int FS_COMPR_FL -cdef extern from "stropts.h": +cdef extern from "sys/ioctl.h": int ioctl(int fildes, int request, ...) cdef extern from "string.h": From 9ebb37cab8ed0b09dc5907dfd8f109dd01024b7f Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 29 May 2016 18:51:09 +0200 Subject: [PATCH 27/28] testsuite/repository: fixup for 7a569bc --- borg/testsuite/repository.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 6b758fb78..eff532e1f 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -283,38 +283,38 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): self.repository.commit() def test_corrupted_hints(self): - with open(os.path.join(self.repository.path, 'hints.0'), 'ab') as fd: + with open(os.path.join(self.repository.path, 'hints.1'), 'ab') as fd: fd.write(b'123456789') self.do_commit() def test_deleted_hints(self): - os.unlink(os.path.join(self.repository.path, 'hints.0')) + os.unlink(os.path.join(self.repository.path, 'hints.1')) self.do_commit() def test_deleted_index(self): - os.unlink(os.path.join(self.repository.path, 'index.0')) + os.unlink(os.path.join(self.repository.path, 'index.1')) self.do_commit() 
def test_unreadable_hints(self): - hints = os.path.join(self.repository.path, 'hints.0') + hints = os.path.join(self.repository.path, 'hints.1') os.unlink(hints) os.mkdir(hints) with self.assert_raises(InternalOSError): self.do_commit() def test_index(self): - with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fd: + with open(os.path.join(self.repository.path, 'index.1'), 'wb') as fd: fd.write(b'123456789') self.do_commit() def test_index_outside_transaction(self): - with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fd: + with open(os.path.join(self.repository.path, 'index.1'), 'wb') as fd: fd.write(b'123456789') with self.repository: assert len(self.repository) == 1 def test_unreadable_index(self): - index = os.path.join(self.repository.path, 'index.0') + index = os.path.join(self.repository.path, 'index.1') os.unlink(index) os.mkdir(index) with self.assert_raises(InternalOSError): From d1ce746a026a72bbf445f8615351e14a8c453f8f Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 29 May 2016 19:52:53 +0200 Subject: [PATCH 28/28] borg.hashindex: use PyErr_SetFromErrnoWithFilename instead of home-grown (i.e. not medical grade) OSError raising. 
--- borg/hashindex.pyx | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index 83b53807c..724f2ee84 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -5,6 +5,8 @@ import os cimport cython from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t +from libc.errno cimport errno +from cpython.exc cimport PyErr_SetFromErrnoWithFilename API_VERSION = 2 @@ -28,14 +30,6 @@ cdef extern from "_hashindex.c": uint32_t _le32toh(uint32_t v) -cdef extern from "errno.h": - int errno - - -cdef extern from "string.h": - char *strerror(int errnum) - - cdef _NoDefault = object() """ @@ -62,10 +56,6 @@ MAX_VALUE = _MAX_VALUE assert _MAX_VALUE % 2 == 1 -def decoded_strerror(errno): - return strerror(errno).decode(locale.getpreferredencoding(), 'surrogateescape') - - @cython.internal cdef class IndexBase: cdef HashIndex *index @@ -78,7 +68,8 @@ cdef class IndexBase: self.index = hashindex_read(path) if not self.index: if errno: - raise OSError(errno, decoded_strerror(errno), os.fsdecode(path)) + PyErr_SetFromErrnoWithFilename(OSError, path) + return raise RuntimeError('hashindex_read failed') else: self.index = hashindex_init(capacity, self.key_size, self.value_size)