From 252c1b9802e26be76350dc106cbf36746693c313 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 8 Apr 2016 15:38:13 +0200 Subject: [PATCH 1/5] Auto-recover from corrupted index/hints file(s) And don't swallow all OSErrors when creating archives. We need to work on that on a more general level... --- borg/hashindex.pyx | 2 +- borg/helpers.py | 4 +++ borg/repository.py | 47 ++++++++++++++++++++++++++++++------ borg/testsuite/repository.py | 42 +++++++++++++++++++++++++++++++- 4 files changed, 86 insertions(+), 9 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index a99c0f602..459eed7b0 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -63,7 +63,7 @@ cdef class IndexBase: path = os.fsencode(path) self.index = hashindex_read(path) if not self.index: - raise Exception('hashindex_read failed') + raise RuntimeError('hashindex_read failed') else: self.index = hashindex_init(capacity, self.key_size, self.value_size) if not self.index: diff --git a/borg/helpers.py b/borg/helpers.py index 15c01bb7c..4fa4e4575 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -65,6 +65,10 @@ class ErrorWithTraceback(Error): traceback = True +class InternalOSError(ErrorWithTraceback): + """Error while accessing repository / cache files""" + + class IntegrityError(ErrorWithTraceback): """Data integrity error""" diff --git a/borg/repository.py b/borg/repository.py index 1620c8278..d59466358 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -15,7 +15,8 @@ from zlib import crc32 import msgpack from .constants import * # NOQA -from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex +from .helpers import Error, ErrorWithTraceback, IntegrityError, InternalOSError, Location, ProgressIndicatorPercent, \ + bin_to_hex from .hashindex import NSIndex from .locking import UpgradableLock, LockError, LockErrorT from .lrucache import LRUCache @@ -178,7 +179,7 @@ class Repository: else: return None - def get_transaction_id(self): + def check_transaction(self): index_transaction_id = self.get_index_transaction_id() segments_transaction_id = self.io.get_segments_transaction_id() if index_transaction_id is not None and segments_transaction_id is None: @@ -191,6 +192,9 @@ class Repository: else: replay_from = index_transaction_id self.replay_segments(replay_from, segments_transaction_id) + + def get_transaction_id(self): + self.check_transaction() return self.get_index_transaction_id() def break_lock(self): @@ -231,10 +235,23 @@ class Repository: self.write_index() self.rollback() - def open_index(self, transaction_id): + def open_index(self, transaction_id, auto_recover=True): if transaction_id is None: return NSIndex() - return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8')) + index_path = (os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8') + try: + return NSIndex.read(index_path) + except RuntimeError as re: + assert str(re) == 'hashindex_read failed' # everything else means we're in *deep* trouble + # corrupted index file, need to replay segments + os.unlink(os.path.join(self.path, 'hints.%d' % transaction_id)) + os.unlink(os.path.join(self.path, 'index.%d' % transaction_id)) + if not auto_recover: + raise + self.prepare_txn(self.get_transaction_id()) + # don't leave an open transaction around + self.commit() + return self.open_index(self.get_transaction_id()) def prepare_txn(self, transaction_id, do_cleanup=True): self._active_txn = True @@ -247,15 +264,31 @@ class Repository: self._active_txn = False raise if not self.index or transaction_id is None: - self.index = self.open_index(transaction_id) + try: + self.index = self.open_index(transaction_id, False) + except RuntimeError: + self.check_transaction() + self.index = self.open_index(transaction_id, False) if transaction_id is None: self.segments = {} # XXX bad name: usage_count_of_segment_x = self.segments[x] self.compact = FreeSpace() # XXX bad name: freeable_space_of_segment_x = self.compact[x] else: if do_cleanup: self.io.cleanup(transaction_id) - with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd: - hints = msgpack.unpack(fd) + try: + with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd: + hints = msgpack.unpack(fd) + except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError) as e: + # corrupted or deleted hints file, need to replay segments + if not isinstance(e, FileNotFoundError): + os.unlink(os.path.join(self.path, 'hints.%d' % transaction_id)) + # index must exist at this point + os.unlink(os.path.join(self.path, 'index.%d' % transaction_id)) + self.check_transaction() + self.prepare_txn(transaction_id) + return + except OSError as os_error: + raise InternalOSError from os_error if hints[b'version'] == 1: logger.debug('Upgrading from v1 hints.%d', transaction_id) self.segments = hints[b'segments'] diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 85f4af457..346711424 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -7,7 +7,7 @@ import tempfile from unittest.mock import patch from ..hashindex import NSIndex -from ..helpers import Location, IntegrityError +from ..helpers import Location, IntegrityError, InternalOSError from ..locking import UpgradableLock, LockFailed from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint from ..repository import Repository, LoggedIO, MAGIC @@ -270,6 +270,46 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): assert segments_in_repository() == 6 +class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): + def setUp(self): + super().setUp() + self.repository.put(b'00000000000000000000000000000000', b'foo') + self.repository.commit() + self.repository.close() + + def do_commit(self): + with self.repository: + self.repository.put(b'00000000000000000000000000000000', b'fox') + self.repository.commit() + + def test_corrupted_hints(self): + with open(os.path.join(self.repository.path, 'hints.0'), 'ab') as fp: + fp.write(b'123456789') + self.do_commit() + + def test_deleted_hints(self): + os.unlink(os.path.join(self.repository.path, 'hints.0')) + self.do_commit() + + def test_unreadable_hints(self): + hints = os.path.join(self.repository.path, 'hints.0') + os.unlink(hints) + os.mkdir(hints) + with self.assert_raises(InternalOSError): + self.do_commit() + + def test_index(self): + with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fp: + fp.write(b'123456789') + self.do_commit() + + def test_index_outside_transaction(self): + with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fp: + fp.write(b'123456789') + with self.repository: + assert len(self.repository) == 1 + + class RepositoryCheckTestCase(RepositoryTestCaseBase): def list_indices(self): From d979a84f3724522a80a7f0b19582ec40c0b8efbe Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Fri, 22 Apr 2016 11:40:16 +0200 Subject: [PATCH 2/5] Handle permission and similar errors on the index --- borg/hashindex.pyx | 10 ++++++++++ borg/repository.py | 22 ++++++++++++++-------- borg/testsuite/repository.py | 23 +++++++++++++++++------ 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index 459eed7b0..e55de7fe7 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -27,6 +27,14 @@ cdef extern from "_hashindex.c": uint32_t _le32toh(uint32_t v) +cdef extern from "errno.h": + int errno + + +cdef extern from "string.h": + char *strerror(int errnum) + + cdef _NoDefault = object() """ @@ -63,6 +71,8 @@ cdef class IndexBase: path = os.fsencode(path) self.index = hashindex_read(path) if not self.index: + if errno: + raise OSError(errno, strerror(errno), path) raise RuntimeError('hashindex_read failed') else: self.index = hashindex_init(capacity, self.key_size, self.value_size) diff --git a/borg/repository.py b/borg/repository.py index d59466358..05c0aa6f8 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -238,20 +238,24 @@ class Repository: def open_index(self, transaction_id, auto_recover=True): if transaction_id is None: return NSIndex() - index_path = (os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8') + index_path = os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8') try: return NSIndex.read(index_path) - except RuntimeError as re: - assert str(re) == 'hashindex_read failed' # everything else means we're in *deep* trouble + except RuntimeError as error: + assert str(error) == 'hashindex_read failed' # everything else means we're in *deep* trouble # corrupted index file, need to replay segments - os.unlink(os.path.join(self.path, 'hints.%d' % transaction_id)) - os.unlink(os.path.join(self.path, 'index.%d' % transaction_id)) + try: + os.unlink(index_path) + except OSError as e: + raise InternalOSError from e if not auto_recover: raise self.prepare_txn(self.get_transaction_id()) # don't leave an open transaction around self.commit() return self.open_index(self.get_transaction_id()) + except OSError as e: + raise InternalOSError from e def prepare_txn(self, transaction_id, do_cleanup=True): self._active_txn = True @@ -275,15 +279,17 @@ class Repository: else: if do_cleanup: self.io.cleanup(transaction_id) + hints_path = os.path.join(self.path, 'hints.%d' % transaction_id) + index_path = os.path.join(self.path, 'index.%d' % transaction_id) try: - with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd: + with open(hints_path, 'rb') as fd: hints = msgpack.unpack(fd) except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError) as e: # corrupted or deleted hints file, need to replay segments if not isinstance(e, FileNotFoundError): - os.unlink(os.path.join(self.path, 'hints.%d' % transaction_id)) + os.unlink(hints_path) # index must exist at this point - os.unlink(os.path.join(self.path, 'index.%d' % transaction_id)) + os.unlink(index_path) self.check_transaction() self.prepare_txn(transaction_id) return diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 346711424..6b758fb78 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -283,14 +283,18 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): self.repository.commit() def test_corrupted_hints(self): - with open(os.path.join(self.repository.path, 'hints.0'), 'ab') as fp: - fp.write(b'123456789') + with open(os.path.join(self.repository.path, 'hints.0'), 'ab') as fd: + fd.write(b'123456789') self.do_commit() def test_deleted_hints(self): os.unlink(os.path.join(self.repository.path, 'hints.0')) self.do_commit() + def test_deleted_index(self): + os.unlink(os.path.join(self.repository.path, 'index.0')) + self.do_commit() + def test_unreadable_hints(self): hints = os.path.join(self.repository.path, 'hints.0') os.unlink(hints) @@ -299,16 +303,23 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): self.do_commit() def test_index(self): - with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fp: - fp.write(b'123456789') + with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fd: + fd.write(b'123456789') self.do_commit() def test_index_outside_transaction(self): - with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fp: - fp.write(b'123456789') + with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fd: + fd.write(b'123456789') with self.repository: assert len(self.repository) == 1 + def test_unreadable_index(self): + index = os.path.join(self.repository.path, 'index.0') + os.unlink(index) + os.mkdir(index) + with self.assert_raises(InternalOSError): + self.do_commit() + class RepositoryCheckTestCase(RepositoryTestCaseBase): From 1f33861fd634f5f470947bb531f6471eb92dc025 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 24 Apr 2016 23:42:24 +0200 Subject: [PATCH 3/5] Repository: better error reporting for index/hints failures --- borg/hashindex.pyx | 8 +++++++- borg/helpers.py | 12 ++++++++++-- borg/repository.py | 10 +++++----- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index e55de7fe7..83b53807c 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from collections import namedtuple +import locale import os cimport cython @@ -60,6 +61,11 @@ MAX_VALUE = _MAX_VALUE assert _MAX_VALUE % 2 == 1 + +def decoded_strerror(errno): + return strerror(errno).decode(locale.getpreferredencoding(), 'surrogateescape') + + @cython.internal cdef class IndexBase: cdef HashIndex *index @@ -72,7 +78,7 @@ cdef class IndexBase: self.index = hashindex_read(path) if not self.index: if errno: - raise OSError(errno, strerror(errno), path) + raise OSError(errno, decoded_strerror(errno), os.fsdecode(path)) raise RuntimeError('hashindex_read failed') else: self.index = hashindex_init(capacity, self.key_size, self.value_size) diff --git a/borg/helpers.py b/borg/helpers.py index 4fa4e4575..d93a1c3e7 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -65,8 +65,16 @@ class ErrorWithTraceback(Error): traceback = True -class InternalOSError(ErrorWithTraceback): - """Error while accessing repository / cache files""" +class InternalOSError(Error): + """Error while accessing repository: [Errno {}] {}: {}""" + + def __init__(self, os_error): + self.errno = os_error.errno + self.strerror = os_error.strerror + self.filename = os_error.filename + + def get_message(self): + return self.__doc__.format(self.errno, self.strerror, self.filename) class IntegrityError(ErrorWithTraceback): diff --git a/borg/repository.py b/borg/repository.py index 05c0aa6f8..eab6e1343 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -243,11 +243,11 @@ class Repository: return NSIndex.read(index_path) except RuntimeError as error: assert str(error) == 'hashindex_read failed' # everything else means we're in *deep* trouble - # corrupted index file, need to replay segments + logger.warning('Repository index missing or corrupted, trying to recover') try: os.unlink(index_path) except OSError as e: - raise InternalOSError from e + raise InternalOSError(e) from None if not auto_recover: raise self.prepare_txn(self.get_transaction_id()) @@ -255,7 +255,7 @@ class Repository: self.commit() return self.open_index(self.get_transaction_id()) except OSError as e: - raise InternalOSError from e + raise InternalOSError(e) from None def prepare_txn(self, transaction_id, do_cleanup=True): self._active_txn = True @@ -285,7 +285,7 @@ class Repository: with open(hints_path, 'rb') as fd: hints = msgpack.unpack(fd) except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError) as e: - # corrupted or deleted hints file, need to replay segments + logger.warning('Repository hints file missing or corrupted, trying to recover') if not isinstance(e, FileNotFoundError): os.unlink(hints_path) # index must exist at this point @@ -294,7 +294,7 @@ class Repository: self.prepare_txn(transaction_id) return except OSError as os_error: - raise InternalOSError from os_error + raise InternalOSError(os_error) from None if hints[b'version'] == 1: logger.debug('Upgrading from v1 hints.%d', transaction_id) self.segments = hints[b'segments'] From 9ebb37cab8ed0b09dc5907dfd8f109dd01024b7f Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 29 May 2016 18:51:09 +0200 Subject: [PATCH 4/5] testsuite/repository: fixup for 7a569bc --- borg/testsuite/repository.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 6b758fb78..eff532e1f 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -283,38 +283,38 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase): self.repository.commit() def test_corrupted_hints(self): - with open(os.path.join(self.repository.path, 'hints.0'), 'ab') as fd: + with open(os.path.join(self.repository.path, 'hints.1'), 'ab') as fd: fd.write(b'123456789') self.do_commit() def test_deleted_hints(self): - os.unlink(os.path.join(self.repository.path, 'hints.0')) + os.unlink(os.path.join(self.repository.path, 'hints.1')) self.do_commit() def test_deleted_index(self): - os.unlink(os.path.join(self.repository.path, 'index.0')) + os.unlink(os.path.join(self.repository.path, 'index.1')) self.do_commit() def test_unreadable_hints(self): - hints = os.path.join(self.repository.path, 'hints.0') + hints = os.path.join(self.repository.path, 'hints.1') os.unlink(hints) os.mkdir(hints) with self.assert_raises(InternalOSError): self.do_commit() def test_index(self): - with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fd: + with open(os.path.join(self.repository.path, 'index.1'), 'wb') as fd: fd.write(b'123456789') self.do_commit() def test_index_outside_transaction(self): - with open(os.path.join(self.repository.path, 'index.0'), 'wb') as fd: + with open(os.path.join(self.repository.path, 'index.1'), 'wb') as fd: fd.write(b'123456789') with self.repository: assert len(self.repository) == 1 def test_unreadable_index(self): - index = os.path.join(self.repository.path, 'index.0') + index = os.path.join(self.repository.path, 'index.1') os.unlink(index) os.mkdir(index) with self.assert_raises(InternalOSError): From d1ce746a026a72bbf445f8615351e14a8c453f8f Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 29 May 2016 19:52:53 +0200 Subject: [PATCH 5/5] borg.hashindex: use PyErr_SetFromErrnoWithFilename instead of home-grown (i.e. not medical grade) OSError raising. --- borg/hashindex.pyx | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index 83b53807c..724f2ee84 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -5,6 +5,8 @@ import os cimport cython from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t +from libc.errno cimport errno +from cpython.exc cimport PyErr_SetFromErrnoWithFilename API_VERSION = 2 @@ -28,14 +30,6 @@ cdef extern from "_hashindex.c": uint32_t _le32toh(uint32_t v) -cdef extern from "errno.h": - int errno - - -cdef extern from "string.h": - char *strerror(int errnum) - - cdef _NoDefault = object() """ @@ -62,10 +56,6 @@ MAX_VALUE = _MAX_VALUE assert _MAX_VALUE % 2 == 1 -def decoded_strerror(errno): - return strerror(errno).decode(locale.getpreferredencoding(), 'surrogateescape') - - @cython.internal cdef class IndexBase: cdef HashIndex *index @@ -78,7 +68,8 @@ cdef class IndexBase: self.index = hashindex_read(path) if not self.index: if errno: - raise OSError(errno, decoded_strerror(errno), os.fsdecode(path)) + PyErr_SetFromErrnoWithFilename(OSError, path) + return raise RuntimeError('hashindex_read failed') else: self.index = hashindex_init(capacity, self.key_size, self.value_size)