From faedaf8160a5487a6074faccb9f5a9793bd229a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonas=20Borgstr=C3=B6m?=
Date: Tue, 4 Feb 2014 23:49:10 +0100
Subject: [PATCH] Basic repository consistency check functionality.

Still no archive metadata validation or repair functionality.
---
 CHANGES                       |  8 ++++++
 attic/archiver.py             | 32 ++++++++++++++++++++++-
 attic/hashindex.pyx           |  4 +++
 attic/helpers.py              |  9 +++++++
 attic/remote.py               |  3 +++
 attic/repository.py           | 38 +++++++++++++++++++++++++++
 attic/testsuite/archiver.py   |  2 ++
 attic/testsuite/repository.py | 48 +++++++++++++++++++++++++++++++++++
 docs/update_usage.sh          |  2 +-
 docs/usage.rst                |  6 +++++
 docs/usage/check.rst.inc      | 28 ++++++++++++++++++++
 11 files changed, 178 insertions(+), 2 deletions(-)
 create mode 100644 docs/usage/check.rst.inc

diff --git a/CHANGES b/CHANGES
index 243e02989..d0e6fe8e6 100644
--- a/CHANGES
+++ b/CHANGES
@@ -3,6 +3,14 @@ Attic Changelog
 
 Here you can see the full list of changes between each Attic release.
 
+Version 0.11
+------------
+
+(feature release, released on X)
+
+- New "check" command for repository consistency checking (#24)
+- Documentation improvements
+
 Version 0.10
 ------------
 
diff --git a/attic/archiver.py b/attic/archiver.py
index a95a44df3..4946ceb3d 100644
--- a/attic/archiver.py
+++ b/attic/archiver.py
@@ -13,7 +13,7 @@ from attic.cache import Cache
 from attic.key import key_creator
 from attic.helpers import Error, location_validator, format_time, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
-    get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, remove_surrogates
+    get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, remove_surrogates, is_a_terminal
 from attic.remote import RepositoryServer, RemoteRepository
 
 
@@ -59,6 +59,18 @@ class Archiver:
         repository.commit()
         return self.exit_code
 
+    def do_check(self, args):
+        """Check repository consistency
+        """
+        repository = self.open_repository(args.repository)
+        if args.progress is None:
+            args.progress = is_a_terminal(sys.stdout) or args.verbose
+        if not repository.check(progress=args.progress):
+            self.exit_code = 1
+        elif args.progress:
+            print('No problems found', file=sys.stderr)
+        return self.exit_code
+
     def do_change_passphrase(self, args):
         """Change repository key file passphrase
         """
@@ -360,6 +372,24 @@ class Archiver:
                                choices=('none', 'passphrase', 'keyfile'), default='none',
                                help='select encryption method')
 
+        check_epilog = """
+        Progress status will be reported on the standard output stream by default when
+        it is attached to a terminal. Any problems found are printed to the standard error
+        stream and the command will have a non zero exit code.
+        """
+        subparser = subparsers.add_parser('check', parents=[common_parser],
+                                          description=self.do_check.__doc__,
+                                          epilog=check_epilog)
+        subparser.set_defaults(func=self.do_check)
+        subparser.add_argument('repository', metavar='REPOSITORY',
+                               type=location_validator(archive=False),
+                               help='repository to check consistency of')
+        subparser.add_argument('--progress', dest='progress', action='store_true',
+                               default=None,
+                               help='Report progress status to standard output stream')
+        subparser.add_argument('--no-progress', dest='progress', action='store_false',
+                               help='Disable progress reporting')
+
         subparser = subparsers.add_parser('change-passphrase', parents=[common_parser],
                                           description=self.do_change_passphrase.__doc__)
         subparser.set_defaults(func=self.do_change_passphrase)
diff --git a/attic/hashindex.pyx b/attic/hashindex.pyx
index 69c185ed4..e5c8aede0 100644
--- a/attic/hashindex.pyx
+++ b/attic/hashindex.pyx
@@ -107,11 +107,13 @@ cdef class NSIndex(IndexBase):
 
     def iteritems(self, marker=None, limit=0):
         iter = NSKeyIterator()
+        iter.idx = self
         iter.index = self.index
         return iter
 
 
 cdef class NSKeyIterator:
+    cdef NSIndex idx
     cdef HashIndex *index
     cdef char *key
 
@@ -156,11 +158,13 @@ cdef class ChunkIndex(IndexBase):
 
     def iteritems(self, marker=None, limit=0):
         iter = ChunkKeyIterator()
+        iter.idx = self
         iter.index = self.index
         return iter
 
 
 cdef class ChunkKeyIterator:
+    cdef ChunkIndex idx
     cdef HashIndex *index
     cdef char *key
 
diff --git a/attic/helpers.py b/attic/helpers.py
index c5a7ca125..427428519 100644
--- a/attic/helpers.py
+++ b/attic/helpers.py
@@ -431,6 +431,15 @@ def daemonize():
     os.dup2(fd, 2)
 
 
+def is_a_terminal(fd):
+    """Determine if `fd` is associated with a terminal or not
+    """
+    try:
+        os.ttyname(fd.fileno())
+        return True
+    except (AttributeError, OSError, ValueError):
+        return False
+
 if sys.version < '3.3':
     # st_mtime_ns attribute only available in 3.3+
     def st_mtime_ns(st):
diff --git a/attic/remote.py b/attic/remote.py
index f818563d8..df5c6a990 100644
--- a/attic/remote.py
+++ b/attic/remote.py
@@ -178,6 +178,9 @@ class RemoteRepository(object):
                     w_fds = []
         self.ignore_responses |= set(waiting_for)
 
+    def check(self, progress=False):
+        return self.call('check', progress)
+
     def commit(self, *args):
         return self.call('commit')
 
diff --git a/attic/repository.py b/attic/repository.py
index 6555e9a8a..37f09929c 100644
--- a/attic/repository.py
+++ b/attic/repository.py
@@ -5,6 +5,7 @@ import os
 import re
 import shutil
 import struct
+import sys
 from zlib import crc32
 
 from .hashindex import NSIndex
@@ -198,6 +199,41 @@ class Repository(object):
         if self.io.head is not None:
             self.write_index()
 
+    def check(self, progress=False):
+        """Check repository consistency
+
+        This method verifies all segment checksums and makes sure
+        the index is consistent with the data stored in the segments.
+        """
+        error_found = False
+        def report_error(msg):
+            nonlocal error_found
+            error_found = True
+            print(msg, file=sys.stderr)
+        seen = set()
+        for segment, filename in self.io._segment_names():
+            if progress:
+                print('Checking segment {}/{}'.format(segment, self.io.head))
+            try:
+                objects = list(self.io.iter_objects(segment))
+            except (IntegrityError, struct.error):
+                report_error('Error reading segment {}'.format(segment))
+                objects = []
+            for tag, key, offset in objects:
+                if tag == TAG_PUT:
+                    if key in seen:
+                        report_error('Key found in more than one segment. Segment={}, key={}'.format(segment, hexlify(key)))
+                    seen.add(key)
+                    if self.index.get(key, (0, 0)) != (segment, offset):
+                        report_error('Index vs segment header mismatch. Segment={}, key={}'.format(segment, hexlify(key)))
+                elif tag == TAG_COMMIT:
+                    continue
+                else:
+                    raise self.RepositoryCheckFailed(self.path, 'Unexpected tag {} in segment {}'.format(tag, segment))
+        if len(self.index) != len(seen):
+            report_error('Index object count mismatch. {} != {}'.format(len(self.index), len(seen)))
+        return not error_found
+
     def rollback(self):
         """
         """
@@ -309,6 +345,8 @@ class LoggedIO(object):
         """
         self.head = None
         self.segment = 0
+        # FIXME: Only delete segments if we're sure there's at least
+        # one complete segment somewhere
         for segment, filename in self._segment_names(reverse=True):
             if self.is_complete_segment(filename):
                 self.head = segment
diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py
index e09747a9c..b3d5ca8aa 100644
--- a/attic/testsuite/archiver.py
+++ b/attic/testsuite/archiver.py
@@ -205,12 +205,14 @@ class ArchiverTestCase(AtticTestCase):
         self.attic('init', self.repository_location)
         self.create_src_archive('test')
         self.attic('verify', self.repository_location + '::test')
+        self.attic('check', self.repository_location)
         name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
         fd = open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+')
         fd.seek(100)
         fd.write('XXXX')
         fd.close()
         self.attic('verify', self.repository_location + '::test', exit_code=1)
+        self.attic('check', self.repository_location, exit_code=1)
 
     def test_readonly_repository(self):
         self.attic('init', self.repository_location)
diff --git a/attic/testsuite/repository.py b/attic/testsuite/repository.py
index 33d0cde28..46a8420ef 100644
--- a/attic/testsuite/repository.py
+++ b/attic/testsuite/repository.py
@@ -102,7 +102,55 @@ class RepositoryTestCase(AtticTestCase):
         self.repository.commit()
 
 
+class RepositoryCheckTestCase(AtticTestCase):
+
+    def open(self, create=False):
+        return Repository(os.path.join(self.tmppath, 'repository'), create=create)
+
+    def setUp(self):
+        self.tmppath = tempfile.mkdtemp()
+        self.repository = self.open(create=True)
+
+    def tearDown(self):
+        self.repository.close()
+        shutil.rmtree(self.tmppath)
+
+    def add_objects(self, ids):
+        for id_ in ids:
+            self.repository.put(('%032d' % id_).encode('ascii'), b'data')
+        self.repository.commit()
+
+    def open_index(self):
+        head = max(int(n[6:]) for n in os.listdir(os.path.join(self.tmppath, 'repository')) if n.startswith('index') and n[6:].isdigit())
+        return NSIndex(os.path.join(self.tmppath, 'repository', 'index.{}'.format(head)))
+
+    def corrupt_object(self, id_):
+        idx = self.open_index()
+        segment, offset = idx[('%032d' % id_).encode('ascii')]
+        with open(os.path.join(self.tmppath, 'repository', 'data', '0', str(segment)), 'r+b') as fd:
+            fd.seek(offset)
+            fd.write(b'BOOM')
+
+    def list_objects(self):
+        return set((int(key) for key, _ in list(self.open_index().iteritems())))
+
+    def test_check(self):
+        self.add_objects([1, 2, 3])
+        self.add_objects([4, 5, 6])
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+        self.assert_equal(True, self.repository.check())
+        self.corrupt_object(5)
+        self.assert_equal(False, self.repository.check())
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+
+
 class RemoteRepositoryTestCase(RepositoryTestCase):
 
     def open(self, create=False):
         return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+
+
+class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
+
+    def open(self, create=False):
+        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
diff --git a/docs/update_usage.sh b/docs/update_usage.sh
index 5a5eed31e..307d5ba79 100755
--- a/docs/update_usage.sh
+++ b/docs/update_usage.sh
@@ -2,7 +2,7 @@
 if [ ! -d usage ]; then
   mkdir usage
 fi
-for cmd in change-passphrase create delete extract info init list mount prune verify; do
+for cmd in change-passphrase check create delete extract info init list mount prune verify; do
   FILENAME="usage/$cmd.rst.inc"
   LINE=`echo -n attic $cmd | tr 'a-z- ' '-'`
   echo -e ".. _attic_$cmd:\n" > $FILENAME
diff --git a/docs/usage.rst b/docs/usage.rst
index e7b134afc..9d5a97446 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -93,6 +93,12 @@
 not corrupt. |project_name| will not compare the the archived files with the
 files on disk.
 
+.. include:: usage/check.rst.inc
+
+The check command verifies the consistency of a repository. Any inconsistencies
+found are reported to the standard error stream and the command will have a
+non zero exit code.
+
 .. include:: usage/delete.rst.inc
 
 This command deletes an archive from the repository. Any disk space not
diff --git a/docs/usage/check.rst.inc b/docs/usage/check.rst.inc
new file mode 100644
index 000000000..08fd36193
--- /dev/null
+++ b/docs/usage/check.rst.inc
@@ -0,0 +1,28 @@
+.. _attic_check:
+
+attic check
+-----------
+::
+
+
+    usage: attic check [-h] [-v] [--progress] [--no-progress] REPOSITORY
+
+    Check repository consistency
+
+    positional arguments:
+      REPOSITORY     repository to check consistency of
+
+    optional arguments:
+      -h, --help     show this help message and exit
+      -v, --verbose  verbose output
+      --progress     Report progress status to standard output stream
+      --no-progress  Disable progress reporting
+
+    Progress status will be reported on the standard output stream by default when
+    it is attached to a terminal. Any problems found are printed to the standard
+    error stream and the command will have a non zero exit code.
+
+Description
+~~~~~~~~~~~
+
+