diff --git a/attic/archive.py b/attic/archive.py index ea05b6e04..326240178 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -419,7 +419,10 @@ class ArchiveChecker: shutil.rmtree(self.tmpdir) def init_chunks(self): - self.chunks = ChunkIndex.create(os.path.join(self.tmpdir, 'chunks').encode('utf-8')) + # Explicitly set the initial hash table capacity to avoid performance issues + # due to hash table "resonance" + capacity = int(len(self.repository) * 1.2) + self.chunks = ChunkIndex.create(os.path.join(self.tmpdir, 'chunks').encode('utf-8'), capacity=capacity) marker = None while True: result = self.repository.list(limit=10000, marker=marker) @@ -466,11 +469,11 @@ class ArchiveChecker: if not Manifest.MANIFEST_ID in self.chunks: self.manifest = self.rebuild_manifest() else: - self.manifest, _ = Manifest.load(repository) + self.manifest, _ = Manifest.load(repository, key=self.key) self.rebuild_chunks() self.verify_chunks() if not self.error_found: - self.report_progress('Archive consistency check complete, no errors found.') + self.report_progress('Archive consistency check complete, no problems found.') return self.repair or not self.error_found def verify_chunks(self): @@ -563,8 +566,9 @@ class ArchiveChecker: for item in unpacker: yield item - for name, info in list(self.manifest.archives.items()): - self.report_progress('Analyzing archive: ' + name) + num_archives = len(self.manifest.archives) + for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1): + self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives)) archive_id = info[b'id'] if not archive_id in self.chunks: self.report_progress('Archive metadata block is missing', error=True) diff --git a/attic/archiver.py b/attic/archiver.py index d7060e649..536a92900 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -64,7 +64,7 @@ class Archiver: """ repository = self.open_repository(args.repository) if args.repair: - while not 
os.environ.get('ATTIC_CHECK_I_KWOW_WHAT_I_AM_DOING'): + while not os.environ.get('ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'): self.print_error("""Warning: 'check --repair' is an experimental feature that might result in data loss. diff --git a/attic/hashindex.pyx b/attic/hashindex.pyx index 6a359f2cf..743071248 100644 --- a/attic/hashindex.pyx +++ b/attic/hashindex.pyx @@ -37,8 +37,8 @@ cdef class IndexBase: raise Exception('hashindex_close failed') @classmethod - def create(cls, path): - index = hashindex_create(os.fsencode(path), 0, cls.key_size, cls.value_size) + def create(cls, path, capacity=0): + index = hashindex_create(os.fsencode(path), capacity, cls.key_size, cls.value_size) if not index: raise Exception('Failed to create %s' % path) hashindex_close(index) diff --git a/attic/helpers.py b/attic/helpers.py index 6646f785d..168a72f52 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -63,10 +63,11 @@ class Manifest: self.repository = repository @classmethod - def load(cls, repository): + def load(cls, repository, key=None): from .key import key_factory cdata = repository.get(cls.MANIFEST_ID) - key = key_factory(repository, cdata) + if not key: + key = key_factory(repository, cdata) manifest = cls(key, repository) data = key.decrypt(None, cdata) manifest.id = key.id_hash(data) diff --git a/attic/repository.py b/attic/repository.py index 5db0afa35..2f3fb366b 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -217,7 +217,6 @@ class Repository(object): else: current_index = None report_progress('No suitable index found', error=True) - progress_time = None for segment, filename in self.io.segment_iterator(): if segment > transaction_id: @@ -227,10 +226,6 @@ class Repository(object): else: report_progress('Uncommitted segment {} found'.format(segment), error=True) continue - if progress: - if int(time.time()) != progress_time: - progress_time = int(time.time()) - report_progress('Checking segment {}/{}'.format(segment, transaction_id)) try: objects = 
list(self.io.iter_objects(segment)) except (IntegrityError, struct.error): @@ -272,7 +267,7 @@ class Repository(object): if current_index and len(current_index) != len(self.index): report_progress('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index)), error=True) if not error_found: - report_progress('Repository check complete, no errors found.') + report_progress('Repository check complete, no problems found.') if repair: self.write_index() else: diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 43c770be7..d16e1360d 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -42,7 +42,7 @@ class ArchiverTestCaseBase(AtticTestCase): prefix = '' def setUp(self): - os.environ['ATTIC_CHECK_I_KWOW_WHAT_I_AM_DOING'] = '1' + os.environ['ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'] = '1' self.archiver = Archiver() self.tmpdir = tempfile.mkdtemp() self.repository_path = os.path.join(self.tmpdir, 'repository')