attic check performance improvement and minor progress reporting changes

This commit is contained in:
Jonas Borgström 2014-02-17 18:25:25 +01:00
parent c394a31d62
commit 893120e609
6 changed files with 17 additions and 17 deletions

View file

@ -419,7 +419,10 @@ class ArchiveChecker:
shutil.rmtree(self.tmpdir)
def init_chunks(self):
self.chunks = ChunkIndex.create(os.path.join(self.tmpdir, 'chunks').encode('utf-8'))
# Explicitly set the initial hash table capacity to avoid performance issues
# due to hash table "resonance"
capacity = int(len(self.repository) * 1.2)
self.chunks = ChunkIndex.create(os.path.join(self.tmpdir, 'chunks').encode('utf-8'), capacity=capacity)
marker = None
while True:
result = self.repository.list(limit=10000, marker=marker)
@ -466,11 +469,11 @@ class ArchiveChecker:
if not Manifest.MANIFEST_ID in self.chunks:
self.manifest = self.rebuild_manifest()
else:
self.manifest, _ = Manifest.load(repository)
self.manifest, _ = Manifest.load(repository, key=self.key)
self.rebuild_chunks()
self.verify_chunks()
if not self.error_found:
self.report_progress('Archive consistency check complete, no errors found.')
self.report_progress('Archive consistency check complete, no problems found.')
return self.repair or not self.error_found
def verify_chunks(self):
@ -563,8 +566,9 @@ class ArchiveChecker:
for item in unpacker:
yield item
for name, info in list(self.manifest.archives.items()):
self.report_progress('Analyzing archive: ' + name)
num_archives = len(self.manifest.archives)
for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1):
self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives))
archive_id = info[b'id']
if not archive_id in self.chunks:
self.report_progress('Archive metadata block is missing', error=True)

View file

@ -64,7 +64,7 @@ class Archiver:
"""
repository = self.open_repository(args.repository)
if args.repair:
while not os.environ.get('ATTIC_CHECK_I_KWOW_WHAT_I_AM_DOING'):
while not os.environ.get('ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'):
self.print_error("""Warning: 'check --repair' is an experimental feature that might result
in data loss.

View file

@ -37,8 +37,8 @@ cdef class IndexBase:
raise Exception('hashindex_close failed')
@classmethod
def create(cls, path):
index = hashindex_create(<bytes>os.fsencode(path), 0, cls.key_size, cls.value_size)
def create(cls, path, capacity=0):
index = hashindex_create(<bytes>os.fsencode(path), capacity, cls.key_size, cls.value_size)
if not index:
raise Exception('Failed to create %s' % path)
hashindex_close(index)

View file

@ -63,10 +63,11 @@ class Manifest:
self.repository = repository
@classmethod
def load(cls, repository):
def load(cls, repository, key=None):
from .key import key_factory
cdata = repository.get(cls.MANIFEST_ID)
key = key_factory(repository, cdata)
if not key:
key = key_factory(repository, cdata)
manifest = cls(key, repository)
data = key.decrypt(None, cdata)
manifest.id = key.id_hash(data)

View file

@ -217,7 +217,6 @@ class Repository(object):
else:
current_index = None
report_progress('No suitable index found', error=True)
progress_time = None
for segment, filename in self.io.segment_iterator():
if segment > transaction_id:
@ -227,10 +226,6 @@ class Repository(object):
else:
report_progress('Uncommitted segment {} found'.format(segment), error=True)
continue
if progress:
if int(time.time()) != progress_time:
progress_time = int(time.time())
report_progress('Checking segment {}/{}'.format(segment, transaction_id))
try:
objects = list(self.io.iter_objects(segment))
except (IntegrityError, struct.error):
@ -272,7 +267,7 @@ class Repository(object):
if current_index and len(current_index) != len(self.index):
report_progress('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index)), error=True)
if not error_found:
report_progress('Repository check complete, no errors found.')
report_progress('Repository check complete, no problems found.')
if repair:
self.write_index()
else:

View file

@ -42,7 +42,7 @@ class ArchiverTestCaseBase(AtticTestCase):
prefix = ''
def setUp(self):
os.environ['ATTIC_CHECK_I_KWOW_WHAT_I_AM_DOING'] = '1'
os.environ['ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'] = '1'
self.archiver = Archiver()
self.tmpdir = tempfile.mkdtemp()
self.repository_path = os.path.join(self.tmpdir, 'repository')