diff --git a/dedupestore/archive.py b/dedupestore/archive.py index 083530f3a..153cfe5ad 100644 --- a/dedupestore/archive.py +++ b/dedupestore/archive.py @@ -4,6 +4,7 @@ import logging import msgpack import os import stat +import sys import zlib from .cache import NS_ARCHIVES, NS_CHUNKS @@ -30,6 +31,9 @@ class Archive(object): if hashlib.sha256(data).digest() != id: raise Exception('Archive hash did not match') archive = msgpack.unpackb(zlib.decompress(data)) + version = archive.get('version') + if version != 1: + raise Exception('Archive version %r not supported' % version) self.items = archive['items'] self.name = archive['name'] self.chunks = archive['chunks'] @@ -38,7 +42,9 @@ class Archive(object): def save(self, name): archive = { + 'version': 1, 'name': name, + 'cmdline': ' '.join(sys.argv), 'ts': datetime.utcnow().isoformat(), 'items': self.items, 'chunks': self.chunks @@ -58,17 +64,15 @@ class Archive(object): return idx def stats(self, cache): - total_osize = 0 - total_csize = 0 - total_usize = 0 + osize = csize = usize = 0 for item in self.items: if item['type'] == 'FILE': - total_osize += item['size'] + osize += item['size'] for id, size in self.chunks: - total_csize += size + csize += size if self.cache.seen_chunk(id) == 1: - total_usize += size - return dict(osize=total_osize, csize=total_csize, usize=total_usize) + usize += size + return osize, csize, usize def list(self): for item in self.items: diff --git a/dedupestore/archiver.py b/dedupestore/archiver.py index 5b16a7863..77bad7d48 100644 --- a/dedupestore/archiver.py +++ b/dedupestore/archiver.py @@ -55,10 +55,10 @@ class Archiver(object): def do_info(self, args): store, cache = self.open_store(args.archive) archive = Archive(store, cache, args.archive.archive) - stats = archive.stats(cache) - print 'Original size:', pretty_size(stats['osize']) - print 'Compressed size:', pretty_size(stats['csize']) - print 'Unique data:', pretty_size(stats['usize']) + osize, csize, usize = archive.stats(cache) + print 'Original size:', pretty_size(osize) + print 'Compressed size:', pretty_size(csize) + print 'Unique data:', pretty_size(usize) return self.exit_code_from_logger() def run(self, args=None): diff --git a/dedupestore/cache.py b/dedupestore/cache.py index 6da6781c8..1debb260f 100644 --- a/dedupestore/cache.py +++ b/dedupestore/cache.py @@ -1,6 +1,7 @@ import hashlib -import os +import logging import msgpack +import os import zlib NS_ARCHIVES = 'A' @@ -29,7 +30,11 @@ class Cache(object): if hashlib.sha256(data).digest() != id: raise Exception('Cache hash did not match') data = msgpack.unpackb(zlib.decompress(data)) - if data['uuid'] != self.store.uuid: + version = data.get('version') + if version != 1: + logging.error('Unsupported cache version %r' % version) + return + if data['store'] != self.store.uuid: raise Exception('Cache UUID mismatch') self.chunkmap = data['chunkmap'] self.archives = data['archives'] @@ -38,6 +43,7 @@ class Cache(object): def init(self): """Initializes cache by fetching and reading all archive indicies """ + logging.info('Initialzing cache...') self.chunkmap = {} self.archives = {} self.tid = self.store.tid @@ -59,7 +65,8 @@ class Cache(object): def save(self): assert self.store.state == self.store.OPEN - data = {'uuid': self.store.uuid, + data = {'version': 1, + 'store': self.store.uuid, 'chunkmap': self.chunkmap, 'tid': self.store.tid, 'archives': self.archives} cachedir = os.path.dirname(self.path) @@ -77,11 +84,8 @@ class Cache(object): data = hashlib.sha256(data).digest() + data csize = len(data) self.store.put(NS_CHUNKS, id, data) - return self.init_chunk(id, csize)[1] - - def init_chunk(self, id, size): - self.chunkmap[id] = (1, size) - return id, size + self.chunkmap[id] = (1, csize) + return csize def seen_chunk(self, id): count, size = self.chunkmap.get(id, (0, 0))