From 8616df7f32f077fe2a3cd343555687433820788f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Borgstr=C3=B6m?= Date: Sun, 7 Aug 2011 17:10:21 +0200 Subject: [PATCH] Added creation time stats output using the --stats flag --- darc/archive.py | 30 ++++++++++++++---------------- darc/archiver.py | 22 +++++++++++++++++----- darc/cache.py | 8 +++++--- darc/helpers.py | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 24 deletions(-) diff --git a/darc/archive.py b/darc/archive.py index 3707d2f69..d2f0602ff 100644 --- a/darc/archive.py +++ b/darc/archive.py @@ -12,8 +12,9 @@ from xattr import xattr, XATTR_NOFOLLOW from . import NS_ARCHIVE_METADATA, NS_CHUNK from ._speedups import chunkify from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError, \ - Counter, encode_filename + Counter, encode_filename, Statistics +ITEMS_BUFFER = 1024 * 1024 CHUNK_SIZE = 64 * 1024 WINDOW_SIZE = 4096 @@ -33,6 +34,7 @@ class Archive(object): self.items = StringIO() self.items_ids = [] self.hard_links = {} + self.stats = Statistics() if name: self.load(self.key.archive_hash(name)) @@ -74,7 +76,7 @@ class Archive(object): def add_item(self, item): self.items.write(msgpack.packb(item)) - if self.items.tell() > 1024 * 1024: + if self.items.tell() > ITEMS_BUFFER: self.flush_items() def flush_items(self, flush=False): @@ -85,9 +87,11 @@ class Archive(object): self.items.seek(0) self.items.truncate() for chunk in chunks[:-1]: - self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunk), chunk)) + self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunk), + chunk, self.stats)) if flush or len(chunks) == 1: - self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunks[-1]), chunks[-1])) + self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunks[-1]), + chunks[-1], self.stats)) else: self.items.write(chunks[-1]) @@ -108,7 +112,7 @@ class Archive(object): self.store.commit() cache.commit() - def stats(self, cache): + def calc_stats(self, cache): # This function is a bit evil since it abuses the cache to calculate # the stats. The cache transaction must be rolled back afterwards def cb(chunk, error, id): @@ -120,21 +124,15 @@ class Archive(object): try: for id, size, csize in item['chunks']: count, _, _ = self.cache.chunks[id] - stats['osize'] += size - stats['csize'] += csize - if count == 1: - stats['usize'] += csize + stats.update(size, csize, count==1) self.cache.chunks[id] = count - 1, size, csize except KeyError: pass unpacker = msgpack.Unpacker() cache.begin_txn() - stats = {'osize': 0, 'csize': 0, 'usize': 0} + stats = Statistics() for id, size, csize in self.metadata['items']: - stats['osize'] += size - stats['csize'] += csize - if self.cache.seen_chunk(id) == 1: - stats['usize'] += csize + stats.update(size, csize, self.cache.seen_chunk(id) == 1) self.store.get(NS_CHUNK, id, callback=cb, callback_data=id) self.cache.chunk_decref(id) self.store.flush_rpc() @@ -323,14 +321,14 @@ class Archive(object): if not cache.seen_chunk(id): break else: - chunks = [cache.chunk_incref(id) for id in ids] + chunks = [cache.chunk_incref(id, self.stats) for id in ids] # Only chunkify the file if needed if chunks is None: with open(path, 'rb') as fd: chunks = [] for chunk in chunkify(fd, CHUNK_SIZE, WINDOW_SIZE, self.key.chunk_seed): - chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk)) + chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats)) ids = [id for id, _, _ in chunks] cache.memorize_file(path_hash, st, ids) item = {'path': safe_path, 'chunks': chunks} diff --git a/darc/archiver.py b/darc/archiver.py index 2c538135f..61a493c5e 100644 --- a/darc/archiver.py +++ b/darc/archiver.py @@ -11,7 +11,7 @@ from .cache import Cache from .key import Key from .helpers import location_validator, format_file_size, format_time,\ format_file_mode, IncludePattern, ExcludePattern, exclude_path, to_localtime, \ - get_cache_dir, day_of_year + get_cache_dir, day_of_year, format_timedelta from .remote import StoreServer, RemoteStore class Archiver(object): @@ -48,6 +48,7 @@ class Archiver(object): return self.exit_code def do_create(self, args): + t0 = datetime.now() store = self.open_store(args.archive) key = Key(store) try: @@ -76,6 +77,16 @@ class Archiver(object): for path in args.paths: self._process(archive, cache, args.patterns, skip_inodes, path) archive.save(args.archive.archive, cache) + if args.stats: + t = datetime.now() + diff = t - t0 + print '-' * 40 + print 'Archive name: %s' % args.archive.archive + print 'Start time: %s' % t0.strftime('%c') + print 'End time: %s' % t.strftime('%c') + print 'Duration: %.2f (%s)' % (diff.total_seconds(), format_timedelta(diff)) + archive.stats.print_() + print '-' * 40 return self.exit_code def _process(self, archive, cache, patterns, skip_inodes, path): @@ -204,15 +215,13 @@ class Archiver(object): key = Key(store) cache = Cache(store, key) archive = Archive(store, key, args.archive.archive, cache=cache) - stats = archive.stats(cache) + stats = archive.calc_stats(cache) print 'Name:', archive.metadata['name'] print 'Hostname:', archive.metadata['hostname'] print 'Username:', archive.metadata['username'] print 'Time:', archive.metadata['time'] print 'Command line:', ' '.join(archive.metadata['cmdline']) - print 'Original size:', format_file_size(stats['osize']) - print 'Compressed size:', format_file_size(stats['csize']) - print 'Unique data:', format_file_size(stats['usize']) + stats.print_() return self.exit_code def do_purge(self, args): @@ -291,6 +300,9 @@ class Archiver(object): subparser = subparsers.add_parser('create') subparser.set_defaults(func=self.do_create) + subparser.add_argument('-s', '--stats', dest='stats', + action='store_true', default=False, + help='Print statistics for the created archive') subparser.add_argument('-i', '--include', dest='patterns', type=IncludePattern, action='append', help='Include condition') diff --git a/darc/cache.py b/darc/cache.py index 709e099fe..ee03d6012 100644 --- a/darc/cache.py +++ b/darc/cache.py @@ -152,26 +152,28 @@ class Cache(object): self.store.get(NS_CHUNK, id, callback=cb, callback_data=id) self.store.flush_rpc() - def add_chunk(self, id, data): + def add_chunk(self, id, data, stats): if not self.txn_active: self.begin_txn() if self.seen_chunk(id): - return self.chunk_incref(id) + return self.chunk_incref(id, stats) size = len(data) data, hash = self.key.encrypt(data) csize = len(data) self.store.put(NS_CHUNK, id, data, callback=error_callback) self.chunks[id] = (1, size, csize) + stats.update(size, csize, True) return id, size, csize def seen_chunk(self, id): return self.chunks.get(id, (0, 0, 0))[0] - def chunk_incref(self, id): + def chunk_incref(self, id, stats): if not self.txn_active: self.begin_txn() count, size, csize = self.chunks[id] self.chunks[id] = (count + 1, size, csize) + stats.update(size, csize, False) return id, size, csize def chunk_decref(self, id): diff --git a/darc/helpers.py b/darc/helpers.py index 06a6dfea7..a4b51514f 100644 --- a/darc/helpers.py +++ b/darc/helpers.py @@ -13,6 +13,22 @@ import time import urllib +class Statistics(object): + + def __init__(self): + self.osize = self.csize = self.usize = 0 + + def update(self, size, csize, unique): + self.osize += size + self.csize += csize + if unique: + self.usize += csize + + def print_(self): + print 'Original size: %d (%s)' % (self.osize, format_file_size(self.osize)) + print 'Compressed size: %s (%s)'% (self.csize, format_file_size(self.csize)) + print 'Unique data: %d (%s)' % (self.usize, format_file_size(self.usize)) + def day_of_year(d): """Calculate the "day of year" from a date object""" return int(d.strftime('%j')) @@ -194,6 +210,22 @@ def format_time(t): return t.strftime('%b %d %Y') +def format_timedelta(td): + """Format timedelta in a human friendly format""" + ts = td.total_seconds() + s = ts % 60 + m = int(ts / 60) % 60 + h = int(ts / 3600) % 24 + txt = '%.2f seconds' % s + if m: + txt = '%d minutes %s' % (m, txt) + if h: + txt = '%d hours %s' % (h, txt) + if td.days: + txt = '%d days %s' % (td.days, txt) + return txt + + def format_file_mode(mod): """Format file mode bits for list output """