Added creation time stats output using the --stats flag

This commit is contained in:
Jonas Borgström 2011-08-07 17:10:21 +02:00
parent 9feef66d4e
commit 8616df7f32
4 changed files with 68 additions and 24 deletions

View file

@ -12,8 +12,9 @@ from xattr import xattr, XATTR_NOFOLLOW
from . import NS_ARCHIVE_METADATA, NS_CHUNK
from ._speedups import chunkify
from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError, \
Counter, encode_filename
Counter, encode_filename, Statistics
ITEMS_BUFFER = 1024 * 1024
CHUNK_SIZE = 64 * 1024
WINDOW_SIZE = 4096
@ -33,6 +34,7 @@ class Archive(object):
self.items = StringIO()
self.items_ids = []
self.hard_links = {}
self.stats = Statistics()
if name:
self.load(self.key.archive_hash(name))
@ -74,7 +76,7 @@ class Archive(object):
# add_item: msgpack-serialize `item` into the in-memory `self.items` buffer and
# call flush_items() once the buffer position exceeds the size threshold.
# NOTE(review): this text is a rendered diff with indentation and +/- markers
# stripped; the two consecutive `if` lines look like the pre- and post-commit
# versions of one statement (the literal 1024 * 1024 replaced by the
# ITEMS_BUFFER constant, which is defined with that same value).
def add_item(self, item):
self.items.write(msgpack.packb(item))
if self.items.tell() > 1024 * 1024:
if self.items.tell() > ITEMS_BUFFER:
self.flush_items()
def flush_items(self, flush=False):
@ -85,9 +87,11 @@ class Archive(object):
self.items.seek(0)
self.items.truncate()
for chunk in chunks[:-1]:
self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunk), chunk))
self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunk),
chunk, self.stats))
if flush or len(chunks) == 1:
self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunks[-1]), chunks[-1]))
self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunks[-1]),
chunks[-1], self.stats))
else:
self.items.write(chunks[-1])
@ -108,7 +112,7 @@ class Archive(object):
self.store.commit()
cache.commit()
def stats(self, cache):
def calc_stats(self, cache):
# This function is a bit evil since it abuses the cache to calculate
# the stats. The cache transaction must be rolled back afterwards
def cb(chunk, error, id):
@ -120,21 +124,15 @@ class Archive(object):
try:
for id, size, csize in item['chunks']:
count, _, _ = self.cache.chunks[id]
stats['osize'] += size
stats['csize'] += csize
if count == 1:
stats['usize'] += csize
stats.update(size, csize, count==1)
self.cache.chunks[id] = count - 1, size, csize
except KeyError:
pass
unpacker = msgpack.Unpacker()
cache.begin_txn()
stats = {'osize': 0, 'csize': 0, 'usize': 0}
stats = Statistics()
for id, size, csize in self.metadata['items']:
stats['osize'] += size
stats['csize'] += csize
if self.cache.seen_chunk(id) == 1:
stats['usize'] += csize
stats.update(size, csize, self.cache.seen_chunk(id) == 1)
self.store.get(NS_CHUNK, id, callback=cb, callback_data=id)
self.cache.chunk_decref(id)
self.store.flush_rpc()
@ -323,14 +321,14 @@ class Archive(object):
if not cache.seen_chunk(id):
break
else:
chunks = [cache.chunk_incref(id) for id in ids]
chunks = [cache.chunk_incref(id, self.stats) for id in ids]
# Only chunkify the file if needed
if chunks is None:
with open(path, 'rb') as fd:
chunks = []
for chunk in chunkify(fd, CHUNK_SIZE, WINDOW_SIZE,
self.key.chunk_seed):
chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk))
chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
ids = [id for id, _, _ in chunks]
cache.memorize_file(path_hash, st, ids)
item = {'path': safe_path, 'chunks': chunks}

View file

@ -11,7 +11,7 @@ from .cache import Cache
from .key import Key
from .helpers import location_validator, format_file_size, format_time,\
format_file_mode, IncludePattern, ExcludePattern, exclude_path, to_localtime, \
get_cache_dir, day_of_year
get_cache_dir, day_of_year, format_timedelta
from .remote import StoreServer, RemoteStore
class Archiver(object):
@ -48,6 +48,7 @@ class Archiver(object):
return self.exit_code
def do_create(self, args):
t0 = datetime.now()
store = self.open_store(args.archive)
key = Key(store)
try:
@ -76,6 +77,16 @@ class Archiver(object):
for path in args.paths:
self._process(archive, cache, args.patterns, skip_inodes, path)
archive.save(args.archive.archive, cache)
if args.stats:
t = datetime.now()
diff = t - t0
print '-' * 40
print 'Archive name: %s' % args.archive.archive
print 'Start time: %s' % t0.strftime('%c')
print 'End time: %s' % t.strftime('%c')
print 'Duration: %.2f (%s)' % (diff.total_seconds(), format_timedelta(diff))
archive.stats.print_()
print '-' * 40
return self.exit_code
def _process(self, archive, cache, patterns, skip_inodes, path):
@ -204,15 +215,13 @@ class Archiver(object):
key = Key(store)
cache = Cache(store, key)
archive = Archive(store, key, args.archive.archive, cache=cache)
stats = archive.stats(cache)
stats = archive.calc_stats(cache)
print 'Name:', archive.metadata['name']
print 'Hostname:', archive.metadata['hostname']
print 'Username:', archive.metadata['username']
print 'Time:', archive.metadata['time']
print 'Command line:', ' '.join(archive.metadata['cmdline'])
print 'Original size:', format_file_size(stats['osize'])
print 'Compressed size:', format_file_size(stats['csize'])
print 'Unique data:', format_file_size(stats['usize'])
stats.print_()
return self.exit_code
def do_purge(self, args):
@ -291,6 +300,9 @@ class Archiver(object):
subparser = subparsers.add_parser('create')
subparser.set_defaults(func=self.do_create)
subparser.add_argument('-s', '--stats', dest='stats',
action='store_true', default=False,
help='Print statistics for the created archive')
subparser.add_argument('-i', '--include', dest='patterns',
type=IncludePattern, action='append',
help='Include condition')

View file

@ -152,26 +152,28 @@ class Cache(object):
self.store.get(NS_CHUNK, id, callback=cb, callback_data=id)
self.store.flush_rpc()
# add_chunk: store `data` under chunk `id` unless the cache has already seen
# that id, in which case only the existing entry's count is bumped through
# chunk_incref(). Returns the tuple (id, size, csize).
# NOTE(review): this text is a rendered diff with indentation and +/- markers
# stripped; the paired `def` lines and the paired `return self.chunk_incref`
# lines look like the pre- and post-commit versions (a `stats` argument is
# added and passed along).
def add_chunk(self, id, data):
def add_chunk(self, id, data, stats):
# Make sure a cache transaction is open before touching self.chunks.
if not self.txn_active:
self.begin_txn()
if self.seen_chunk(id):
return self.chunk_incref(id)
return self.chunk_incref(id, stats)
# `size` is the length of the data as passed in; `csize` is the length of
# what self.key.encrypt() returned for it.
size = len(data)
data, hash = self.key.encrypt(data)
csize = len(data)
self.store.put(NS_CHUNK, id, data, callback=error_callback)
# First sighting of this id: record it with a count of 1 and report it to
# `stats` with the unique flag set.
self.chunks[id] = (1, size, csize)
stats.update(size, csize, True)
return id, size, csize
def seen_chunk(self, id):
    """Return the count stored for chunk ``id``, or 0 when it is unknown.

    Entries in ``self.chunks`` are indexable records whose first element
    is the count; a missing id is treated as the record ``(0, 0, 0)``.
    """
    entry = self.chunks.get(id, (0, 0, 0))
    return entry[0]
# chunk_incref: increase the stored count of an already-known chunk `id` by one
# and return (id, size, csize) taken from the existing entry. Indexing
# self.chunks[id] directly means the id must already be present.
# NOTE(review): this text is a rendered diff with indentation and +/- markers
# stripped; the two `def` lines look like the pre- and post-commit signatures
# (a `stats` argument is added).
def chunk_incref(self, id):
def chunk_incref(self, id, stats):
# Make sure a cache transaction is open before touching self.chunks.
if not self.txn_active:
self.begin_txn()
count, size, csize = self.chunks[id]
self.chunks[id] = (count + 1, size, csize)
# The chunk already existed, so it is reported to `stats` as not unique.
stats.update(size, csize, False)
return id, size, csize
def chunk_decref(self, id):

View file

@ -13,6 +13,22 @@ import time
import urllib
class Statistics(object):
    """Running size totals collected while chunks are added to an archive.

    Attributes:
        osize: sum of every ``size`` passed to :meth:`update`
            (printed as "Original size").
        csize: sum of every ``csize`` passed to :meth:`update`
            (printed as "Compressed size").
        usize: sum of ``csize`` for the updates flagged as unique
            (printed as "Unique data").
    """

    def __init__(self):
        self.osize = self.csize = self.usize = 0

    def update(self, size, csize, unique):
        """Add one chunk to the totals.

        ``size`` and ``csize`` are always accumulated; ``csize`` is also
        added to ``usize`` when ``unique`` is true.
        """
        self.osize += size
        self.csize += csize
        if unique:
            self.usize += csize

    def print_(self):
        """Print each total as a raw number plus its human-readable form."""
        # All three lines use the same '%d (%s)' layout. The parenthesised
        # single-argument form prints the same text under the Python 2
        # print statement and is also valid as a Python 3 call.
        print('Original size: %d (%s)' % (self.osize, format_file_size(self.osize)))
        print('Compressed size: %d (%s)' % (self.csize, format_file_size(self.csize)))
        print('Unique data: %d (%s)' % (self.usize, format_file_size(self.usize)))
def day_of_year(d):
    """Return the position of date ``d`` within its year as an integer.

    The value comes from the ``%j`` strftime directive, so January 1st
    yields 1.
    """
    ordinal_text = d.strftime('%j')
    return int(ordinal_text)
@ -194,6 +210,22 @@ def format_time(t):
return t.strftime('%b %d %Y')
def format_timedelta(td):
    """Format timedelta in a human friendly format

    Seconds are always shown with two decimals; the minutes, hours and
    days parts are included only when they are non-zero, e.g.
    ``'1 days 2 hours 3 minutes 4.50 seconds'``. A negative duration is
    rendered as its magnitude with a leading ``-``.
    """
    # A negative timedelta is normalized to negative days plus a positive
    # seconds remainder, so combining td.days with floor-modulo seconds
    # would print e.g. "-1 days 55.00 seconds" for minus five seconds.
    # Format the magnitude instead and put the sign in front.
    sign = '-' if td.days < 0 else ''
    td = abs(td)
    ts = td.total_seconds()
    s = ts % 60
    m = int(ts / 60) % 60
    h = int(ts / 3600) % 24
    txt = '%.2f seconds' % s
    if m:
        txt = '%d minutes %s' % (m, txt)
    if h:
        txt = '%d hours %s' % (h, txt)
    if td.days:
        txt = '%d days %s' % (td.days, txt)
    return sign + txt
def format_file_mode(mod):
"""Format file mode bits for list output
"""