diff --git a/darc/archive.py b/darc/archive.py index a4ceb0791..c36203854 100644 --- a/darc/archive.py +++ b/darc/archive.py @@ -55,6 +55,9 @@ class Archive(object): t, f = self.metadata['time'].split('.', 1) return datetime.strptime(t, '%Y-%m-%dT%H:%M:%S') + timedelta(seconds=float('.' + f)) + def __repr__(self): + return 'Archive(%r)' % self.name + def iter_items(self, callback): unpacker = msgpack.Unpacker() counter = Counter(0) diff --git a/darc/archiver.py b/darc/archiver.py index 96cee3f9e..28f24c5c6 100644 --- a/darc/archiver.py +++ b/darc/archiver.py @@ -1,5 +1,5 @@ import argparse -from datetime import datetime, date, timedelta +from datetime import datetime, timedelta from operator import attrgetter import os import stat @@ -9,9 +9,9 @@ from .archive import Archive from .store import Store from .cache import Cache from .key import Key -from .helpers import location_validator, format_file_size, format_time,\ +from .helpers import location_validator, format_time, \ format_file_mode, IncludePattern, ExcludePattern, exclude_path, to_localtime, \ - get_cache_dir, day_of_year, format_timedelta + get_cache_dir, format_timedelta, Purger from .remote import StoreServer, RemoteStore class Archiver(object): @@ -85,7 +85,7 @@ class Archiver(object): print 'Archive fingerprint: %s' % archive.hash.encode('hex') print 'Start time: %s' % t0.strftime('%c') print 'End time: %s' % t.strftime('%c') - print 'Duration: %.2f (%s)' % (diff.total_seconds(), format_timedelta(diff)) + print 'Duration: %s' % format_timedelta(diff) archive.stats.print_() print '-' * 40 return self.exit_code @@ -235,48 +235,57 @@ class Archiver(object): cache = Cache(store, key) archives = list(sorted(Archive.list_archives(store, key, cache), key=attrgetter('ts'), reverse=True)) - num_daily = args.daily - num_weekly = args.weekly - num_monthly = args.monthly - num_yearly = args.yearly + daily = [] + weekly = [] + monthly = [] + yearly = [] if args.daily + args.weekly + args.monthly + args.yearly == 0: 
self.print_error('At least one of the "daily", "weekly", "monthly" or "yearly" ' 'settings must be specified') return 1 - t0 = date.today() + timedelta(days=1) # Tomorrow - daily = weekly = monthly = yearly = 0 + + if args.prefix: + archives = [archive for archive in archives if archive.name.startswith(args.prefix)] + purger = Purger() for archive in archives: - if args.prefix and not archive.name.startswith(args.prefix): - continue - t = to_localtime(archive.ts).date() - if daily < args.daily and t < t0: - daily += 1 - self.print_verbose('Archive "%s" is daily archive number %d', - archive.name, daily) - t0 = t - elif weekly < args.weekly and t < t0 and t.weekday() == 1: - weekly += 1 - self.print_verbose('Archive "%s" is weekly archive number %d', - archive.name, weekly) - t0 = t - elif monthly < args.monthly and t < t0 and t.day == 1: - monthly += 1 - self.print_verbose('Archive "%s" is monthly archive number %d', - archive.name, monthly) - t0 = t - elif yearly < args.yearly and t < t0 and day_of_year(t) == 1: - yearly += 1 - self.print_verbose('Archive "%s" is yearly archive number %d', - archive.name, yearly) - t0 = t + purger.insert(to_localtime(archive.ts).date(), archive) + archives, to_delete = purger.purge(len(purger.items)) + if args.yearly: + purger = Purger() + for archive in archives: + purger.insert(to_localtime(archive.ts).strftime('%Y'), archive) + yearly, archives = purger.purge(args.yearly) + if args.monthly: + purger = Purger() + for archive in archives: + purger.insert(to_localtime(archive.ts).strftime('%Y-%m'), archive) + monthly, archives = purger.purge(args.monthly) + if args.weekly: + purger = Purger() + for archive in archives: + purger.insert(to_localtime(archive.ts).strftime('%Y-%V'), archive) + weekly, archives = purger.purge(args.weekly) + if args.daily: + daily = archives[-args.daily:] + archives = archives[:-args.daily] + to_delete += archives + + for i, archive in enumerate(yearly): + self.print_verbose('Keeping "%s" as yearly 
class Purger(object):
    """Group values under keys and split them into "keep" and "delete" lists.

    Values are collected per key with :meth:`insert`.  :meth:`purge` then
    retains one value for each of the first ``n`` keys (in sorted key order)
    and reports every other value as deletable.  Values must expose a ``ts``
    attribute, which is used to rank the members of a group.
    """

    def __init__(self):
        # Maps key -> list of values, kept in insertion order.
        self.items = {}

    def insert(self, key, value):
        """Append ``value`` to the group stored under ``key``."""
        self.items.setdefault(key, []).append(value)

    def purge(self, n, reverse=False):
        """Split the stored values into ``(keep, delete)`` lists.

        Keys are visited in ascending order, or descending order when
        ``reverse`` is true.  For each of the first ``n`` keys the group is
        ranked by ``ts`` (same direction as the keys); the first-ranked value
        is kept and the rest of the group is deleted.  Every value of the
        remaining keys is deleted.

        ``n`` is tested for truthiness and decremented once per kept group,
        so ``0`` keeps nothing, while a negative ``n`` never reaches zero and
        therefore keeps one value from every group.

        The stored groups are not modified: ranking works on a sorted copy,
        so calling this method leaves ``self.items`` in insertion order.

        Returns a ``(keep, delete)`` tuple of lists.
        """
        keep = []
        delete = []
        for key in sorted(self.items, reverse=reverse):
            values = self.items[key]
            if n:
                # Rank a copy so that a query does not reorder stored state.
                ranked = sorted(values, key=attrgetter('ts'), reverse=reverse)
                keep.append(ranked[0])
                delete.extend(ranked[1:])
                n -= 1
            else:
                delete.extend(values)
        return keep, delete
data: %d (%s)' % (self.usize, format_file_size(self.usize)) -def day_of_year(d): - """Calculate the "day of year" from a date object""" - return int(d.strftime('%j')) - # OSX filenames are UTF-8 Only so any non-utf8 filenames are url encoded if sys.platform == 'darwin':