From 14782a831ba0cdf8e42bfd0ad7970821cd5a09db Mon Sep 17 00:00:00 2001 From: Niklas Meinzer Date: Sun, 29 Oct 2017 16:12:16 +0100 Subject: [PATCH] prune: Show which rule was applied to keep archive Prune now shows for each kept archive: * Which rule is responsible for keeping this archive * How many archives have been kept by this rule so far Ref #2886 --- src/borg/archiver.py | 59 ++++++++++++------------ src/borg/helpers/misc.py | 32 ++++++++++--- src/borg/testsuite/archiver.py | 20 ++++----- src/borg/testsuite/helpers.py | 82 +++++++++++++++++++++------------- 4 files changed, 119 insertions(+), 74 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index b09490bf2..d678bb28c 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -49,7 +49,7 @@ from .helpers import PrefixSpec, SortBySpec, FilesCacheMode from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict -from .helpers import interval, prune_within, prune_split +from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS from .helpers import timestamp from .helpers import get_cache_dir from .helpers import Manifest, AI_HUMAN_SORT_KEYS @@ -1333,45 +1333,48 @@ class Archiver: # that is newer than a successfully completed backup - and killing the successful backup. 
archives = [arch for arch in archives_checkpoints if arch not in checkpoints] keep = [] + # collect the rule responsible for the keeping of each archive in this dict + # keys are archive ids, values are a tuple + # (, ) + kept_because = {} + + # find archives which need to be kept because of the keep-within rule if args.within: - keep += prune_within(archives, args.within) - if args.secondly: - keep += prune_split(archives, '%Y-%m-%d %H:%M:%S', args.secondly, keep) - if args.minutely: - keep += prune_split(archives, '%Y-%m-%d %H:%M', args.minutely, keep) - if args.hourly: - keep += prune_split(archives, '%Y-%m-%d %H', args.hourly, keep) - if args.daily: - keep += prune_split(archives, '%Y-%m-%d', args.daily, keep) - if args.weekly: - keep += prune_split(archives, '%G-%V', args.weekly, keep) - if args.monthly: - keep += prune_split(archives, '%Y-%m', args.monthly, keep) - if args.yearly: - keep += prune_split(archives, '%Y', args.yearly, keep) + keep += prune_within(archives, args.within, kept_because) + + # find archives which need to be kept because of the various time period rules + for rule in PRUNING_PATTERNS.keys(): + num = getattr(args, rule, None) + if num is not None: + keep += prune_split(archives, rule, num, kept_because) + to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints)) stats = Statistics() with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache: list_logger = logging.getLogger('borg.output.list') - if args.output_list: - # set up counters for the progress display - to_delete_len = len(to_delete) - archives_deleted = 0 + # set up counters for the progress display + to_delete_len = len(to_delete) + archives_deleted = 0 for archive in archives_checkpoints: if archive in to_delete: if args.dry_run: - if args.output_list: - list_logger.info('Would prune: %s' % format_archive(archive)) + log_message = 'Would prune:' else: - if args.output_list: - archives_deleted += 1 - list_logger.info('Pruning 
archive: %s (%d/%d)' % (format_archive(archive), - archives_deleted, to_delete_len)) + archives_deleted += 1 + log_message = 'Pruning archive (%d/%d):' % (archives_deleted, to_delete_len) Archive(repository, key, manifest, archive.name, cache, progress=args.progress).delete(stats, forced=args.forced) else: - if args.output_list: - list_logger.info('Keeping archive: %s' % format_archive(archive)) + if is_checkpoint(archive.name): + log_message = 'Keeping checkpoint archive:' + else: + log_message = 'Keeping archive (rule: {rule} #{num}):'.format( + rule=kept_because[archive.id][0], num=kept_because[archive.id][1] + ) + if args.output_list: + list_logger.info("{message:<40} {archive}".format( + message=log_message, archive=format_archive(archive) + )) if to_delete and not args.dry_run: manifest.write() repository.commit(save_space=args.save_space) diff --git a/src/borg/helpers/misc.py b/src/borg/helpers/misc.py index e33b46f0b..b0fbb203e 100644 --- a/src/borg/helpers/misc.py +++ b/src/borg/helpers/misc.py @@ -4,7 +4,7 @@ import os import os.path import platform import sys -from collections import deque +from collections import deque, OrderedDict from datetime import datetime, timezone, timedelta from itertools import islice from operator import attrgetter @@ -17,22 +17,44 @@ from .. import __version__ as borg_version from .. 
import chunker -def prune_within(archives, hours): +def prune_within(archives, hours, kept_because): target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600) - return [a for a in archives if a.ts > target] + kept_counter = 0 + result = [] + for a in archives: + if a.ts > target: + kept_counter += 1 + kept_because[a.id] = ("within", kept_counter) + result.append(a) + return result -def prune_split(archives, pattern, n, skip=[]): +PRUNING_PATTERNS = OrderedDict([ + ("secondly", '%Y-%m-%d %H:%M:%S'), + ("minutely", '%Y-%m-%d %H:%M'), + ("hourly", '%Y-%m-%d %H'), + ("daily", '%Y-%m-%d'), + ("weekly", '%G-%V'), + ("monthly", '%Y-%m'), + ("yearly", '%Y'), +]) + + +def prune_split(archives, rule, n, kept_because=None): last = None keep = [] + pattern = PRUNING_PATTERNS[rule] + if kept_because is None: + kept_because = {} if n == 0: return keep for a in sorted(archives, key=attrgetter('ts'), reverse=True): period = to_localtime(a.ts).strftime(pattern) if period != last: last = period - if a not in skip: + if a.id not in kept_because: keep.append(a) + kept_because[a.id] = (rule, len(keep)) if len(keep) == n: break return keep diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index c3a666320..29fc50f73 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -6,6 +6,7 @@ import logging import os import pstats import random +import re import shutil import socket import stat @@ -1731,12 +1732,11 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir) self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir) output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2') - self.assert_in('Keeping archive: test2', output) - self.assert_in('Would prune: test1', output) + assert re.search(r'Would prune:\s+test1', output) # must keep the latest non-checkpoint archive: - self.assert_in('Keeping 
archive: test2', output) + assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output) # must keep the latest checkpoint archive: - self.assert_in('Keeping archive: test4.checkpoint', output) + assert re.search(r'Keeping checkpoint archive:\s+test4.checkpoint', output) output = self.cmd('list', self.repository_location) self.assert_in('test1', output) self.assert_in('test2', output) @@ -1766,8 +1766,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::test1', src_dir) self.cmd('create', self.repository_location + '::test2', src_dir) output = self.cmd('prune', '--list', '--stats', '--dry-run', self.repository_location, '--keep-daily=2') - self.assert_in('Keeping archive: test2', output) - self.assert_in('Would prune: test1', output) + assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output) + assert re.search(r'Would prune:\s+test1', output) self.assert_in('Deleted data:', output) output = self.cmd('list', self.repository_location) self.assert_in('test1', output) @@ -1784,8 +1784,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir) self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir) output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-') - self.assert_in('Keeping archive: foo-2015-08-12-20:00', output) - self.assert_in('Would prune: foo-2015-08-12-10:00', output) + assert re.search(r'Keeping archive \(rule: daily #1\):\s+foo-2015-08-12-20:00', output) + assert re.search(r'Would prune:\s+foo-2015-08-12-10:00', output) output = self.cmd('list', self.repository_location) self.assert_in('foo-2015-08-12-10:00', output) self.assert_in('foo-2015-08-12-20:00', output) @@ -1805,8 +1805,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '::2015-08-12-10:00-bar', src_dir) self.cmd('create', 
self.repository_location + '::2015-08-12-20:00-bar', src_dir) output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--glob-archives=2015-*-foo') - self.assert_in('Keeping archive: 2015-08-12-20:00-foo', output) - self.assert_in('Would prune: 2015-08-12-10:00-foo', output) + assert re.search(r'Keeping archive \(rule: daily #1\):\s+2015-08-12-20:00-foo', output) + assert re.search(r'Would prune:\s+2015-08-12-10:00-foo', output) output = self.cmd('list', self.repository_location) self.assert_in('2015-08-12-10:00-foo', output) self.assert_in('2015-08-12-20:00-foo', output) diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index 15c029f5c..8d191974e 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -1,11 +1,10 @@ import hashlib -import io import os import shutil import sys from argparse import ArgumentTypeError from datetime import datetime, timezone, timedelta -from time import mktime, strptime, sleep +from time import sleep import pytest @@ -333,40 +332,56 @@ class MakePathSafeTestCase(BaseTestCase): class MockArchive: - def __init__(self, ts): + def __init__(self, ts, id): self.ts = ts + self.id = id def __repr__(self): - return repr(self.ts) + return "{0}: {1}".format(self.id, self.ts.isoformat()) -class PruneSplitTestCase(BaseTestCase): +@pytest.mark.parametrize( + "rule,num_to_keep,expected_ids", [ + ("yearly", 3, (13, 2, 1)), + ("monthly", 3, (13, 8, 4)), + ("weekly", 2, (13, 8)), + ("daily", 3, (13, 8, 7)), + ("hourly", 3, (13, 10, 8)), + ("minutely", 3, (13, 10, 9)), + ("secondly", 4, (13, 12, 11, 10)), + ("daily", 0, []), + ] +) +def test_prune_split(rule, num_to_keep, expected_ids): + def subset(lst, ids): + return {i for i in lst if i.id in ids} - def test(self): + archives = [ + # years apart + MockArchive(datetime(2015, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 1), + MockArchive(datetime(2016, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 2), + MockArchive(datetime(2017, 
1, 1, 10, 0, 0, tzinfo=timezone.utc), 3), + # months apart + MockArchive(datetime(2017, 2, 1, 10, 0, 0, tzinfo=timezone.utc), 4), + MockArchive(datetime(2017, 3, 1, 10, 0, 0, tzinfo=timezone.utc), 5), + # days apart + MockArchive(datetime(2017, 3, 2, 10, 0, 0, tzinfo=timezone.utc), 6), + MockArchive(datetime(2017, 3, 3, 10, 0, 0, tzinfo=timezone.utc), 7), + MockArchive(datetime(2017, 3, 4, 10, 0, 0, tzinfo=timezone.utc), 8), + # minutes apart + MockArchive(datetime(2017, 10, 1, 9, 45, 0, tzinfo=timezone.utc), 9), + MockArchive(datetime(2017, 10, 1, 9, 55, 0, tzinfo=timezone.utc), 10), + # seconds apart + MockArchive(datetime(2017, 10, 1, 10, 0, 1, tzinfo=timezone.utc), 11), + MockArchive(datetime(2017, 10, 1, 10, 0, 3, tzinfo=timezone.utc), 12), + MockArchive(datetime(2017, 10, 1, 10, 0, 5, tzinfo=timezone.utc), 13), + ] + kept_because = {} + keep = prune_split(archives, rule, num_to_keep, kept_because) - def local_to_UTC(month, day): - """Convert noon on the month and day in 2013 to UTC.""" - seconds = mktime(strptime('2013-%02d-%02d 12:00' % (month, day), '%Y-%m-%d %H:%M')) - return datetime.fromtimestamp(seconds, tz=timezone.utc) - - def subset(lst, indices): - return {lst[i] for i in indices} - - def dotest(test_archives, n, skip, indices): - for ta in test_archives, reversed(test_archives): - self.assert_equal(set(prune_split(ta, '%Y-%m', n, skip)), - subset(test_archives, indices)) - - test_pairs = [(1, 1), (2, 1), (2, 28), (3, 1), (3, 2), (3, 31), (5, 1)] - test_dates = [local_to_UTC(month, day) for month, day in test_pairs] - test_archives = [MockArchive(date) for date in test_dates] - - dotest(test_archives, 3, [], [6, 5, 2]) - dotest(test_archives, -1, [], [6, 5, 2, 0]) - dotest(test_archives, 3, [test_archives[6]], [5, 2, 0]) - dotest(test_archives, 3, [test_archives[5]], [6, 2, 0]) - dotest(test_archives, 3, [test_archives[4]], [6, 5, 2]) - dotest(test_archives, 0, [], []) + assert set(keep) == subset(archives, expected_ids) + for item in keep: + assert 
kept_because[item.id][0] == rule class IntervalTestCase(BaseTestCase): @@ -410,14 +425,19 @@ class PruneWithinTestCase(BaseTestCase): def dotest(test_archives, within, indices): for ta in test_archives, reversed(test_archives): - self.assert_equal(set(prune_within(ta, interval(within))), + kept_because = {} + keep = prune_within(ta, interval(within), kept_because) + self.assert_equal(set(keep), subset(test_archives, indices)) + assert all("within" == kept_because[a.id][0] for a in keep) # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours test_offsets = [60, 90*60, 150*60, 210*60, 25*60*60, 49*60*60] now = datetime.now(timezone.utc) test_dates = [now - timedelta(seconds=s) for s in test_offsets] - test_archives = [MockArchive(date) for date in test_dates] + test_archives = [ + MockArchive(date, i) for i, date in enumerate(test_dates) + ] dotest(test_archives, '1H', [0]) dotest(test_archives, '2H', [0, 1])