Merge pull request #3235 from NiklasMM/feat/2886_prune-show-which-rule-keeps-archive

prune: Show which rule was applied to keep archive
This commit is contained in:
TW 2017-11-01 13:40:17 +01:00 committed by GitHub
commit 4a58310433
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 119 additions and 74 deletions

View file

@ -49,7 +49,7 @@ from .helpers import PrefixSpec, SortBySpec, FilesCacheMode
from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
from .helpers import interval, prune_within, prune_split
from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
from .helpers import timestamp
from .helpers import get_cache_dir
from .helpers import Manifest, AI_HUMAN_SORT_KEYS
@ -1333,45 +1333,48 @@ class Archiver:
# that is newer than a successfully completed backup - and killing the successful backup.
archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
keep = []
# collect the rule responsible for the keeping of each archive in this dict
# keys are archive ids, values are a tuple
# (<rulename>, <how many archives were kept by this rule so far>)
kept_because = {}
# find archives which need to be kept because of the keep-within rule
if args.within:
keep += prune_within(archives, args.within)
if args.secondly:
keep += prune_split(archives, '%Y-%m-%d %H:%M:%S', args.secondly, keep)
if args.minutely:
keep += prune_split(archives, '%Y-%m-%d %H:%M', args.minutely, keep)
if args.hourly:
keep += prune_split(archives, '%Y-%m-%d %H', args.hourly, keep)
if args.daily:
keep += prune_split(archives, '%Y-%m-%d', args.daily, keep)
if args.weekly:
keep += prune_split(archives, '%G-%V', args.weekly, keep)
if args.monthly:
keep += prune_split(archives, '%Y-%m', args.monthly, keep)
if args.yearly:
keep += prune_split(archives, '%Y', args.yearly, keep)
keep += prune_within(archives, args.within, kept_because)
# find archives which need to be kept because of the various time period rules
for rule in PRUNING_PATTERNS.keys():
num = getattr(args, rule, None)
if num is not None:
keep += prune_split(archives, rule, num, kept_because)
to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
stats = Statistics()
with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache:
list_logger = logging.getLogger('borg.output.list')
if args.output_list:
# set up counters for the progress display
to_delete_len = len(to_delete)
archives_deleted = 0
# set up counters for the progress display
to_delete_len = len(to_delete)
archives_deleted = 0
for archive in archives_checkpoints:
if archive in to_delete:
if args.dry_run:
if args.output_list:
list_logger.info('Would prune: %s' % format_archive(archive))
log_message = 'Would prune:'
else:
if args.output_list:
archives_deleted += 1
list_logger.info('Pruning archive: %s (%d/%d)' % (format_archive(archive),
archives_deleted, to_delete_len))
archives_deleted += 1
log_message = 'Pruning archive (%d/%d):' % (archives_deleted, to_delete_len)
Archive(repository, key, manifest, archive.name, cache,
progress=args.progress).delete(stats, forced=args.forced)
else:
if args.output_list:
list_logger.info('Keeping archive: %s' % format_archive(archive))
if is_checkpoint(archive.name):
log_message = 'Keeping checkpoint archive:'
else:
log_message = 'Keeping archive (rule: {rule} #{num}):'.format(
rule=kept_because[archive.id][0], num=kept_because[archive.id][1]
)
if args.output_list:
list_logger.info("{message:<40} {archive}".format(
message=log_message, archive=format_archive(archive)
))
if to_delete and not args.dry_run:
manifest.write()
repository.commit(save_space=args.save_space)

View file

@ -4,7 +4,7 @@ import os
import os.path
import platform
import sys
from collections import deque
from collections import deque, OrderedDict
from datetime import datetime, timezone, timedelta
from itertools import islice
from operator import attrgetter
@ -17,22 +17,44 @@ from .. import __version__ as borg_version
from .. import chunker
def prune_within(archives, hours, kept_because=None):
    """Return the archives whose timestamp is newer than *hours* hours ago.

    :param archives: iterable of objects exposing ``ts`` (compared against a
        timezone-aware UTC ``datetime``) and ``id``.
    :param hours: size of the keep window in hours; it is multiplied by 3600
        and passed to ``timedelta(seconds=...)``.
    :param kept_because: optional dict that is updated in place; for every
        kept archive ``kept_because[archive.id]`` is set to
        ``("within", <running count of archives kept by this rule>)``.
        When omitted, a throwaway dict is used, so the function can still be
        called with just ``(archives, hours)``.
    :return: list of kept archives, in the iteration order of *archives*.
    """
    # Must test for None explicitly: callers pass an (initially empty) dict
    # that they expect to be filled in.
    if kept_because is None:
        kept_because = {}
    target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600)
    kept_counter = 0
    result = []
    for a in archives:
        if a.ts > target:
            kept_counter += 1
            kept_because[a.id] = ("within", kept_counter)
            result.append(a)
    return result
# Maps each time-period pruning rule to the strftime pattern that identifies
# the period an archive falls into. Ordered from the finest to the coarsest
# period; callers iterate over it in this order.
PRUNING_PATTERNS = OrderedDict([
    ("secondly", '%Y-%m-%d %H:%M:%S'),
    ("minutely", '%Y-%m-%d %H:%M'),
    ("hourly", '%Y-%m-%d %H'),
    ("daily", '%Y-%m-%d'),
    ("weekly", '%G-%V'),
    ("monthly", '%Y-%m'),
    ("yearly", '%Y'),
])


def prune_split(archives, rule, n, kept_because=None):
    """Select up to *n* archives to keep, at most one per time period of *rule*.

    Archives are visited newest first. For each distinct period string only
    the newest archive is considered; it is kept unless its id is already
    present in *kept_because* (i.e. it was kept by an earlier rule).

    :param archives: iterable of objects exposing ``ts`` and ``id``.
    :param rule: a key of ``PRUNING_PATTERNS`` (e.g. ``"daily"``); an unknown
        rule raises ``KeyError``.
    :param n: maximum number of archives to keep. ``0`` keeps nothing; a
        negative value never reaches the stop condition, so every period is
        considered.
    :param kept_because: optional dict that is updated in place; for every
        newly kept archive ``kept_because[archive.id]`` is set to
        ``(rule, <running count of archives kept by this call>)``.
    :return: list of newly kept archives, newest first.
    """
    last = None
    keep = []
    pattern = PRUNING_PATTERNS[rule]
    # Must test for None explicitly: callers pass an (initially empty) dict
    # that they expect to be filled in.
    if kept_because is None:
        kept_because = {}
    if n == 0:
        return keep
    for a in sorted(archives, key=attrgetter('ts'), reverse=True):
        # NOTE(review): to_localtime is defined outside this block; presumably
        # it converts the timestamp to the local timezone - confirm.
        period = to_localtime(a.ts).strftime(pattern)
        if period != last:
            # The period counts as handled even when its newest archive was
            # already kept by another rule; older archives of the same period
            # are not considered as replacements.
            last = period
            if a.id not in kept_because:
                keep.append(a)
                kept_because[a.id] = (rule, len(keep))
                if len(keep) == n:
                    break
    return keep

View file

@ -6,6 +6,7 @@ import logging
import os
import pstats
import random
import re
import shutil
import socket
import stat
@ -1731,12 +1732,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
self.assert_in('Keeping archive: test2', output)
self.assert_in('Would prune: test1', output)
assert re.search(r'Would prune:\s+test1', output)
# must keep the latest non-checkpoint archive:
self.assert_in('Keeping archive: test2', output)
assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output)
# must keep the latest checkpoint archive:
self.assert_in('Keeping archive: test4.checkpoint', output)
assert re.search(r'Keeping checkpoint archive:\s+test4.checkpoint', output)
output = self.cmd('list', self.repository_location)
self.assert_in('test1', output)
self.assert_in('test2', output)
@ -1766,8 +1766,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('create', self.repository_location + '::test1', src_dir)
self.cmd('create', self.repository_location + '::test2', src_dir)
output = self.cmd('prune', '--list', '--stats', '--dry-run', self.repository_location, '--keep-daily=2')
self.assert_in('Keeping archive: test2', output)
self.assert_in('Would prune: test1', output)
assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output)
assert re.search(r'Would prune:\s+test1', output)
self.assert_in('Deleted data:', output)
output = self.cmd('list', self.repository_location)
self.assert_in('test1', output)
@ -1784,8 +1784,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir)
self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir)
output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-')
self.assert_in('Keeping archive: foo-2015-08-12-20:00', output)
self.assert_in('Would prune: foo-2015-08-12-10:00', output)
assert re.search(r'Keeping archive \(rule: daily #1\):\s+foo-2015-08-12-20:00', output)
assert re.search(r'Would prune:\s+foo-2015-08-12-10:00', output)
output = self.cmd('list', self.repository_location)
self.assert_in('foo-2015-08-12-10:00', output)
self.assert_in('foo-2015-08-12-20:00', output)
@ -1805,8 +1805,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('create', self.repository_location + '::2015-08-12-10:00-bar', src_dir)
self.cmd('create', self.repository_location + '::2015-08-12-20:00-bar', src_dir)
output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--glob-archives=2015-*-foo')
self.assert_in('Keeping archive: 2015-08-12-20:00-foo', output)
self.assert_in('Would prune: 2015-08-12-10:00-foo', output)
assert re.search(r'Keeping archive \(rule: daily #1\):\s+2015-08-12-20:00-foo', output)
assert re.search(r'Would prune:\s+2015-08-12-10:00-foo', output)
output = self.cmd('list', self.repository_location)
self.assert_in('2015-08-12-10:00-foo', output)
self.assert_in('2015-08-12-20:00-foo', output)

View file

@ -1,11 +1,10 @@
import hashlib
import io
import os
import shutil
import sys
from argparse import ArgumentTypeError
from datetime import datetime, timezone, timedelta
from time import mktime, strptime, sleep
from time import sleep
import pytest
@ -333,40 +332,56 @@ class MakePathSafeTestCase(BaseTestCase):
class MockArchive:
    """Minimal archive stand-in for the pruning tests.

    Only carries the two attributes the pruning helpers read: ``ts`` (the
    archive timestamp) and ``id`` (the archive identifier).
    """

    def __init__(self, ts, id):
        # ``id`` shadows the builtin inside this method only; the name is kept
        # because it is the parameter name callers see.
        self.ts = ts
        self.id = id

    def __repr__(self):
        # Show the id next to the timestamp so failing assertions identify
        # the archive directly.
        return "{0}: {1}".format(self.id, self.ts.isoformat())
@pytest.mark.parametrize(
    "rule,num_to_keep,expected_ids", [
        ("yearly", 3, (13, 2, 1)),
        ("monthly", 3, (13, 8, 4)),
        ("weekly", 2, (13, 8)),
        ("daily", 3, (13, 8, 7)),
        ("hourly", 3, (13, 10, 8)),
        ("minutely", 3, (13, 10, 9)),
        ("secondly", 4, (13, 12, 11, 10)),
        ("daily", 0, []),
    ]
)
def test_prune_split(rule, num_to_keep, expected_ids):
    """Check which archives prune_split keeps for each time-period rule.

    Each case names a rule, how many archives to keep, and the ids of the
    archives expected to be kept. Every kept archive must also be recorded in
    ``kept_because`` under the rule being tested.
    """
    def subset(lst, ids):
        # Select the mock archives whose id is among the expected ids.
        return {i for i in lst if i.id in ids}

    archives = [
        # years apart
        MockArchive(datetime(2015, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 1),
        MockArchive(datetime(2016, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 2),
        MockArchive(datetime(2017, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 3),
        # months apart
        MockArchive(datetime(2017, 2, 1, 10, 0, 0, tzinfo=timezone.utc), 4),
        MockArchive(datetime(2017, 3, 1, 10, 0, 0, tzinfo=timezone.utc), 5),
        # days apart
        MockArchive(datetime(2017, 3, 2, 10, 0, 0, tzinfo=timezone.utc), 6),
        MockArchive(datetime(2017, 3, 3, 10, 0, 0, tzinfo=timezone.utc), 7),
        MockArchive(datetime(2017, 3, 4, 10, 0, 0, tzinfo=timezone.utc), 8),
        # minutes apart
        MockArchive(datetime(2017, 10, 1, 9, 45, 0, tzinfo=timezone.utc), 9),
        MockArchive(datetime(2017, 10, 1, 9, 55, 0, tzinfo=timezone.utc), 10),
        # seconds apart
        MockArchive(datetime(2017, 10, 1, 10, 0, 1, tzinfo=timezone.utc), 11),
        MockArchive(datetime(2017, 10, 1, 10, 0, 3, tzinfo=timezone.utc), 12),
        MockArchive(datetime(2017, 10, 1, 10, 0, 5, tzinfo=timezone.utc), 13),
    ]
    kept_because = {}
    keep = prune_split(archives, rule, num_to_keep, kept_because)

    assert set(keep) == subset(archives, expected_ids)
    for item in keep:
        assert kept_because[item.id][0] == rule
class IntervalTestCase(BaseTestCase):
@ -410,14 +425,19 @@ class PruneWithinTestCase(BaseTestCase):
def dotest(test_archives, within, indices):
for ta in test_archives, reversed(test_archives):
self.assert_equal(set(prune_within(ta, interval(within))),
kept_because = {}
keep = prune_within(ta, interval(within), kept_because)
self.assert_equal(set(keep),
subset(test_archives, indices))
assert all("within" == kept_because[a.id][0] for a in keep)
# 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours
test_offsets = [60, 90*60, 150*60, 210*60, 25*60*60, 49*60*60]
now = datetime.now(timezone.utc)
test_dates = [now - timedelta(seconds=s) for s in test_offsets]
test_archives = [MockArchive(date) for date in test_dates]
test_archives = [
MockArchive(date, i) for i, date in enumerate(test_dates)
]
dotest(test_archives, '1H', [0])
dotest(test_archives, '2H', [0, 1])