mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-11 01:41:57 -04:00
Merge pull request #3235 from NiklasMM/feat/2886_prune-show-which-rule-keeps-archive
prune: Show which rule was applied to keep archive
This commit is contained in:
commit
4a58310433
4 changed files with 119 additions and 74 deletions
|
|
@ -49,7 +49,7 @@ from .helpers import PrefixSpec, SortBySpec, FilesCacheMode
|
|||
from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
|
||||
from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
|
||||
from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
|
||||
from .helpers import interval, prune_within, prune_split
|
||||
from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
|
||||
from .helpers import timestamp
|
||||
from .helpers import get_cache_dir
|
||||
from .helpers import Manifest, AI_HUMAN_SORT_KEYS
|
||||
|
|
@ -1333,45 +1333,48 @@ class Archiver:
|
|||
# that is newer than a successfully completed backup - and killing the successful backup.
|
||||
archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
|
||||
keep = []
|
||||
# collect the rule responsible for the keeping of each archive in this dict
|
||||
# keys are archive ids, values are a tuple
|
||||
# (<rulename>, <how many archives were kept by this rule so far >)
|
||||
kept_because = {}
|
||||
|
||||
# find archives which need to be kept because of the keep-within rule
|
||||
if args.within:
|
||||
keep += prune_within(archives, args.within)
|
||||
if args.secondly:
|
||||
keep += prune_split(archives, '%Y-%m-%d %H:%M:%S', args.secondly, keep)
|
||||
if args.minutely:
|
||||
keep += prune_split(archives, '%Y-%m-%d %H:%M', args.minutely, keep)
|
||||
if args.hourly:
|
||||
keep += prune_split(archives, '%Y-%m-%d %H', args.hourly, keep)
|
||||
if args.daily:
|
||||
keep += prune_split(archives, '%Y-%m-%d', args.daily, keep)
|
||||
if args.weekly:
|
||||
keep += prune_split(archives, '%G-%V', args.weekly, keep)
|
||||
if args.monthly:
|
||||
keep += prune_split(archives, '%Y-%m', args.monthly, keep)
|
||||
if args.yearly:
|
||||
keep += prune_split(archives, '%Y', args.yearly, keep)
|
||||
keep += prune_within(archives, args.within, kept_because)
|
||||
|
||||
# find archives which need to be kept because of the various time period rules
|
||||
for rule in PRUNING_PATTERNS.keys():
|
||||
num = getattr(args, rule, None)
|
||||
if num is not None:
|
||||
keep += prune_split(archives, rule, num, kept_because)
|
||||
|
||||
to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
|
||||
stats = Statistics()
|
||||
with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache:
|
||||
list_logger = logging.getLogger('borg.output.list')
|
||||
if args.output_list:
|
||||
# set up counters for the progress display
|
||||
to_delete_len = len(to_delete)
|
||||
archives_deleted = 0
|
||||
# set up counters for the progress display
|
||||
to_delete_len = len(to_delete)
|
||||
archives_deleted = 0
|
||||
for archive in archives_checkpoints:
|
||||
if archive in to_delete:
|
||||
if args.dry_run:
|
||||
if args.output_list:
|
||||
list_logger.info('Would prune: %s' % format_archive(archive))
|
||||
log_message = 'Would prune:'
|
||||
else:
|
||||
if args.output_list:
|
||||
archives_deleted += 1
|
||||
list_logger.info('Pruning archive: %s (%d/%d)' % (format_archive(archive),
|
||||
archives_deleted, to_delete_len))
|
||||
archives_deleted += 1
|
||||
log_message = 'Pruning archive (%d/%d):' % (archives_deleted, to_delete_len)
|
||||
Archive(repository, key, manifest, archive.name, cache,
|
||||
progress=args.progress).delete(stats, forced=args.forced)
|
||||
else:
|
||||
if args.output_list:
|
||||
list_logger.info('Keeping archive: %s' % format_archive(archive))
|
||||
if is_checkpoint(archive.name):
|
||||
log_message = 'Keeping checkpoint archive:'
|
||||
else:
|
||||
log_message = 'Keeping archive (rule: {rule} #{num}):'.format(
|
||||
rule=kept_because[archive.id][0], num=kept_because[archive.id][1]
|
||||
)
|
||||
if args.output_list:
|
||||
list_logger.info("{message:<40} {archive}".format(
|
||||
message=log_message, archive=format_archive(archive)
|
||||
))
|
||||
if to_delete and not args.dry_run:
|
||||
manifest.write()
|
||||
repository.commit(save_space=args.save_space)
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import os
|
|||
import os.path
|
||||
import platform
|
||||
import sys
|
||||
from collections import deque
|
||||
from collections import deque, OrderedDict
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from itertools import islice
|
||||
from operator import attrgetter
|
||||
|
|
@ -17,22 +17,44 @@ from .. import __version__ as borg_version
|
|||
from .. import chunker
|
||||
|
||||
|
||||
def prune_within(archives, hours, kept_because=None):
    """Return the archives whose timestamp is newer than *hours* hours ago.

    :param archives: iterable of objects exposing ``ts`` (a timezone-aware
        datetime, compared against an aware UTC "now") and ``id``.
    :param hours: size of the keep-within window, in hours.
    :param kept_because: optional dict that is updated in place; for every
        archive kept here, ``kept_because[archive.id]`` is set to
        ``("within", <1-based count of archives kept by this rule so far>)``.
        When omitted, a throwaway dict is used, mirroring ``prune_split``.
    :return: list of kept archives, in the order they were encountered.
    """
    if kept_because is None:
        kept_because = {}
    target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600)
    result = []
    for a in archives:
        if a.ts > target:
            result.append(a)
            # len(result) is the running number of archives kept by this rule
            kept_because[a.id] = ("within", len(result))
    return result
|
||||
|
||||
|
||||
def prune_split(archives, pattern, n, skip=[]):
|
||||
# Maps each time-period pruning rule name to the strftime pattern that
# identifies one period of that rule: two archives fall into the same period
# when their formatted timestamps are equal.
# Insertion order matters: callers that iterate this mapping apply the rules
# from the finest period (secondly) to the coarsest (yearly).
PRUNING_PATTERNS = OrderedDict([
    ("secondly", '%Y-%m-%d %H:%M:%S'),
    ("minutely", '%Y-%m-%d %H:%M'),
    ("hourly", '%Y-%m-%d %H'),
    ("daily", '%Y-%m-%d'),
    ("weekly", '%G-%V'),  # ISO 8601 year and ISO week number
    ("monthly", '%Y-%m'),
    ("yearly", '%Y'),
])
|
||||
|
||||
|
||||
def prune_split(archives, rule, n, kept_because=None):
    """Select up to *n* archives to keep, one per period of the given rule.

    Archives are visited newest first (by ``ts``). The first archive seen in
    each distinct period is the candidate for that period; it is kept unless
    its id is already present in *kept_because* (i.e. another rule already
    keeps it). Such an already-kept archive still uses up its period, so no
    older archive of the same period is selected in its place.

    :param archives: iterable of objects exposing ``ts`` and ``id``.
    :param rule: a key of ``PRUNING_PATTERNS`` (e.g. ``"daily"``); an unknown
        rule raises ``KeyError``.
    :param n: maximum number of archives to keep. ``0`` keeps nothing. A
        negative value never matches the length check below, so one archive
        per period is kept without limit.
    :param kept_because: optional dict updated in place; for every archive
        kept here, ``kept_because[archive.id]`` is set to
        ``(rule, <1-based count of archives kept by this rule so far>)``.
    :return: list of the archives kept by this rule, newest first.
    """
    pattern = PRUNING_PATTERNS[rule]
    if kept_because is None:
        kept_because = {}
    keep = []
    if n == 0:
        return keep
    last = None
    for a in sorted(archives, key=attrgetter('ts'), reverse=True):
        period = to_localtime(a.ts).strftime(pattern)
        if period != last:
            # first (newest) archive of a new period
            last = period
            if a.id not in kept_because:
                keep.append(a)
                kept_because[a.id] = (rule, len(keep))
                if len(keep) == n:
                    break
    return keep
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import logging
|
|||
import os
|
||||
import pstats
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import stat
|
||||
|
|
@ -1731,12 +1732,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
|
||||
self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
|
||||
output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
|
||||
self.assert_in('Keeping archive: test2', output)
|
||||
self.assert_in('Would prune: test1', output)
|
||||
assert re.search(r'Would prune:\s+test1', output)
|
||||
# must keep the latest non-checkpoint archive:
|
||||
self.assert_in('Keeping archive: test2', output)
|
||||
assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output)
|
||||
# must keep the latest checkpoint archive:
|
||||
self.assert_in('Keeping archive: test4.checkpoint', output)
|
||||
assert re.search(r'Keeping checkpoint archive:\s+test4.checkpoint', output)
|
||||
output = self.cmd('list', self.repository_location)
|
||||
self.assert_in('test1', output)
|
||||
self.assert_in('test2', output)
|
||||
|
|
@ -1766,8 +1766,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.cmd('create', self.repository_location + '::test1', src_dir)
|
||||
self.cmd('create', self.repository_location + '::test2', src_dir)
|
||||
output = self.cmd('prune', '--list', '--stats', '--dry-run', self.repository_location, '--keep-daily=2')
|
||||
self.assert_in('Keeping archive: test2', output)
|
||||
self.assert_in('Would prune: test1', output)
|
||||
assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output)
|
||||
assert re.search(r'Would prune:\s+test1', output)
|
||||
self.assert_in('Deleted data:', output)
|
||||
output = self.cmd('list', self.repository_location)
|
||||
self.assert_in('test1', output)
|
||||
|
|
@ -1784,8 +1784,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir)
|
||||
self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir)
|
||||
output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-')
|
||||
self.assert_in('Keeping archive: foo-2015-08-12-20:00', output)
|
||||
self.assert_in('Would prune: foo-2015-08-12-10:00', output)
|
||||
assert re.search(r'Keeping archive \(rule: daily #1\):\s+foo-2015-08-12-20:00', output)
|
||||
assert re.search(r'Would prune:\s+foo-2015-08-12-10:00', output)
|
||||
output = self.cmd('list', self.repository_location)
|
||||
self.assert_in('foo-2015-08-12-10:00', output)
|
||||
self.assert_in('foo-2015-08-12-20:00', output)
|
||||
|
|
@ -1805,8 +1805,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.cmd('create', self.repository_location + '::2015-08-12-10:00-bar', src_dir)
|
||||
self.cmd('create', self.repository_location + '::2015-08-12-20:00-bar', src_dir)
|
||||
output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--glob-archives=2015-*-foo')
|
||||
self.assert_in('Keeping archive: 2015-08-12-20:00-foo', output)
|
||||
self.assert_in('Would prune: 2015-08-12-10:00-foo', output)
|
||||
assert re.search(r'Keeping archive \(rule: daily #1\):\s+2015-08-12-20:00-foo', output)
|
||||
assert re.search(r'Would prune:\s+2015-08-12-10:00-foo', output)
|
||||
output = self.cmd('list', self.repository_location)
|
||||
self.assert_in('2015-08-12-10:00-foo', output)
|
||||
self.assert_in('2015-08-12-20:00-foo', output)
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
import hashlib
|
||||
import io
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from argparse import ArgumentTypeError
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from time import mktime, strptime, sleep
|
||||
from time import sleep
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -333,40 +332,56 @@ class MakePathSafeTestCase(BaseTestCase):
|
|||
|
||||
class MockArchive:
    """Minimal stand-in for an archive in the pruning tests.

    Carries only the two attributes the pruning helpers read: ``ts`` (the
    archive timestamp) and ``id`` (the key used in ``kept_because``).
    """

    def __init__(self, ts, id):
        self.ts = ts
        self.id = id

    def __repr__(self):
        # show the id first so failing set comparisons are easy to read
        return "{0}: {1}".format(self.id, self.ts.isoformat())
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "rule,num_to_keep,expected_ids", [
        ("yearly", 3, (13, 2, 1)),
        ("monthly", 3, (13, 8, 4)),
        ("weekly", 2, (13, 8)),
        ("daily", 3, (13, 8, 7)),
        ("hourly", 3, (13, 10, 8)),
        ("minutely", 3, (13, 10, 9)),
        ("secondly", 4, (13, 12, 11, 10)),
        ("daily", 0, ()),
    ]
)
def test_prune_split(rule, num_to_keep, expected_ids):
    """Check which archives each time-period rule keeps, and why.

    For every rule, the archives returned by ``prune_split`` must be exactly
    those with the expected ids, and ``kept_because`` must name the rule and
    the running per-rule counter for each of them.
    """
    # NOTE(review): prune_split formats timestamps via to_localtime(), so the
    # expected ids presumably assume a local timezone close to UTC - confirm.
    def subset(lst, ids):
        return {i for i in lst if i.id in ids}

    archives = [
        # years apart
        MockArchive(datetime(2015, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 1),
        MockArchive(datetime(2016, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 2),
        MockArchive(datetime(2017, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 3),
        # months apart
        MockArchive(datetime(2017, 2, 1, 10, 0, 0, tzinfo=timezone.utc), 4),
        MockArchive(datetime(2017, 3, 1, 10, 0, 0, tzinfo=timezone.utc), 5),
        # days apart
        MockArchive(datetime(2017, 3, 2, 10, 0, 0, tzinfo=timezone.utc), 6),
        MockArchive(datetime(2017, 3, 3, 10, 0, 0, tzinfo=timezone.utc), 7),
        MockArchive(datetime(2017, 3, 4, 10, 0, 0, tzinfo=timezone.utc), 8),
        # minutes apart
        MockArchive(datetime(2017, 10, 1, 9, 45, 0, tzinfo=timezone.utc), 9),
        MockArchive(datetime(2017, 10, 1, 9, 55, 0, tzinfo=timezone.utc), 10),
        # seconds apart
        MockArchive(datetime(2017, 10, 1, 10, 0, 1, tzinfo=timezone.utc), 11),
        MockArchive(datetime(2017, 10, 1, 10, 0, 3, tzinfo=timezone.utc), 12),
        MockArchive(datetime(2017, 10, 1, 10, 0, 5, tzinfo=timezone.utc), 13),
    ]
    kept_because = {}
    keep = prune_split(archives, rule, num_to_keep, kept_because)

    assert set(keep) == subset(archives, expected_ids)
    # the counter is assigned as archives are appended, so it follows the
    # position in the returned list
    for position, item in enumerate(keep, start=1):
        assert kept_because[item.id] == (rule, position)
|
||||
|
||||
|
||||
class IntervalTestCase(BaseTestCase):
|
||||
|
|
@ -410,14 +425,19 @@ class PruneWithinTestCase(BaseTestCase):
|
|||
|
||||
def dotest(test_archives, within, indices):
|
||||
for ta in test_archives, reversed(test_archives):
|
||||
self.assert_equal(set(prune_within(ta, interval(within))),
|
||||
kept_because = {}
|
||||
keep = prune_within(ta, interval(within), kept_because)
|
||||
self.assert_equal(set(keep),
|
||||
subset(test_archives, indices))
|
||||
assert all("within" == kept_because[a.id][0] for a in keep)
|
||||
|
||||
# 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours
|
||||
test_offsets = [60, 90*60, 150*60, 210*60, 25*60*60, 49*60*60]
|
||||
now = datetime.now(timezone.utc)
|
||||
test_dates = [now - timedelta(seconds=s) for s in test_offsets]
|
||||
test_archives = [MockArchive(date) for date in test_dates]
|
||||
test_archives = [
|
||||
MockArchive(date, i) for i, date in enumerate(test_dates)
|
||||
]
|
||||
|
||||
dotest(test_archives, '1H', [0])
|
||||
dotest(test_archives, '2H', [0, 1])
|
||||
|
|
|
|||
Loading…
Reference in a new issue