Allow timespan to be specified with common time units (#8626)

allow timespan to be specified with common time units, fixes #8624

Co-authored-by: Ken Kundert <ken@theKunderts.net>
This commit is contained in:
Ken Kundert 2025-01-08 09:48:10 -08:00 committed by GitHub
parent 40df2f3c49
commit b9498ca571
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 200 additions and 66 deletions

View file

@ -12,6 +12,7 @@ Internally, we store and process date and time as UTC.
.. rubric:: TIMESPAN
Some options accept a TIMESPAN parameter, which can be given as a
number of days (e.g. ``7d``) or months (e.g. ``12m``).
Some options accept a TIMESPAN parameter, which can be given as a number of
years (e.g. ``2y``), months (e.g. ``12m``), weeks (e.g. ``2w``),
days (e.g. ``7d``), hours (e.g. ``8H``), minutes (e.g. ``30M``),
or seconds (e.g. ``150S``). The unit letters are case-sensitive:
``m`` means months, whereas ``M`` means minutes.

View file

@ -18,8 +18,8 @@ from ..logger import create_logger
logger = create_logger()
def prune_within(archives, hours, kept_because):
target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600)
def prune_within(archives, seconds, kept_because):
target = datetime.now(timezone.utc) - timedelta(seconds=seconds)
kept_counter = 0
result = []
for a in archives:
@ -241,10 +241,10 @@ class PruneMixIn:
series.
The ``--keep-within`` option takes an argument of the form "<int><char>",
where char is "H", "d", "w", "m", "y". For example, ``--keep-within 2d`` means
to keep all archives that were created within the past 48 hours.
"1m" is taken to mean "31d". The archives kept with this option do not
count towards the totals specified by any other options.
where char is "y", "m", "w", "d", "H", "M", or "S". For example,
``--keep-within 2d`` means to keep all archives that were created within
the past 2 days. "1m" is taken to mean "31d". The archives kept with
this option do not count towards the totals specified by any other options.
A good procedure is to thin out more and more the older your backups get.
As an example, ``--keep-daily 7`` means to keep the latest backup on each day,

View file

@ -126,26 +126,38 @@ def positive_int_validator(value):
def interval(s):
    """Convert a string representing a valid interval to a number of seconds.

    The string must have the form ``<int><unit>``, where unit is one of
    y, m, w, d, H, M, S (case-sensitive). A month counts as 31 days and a
    year as 365 days.

    :param s: interval string, e.g. ``"2d"`` or ``"30M"``
    :return: the interval length in seconds (always > 0)
    :raises argparse.ArgumentTypeError: if the unit is unknown or missing, or if
        the number is not an integer greater than 0
    """
    seconds_in_a_minute = 60
    seconds_in_an_hour = 60 * seconds_in_a_minute
    seconds_in_a_day = 24 * seconds_in_an_hour
    seconds_in_a_week = 7 * seconds_in_a_day
    seconds_in_a_month = 31 * seconds_in_a_day
    seconds_in_a_year = 365 * seconds_in_a_day

    # insertion order (largest unit first) is the order shown in the error message
    multiplier = dict(
        y=seconds_in_a_year,
        m=seconds_in_a_month,
        w=seconds_in_a_week,
        d=seconds_in_a_day,
        H=seconds_in_an_hour,
        M=seconds_in_a_minute,
        S=1,
    )

    # Slice instead of index: for an empty string s[-1] would raise IndexError,
    # while s[-1:] is just "" and gets reported as an unexpected time unit.
    suffix = s[-1:]
    if suffix not in multiplier:
        raise argparse.ArgumentTypeError(f'Unexpected time unit "{suffix}": choose from {", ".join(multiplier)}')
    number = s[:-1]

    try:
        seconds = int(number) * multiplier[suffix]
    except ValueError:
        # not an integer at all: funnel into the same error as zero / negative numbers
        seconds = -1

    if seconds <= 0:
        raise argparse.ArgumentTypeError(f'Invalid number "{number}": expected positive integer')

    return seconds
def ChunkerParams(s):
@ -579,10 +591,10 @@ def location_validator(proto=None, other=False):
def relative_time_marker_validator(text: str):
    """Validate a relative time marker such as ``7d`` or ``12m``.

    A valid marker consists of one or more digits followed by exactly one
    unit character out of y, m, w, d, H, M, S.

    :param text: the marker string to check
    :return: *text* unchanged, if it is a valid marker
    :raises argparse.ArgumentTypeError: if *text* is not a valid marker
    """
    # fullmatch rather than search with "^...$": "$" also matches right before a
    # trailing newline, which would let a marker with a stray newline pass.
    if re.fullmatch(r"\d+[ymwdHMS]", text) is None:
        raise argparse.ArgumentTypeError(f"Invalid relative time marker used: {text}, choose from y, m, w, d, H, M, S")
    return text

View file

@ -119,7 +119,7 @@ def calculate_relative_offset(format_string, from_ts, earlier=False):
from_ts = archive_ts_now()
if format_string is not None:
offset_regex = re.compile(r"(?P<offset>\d+)(?P<unit>[md])")
offset_regex = re.compile(r"(?P<offset>\d+)(?P<unit>[ymwdHMS])")
match = offset_regex.search(format_string)
if match:
@ -127,10 +127,20 @@ def calculate_relative_offset(format_string, from_ts, earlier=False):
offset = int(match.group("offset"))
offset *= -1 if earlier else 1
if unit == "d":
return from_ts + timedelta(days=offset)
if unit == "y":
return from_ts.replace(year=from_ts.year + offset)
elif unit == "m":
return offset_n_months(from_ts, offset)
elif unit == "w":
return from_ts + timedelta(days=offset * 7)
elif unit == "d":
return from_ts + timedelta(days=offset)
elif unit == "H":
return from_ts + timedelta(seconds=offset * 60 * 60)
elif unit == "M":
return from_ts + timedelta(seconds=offset * 60)
elif unit == "S":
return from_ts + timedelta(seconds=offset)
raise ValueError(f"Invalid relative ts offset format: {format_string}")

View file

@ -58,32 +58,80 @@ def test_date_matching(archivers, request):
shutil.rmtree(archiver.repository_path)
cmd(archiver, "repo-create", RK_ENCRYPTION)
earliest_ts = "2022-11-20T23:59:59"
ts_in_between = "2022-12-18T23:59:59"
create_src_archive(archiver, "archive1", ts=earliest_ts)
create_src_archive(archiver, "archive2", ts=ts_in_between)
create_src_archive(archiver, "archive3")
create_src_archive(archiver, "archive-2022-11-20", ts="2022-11-20T23:59:59")
create_src_archive(archiver, "archive-2022-12-18", ts="2022-12-18T23:59:59")
create_src_archive(archiver, "archive-now")
cmd(archiver, "check", "-v", "--archives-only", "--oldest=23e", exit_code=2)
output = cmd(archiver, "check", "-v", "--archives-only", "--oldest=1y", exit_code=0)
assert "archive-2022-11-20" in output
assert "archive-2022-12-18" in output
assert "archive-now" not in output
output = cmd(archiver, "check", "-v", "--archives-only", "--newest=1y", exit_code=0)
assert "archive-2022-11-20" not in output
assert "archive-2022-12-18" not in output
assert "archive-now" in output
output = cmd(archiver, "check", "-v", "--archives-only", "--oldest=1m", exit_code=0)
assert "archive1" in output
assert "archive2" in output
assert "archive3" not in output
assert "archive-2022-11-20" in output
assert "archive-2022-12-18" in output
assert "archive-now" not in output
output = cmd(archiver, "check", "-v", "--archives-only", "--newest=1m", exit_code=0)
assert "archive3" in output
assert "archive2" not in output
assert "archive1" not in output
assert "archive-2022-11-20" not in output
assert "archive-2022-12-18" not in output
assert "archive-now" in output
output = cmd(archiver, "check", "-v", "--archives-only", "--oldest=4w", exit_code=0)
assert "archive-2022-11-20" in output
assert "archive-2022-12-18" in output
assert "archive-now" not in output
output = cmd(archiver, "check", "-v", "--archives-only", "--newest=4w", exit_code=0)
assert "archive-2022-11-20" not in output
assert "archive-2022-12-18" not in output
assert "archive-now" in output
output = cmd(archiver, "check", "-v", "--archives-only", "--newer=1d", exit_code=0)
assert "archive3" in output
assert "archive1" not in output
assert "archive2" not in output
assert "archive-2022-11-20" not in output
assert "archive-2022-12-18" not in output
assert "archive-now" in output
output = cmd(archiver, "check", "-v", "--archives-only", "--older=1d", exit_code=0)
assert "archive1" in output
assert "archive2" in output
assert "archive3" not in output
assert "archive-2022-11-20" in output
assert "archive-2022-12-18" in output
assert "archive-now" not in output
output = cmd(archiver, "check", "-v", "--archives-only", "--newer=24H", exit_code=0)
assert "archive-2022-11-20" not in output
assert "archive-2022-12-18" not in output
assert "archive-now" in output
output = cmd(archiver, "check", "-v", "--archives-only", "--older=24H", exit_code=0)
assert "archive-2022-11-20" in output
assert "archive-2022-12-18" in output
assert "archive-now" not in output
output = cmd(archiver, "check", "-v", "--archives-only", "--newer=1440M", exit_code=0)
assert "archive-2022-11-20" not in output
assert "archive-2022-12-18" not in output
assert "archive-now" in output
output = cmd(archiver, "check", "-v", "--archives-only", "--older=1440M", exit_code=0)
assert "archive-2022-11-20" in output
assert "archive-2022-12-18" in output
assert "archive-now" not in output
output = cmd(archiver, "check", "-v", "--archives-only", "--newer=86400S", exit_code=0)
assert "archive-2022-11-20" not in output
assert "archive-2022-12-18" not in output
assert "archive-now" in output
output = cmd(archiver, "check", "-v", "--archives-only", "--older=86400S", exit_code=0)
assert "archive-2022-11-20" in output
assert "archive-2022-12-18" in output
assert "archive-now" not in output
# check for output when timespan older than the earliest archive is given. Issue #1711
output = cmd(archiver, "check", "-v", "--archives-only", "--older=9999m", exit_code=0)

View file

@ -57,32 +57,82 @@ def test_size_nfiles(archivers, request):
def test_date_matching(archivers, request):
    """Check repo-list archive selection by --oldest/--newest/--older/--newer for every time unit.

    Two archives carry fixed timestamps from late 2022, a third one is created
    at the time of the test run. Each option is expected to select either the
    two dated archives or only the current one.
    """
    archiver = request.getfixturevalue(archivers)
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    dated_archives = ("archive-2022-11-20", "archive-2022-12-18")
    create_src_archive(archiver, "archive-2022-11-20", ts="2022-11-20T23:59:59")
    create_src_archive(archiver, "archive-2022-12-18", ts="2022-12-18T23:59:59")
    create_src_archive(archiver, "archive-now")

    # "e" is not one of the accepted unit letters, so repo-list itself must reject it.
    cmd(archiver, "repo-list", "-v", "--oldest=23e", exit_code=2)

    # (option, True if it selects the two dated archives / False if it selects archive-now)
    cases = [
        ("--oldest=1y", True),
        ("--newest=1y", False),
        ("--oldest=1m", True),
        ("--newest=1m", False),
        ("--oldest=4w", True),
        ("--newest=4w", False),
        ("--newer=1d", False),
        ("--older=1d", True),
        ("--newer=24H", False),
        ("--older=24H", True),
        ("--newer=1440M", False),
        ("--older=1440M", True),
        ("--newer=86400S", False),
        ("--older=86400S", True),
    ]
    for option, selects_dated in cases:
        output = cmd(archiver, "repo-list", "-v", option, exit_code=0)
        for name in dated_archives:
            assert (name in output) == selects_dated, f"{option}: unexpected listing state of {name}"
        assert ("archive-now" in output) != selects_dated, f"{option}: unexpected listing state of archive-now"
def test_repo_list_json(archivers, request):

View file

@ -553,17 +553,28 @@ def test_prune_split_no_archives():
assert kept_because == {}
@pytest.mark.parametrize(
    "timeframe, expected_seconds",
    [
        ("5S", 5),
        ("2M", 120),
        ("1H", 3_600),
        ("1d", 86_400),
        ("1w", 604_800),
        ("1m", 2_678_400),  # 31 days
        ("1y", 31_536_000),  # 365 days
    ],
)
def test_interval(timeframe, expected_seconds):
    """Every supported unit suffix converts to the matching number of seconds."""
    assert interval(timeframe) == expected_seconds
@pytest.mark.parametrize(
"invalid_interval, error_tuple",
[
("H", ('Unexpected interval number "": expected an integer greater than 0',)),
("-1d", ('Unexpected interval number "-1": expected an integer greater than 0',)),
("food", ('Unexpected interval number "foo": expected an integer greater than 0',)),
("H", ('Invalid number "": expected positive integer',)),
("-1d", ('Invalid number "-1": expected positive integer',)),
("food", ('Invalid number "foo": expected positive integer',)),
],
)
def test_interval_time_unit(invalid_interval, error_tuple):
@ -575,7 +586,7 @@ def test_interval_time_unit(invalid_interval, error_tuple):
def test_interval_number():
    """A value without a unit suffix is rejected; its last character is reported as the unit."""
    expected_message = 'Unexpected time unit "5": choose from y, m, w, d, H, M, S'
    with pytest.raises(ArgumentTypeError) as excinfo:
        interval("5")
    assert excinfo.value.args == (expected_message,)
def test_prune_within():
@ -595,6 +606,8 @@ def test_prune_within():
test_dates = [now - timedelta(seconds=s) for s in test_offsets]
test_archives = [MockArchive(date, i) for i, date in enumerate(test_dates)]
dotest(test_archives, "15S", [])
dotest(test_archives, "2M", [0])
dotest(test_archives, "1H", [0])
dotest(test_archives, "2H", [0, 1])
dotest(test_archives, "3H", [0, 1, 2])