def text_validator(*, name, max_length, invalid_ctrl_chars="\0"):
    """Build a validator callable for a free-form text argument.

    The returned callable takes a str and returns it unchanged if it is acceptable,
    otherwise it raises argparse.ArgumentTypeError.

    :param name: label for the validated value, only used in error messages
    :param max_length: maximum permitted ``len(text)``
    :param invalid_ctrl_chars: str of characters that must not occur in the text;
                               an empty str disables this check
    :return: ``validator(text) -> text``
    """
    # Compile once per validator instead of on every call. An empty character set
    # must be special-cased: "[]" is not a valid regular expression.
    forbidden = re.compile(f"[{re.escape(invalid_ctrl_chars)}]") if invalid_ctrl_chars else None

    def validator(text):
        assert isinstance(text, str)
        if len(text) > max_length:
            raise argparse.ArgumentTypeError(f'Invalid {name}: "{text}" [length <= {max_length}]')
        if forbidden is not None and forbidden.search(text):
            raise argparse.ArgumentTypeError(f'Invalid {name}: "{text}" [invalid control chars detected]')
        try:
            text.encode("utf-8", errors="strict")
        except UnicodeEncodeError:
            # looks like text contains surrogate-escapes
            # (from None: the encode error is an implementation detail of this check)
            raise argparse.ArgumentTypeError(f'Invalid {name}: "{text}" [contains non-unicode characters]') from None
        return text

    return validator


# validator for comment texts: at most 10000 characters, NUL characters rejected
comment_validator = text_validator(name="comment", max_length=10000)
@pytest.mark.parametrize("text", ["", "single line", "multi\nline\ncomment"])
def test_text_ok(text):
    """Empty, single-line and multi-line texts within the length limit must pass validation."""
    validate = text_validator(name="name", max_length=100)
    validate(text)  # must not raise an exception


@pytest.mark.parametrize(
    "text",
    [
        "x" * 101,  # too long
        # invalid chars:
        "foo\0bar",
        # contains surrogate-escapes
        "foo\udc80bar",
        "foo\udcffbar",
    ],
)
def test_text_invalid(text):
    """Over-long texts, NUL chars and surrogate-escapes must raise ArgumentTypeError."""
    validate = text_validator(name="name", max_length=100)
    with pytest.raises(ArgumentTypeError):
        validate(text)