archive names: validate more strictly, fixes #2290

we want to be able to use an archive name as a directory name,
e.g. for the FUSE fs built by borg mount.

thus we can not allow "/" in an archive name on linux.

on windows, the rules are more restrictive, disallowing
quite a few more characters (':<>"|*?' plus some more).
we do not have FUSE fs / borg mount on windows yet, but
we had better avoid any issues there.
we cannot disallow ":" though, as our {now} placeholder
generates ISO-8601 timestamps, which include ":" chars.

also, we do not want leading/trailing blanks in
archive names, nor surrogate-escapes.

control chars are also disallowed, including chr(0).
we have a python str here, thus chr(0) is not expected in there
(it is not used to terminate a string, like it is in C).
This commit is contained in:
Thomas Waldmann 2022-12-10 14:45:01 +01:00
parent de6d8af42c
commit fe2b2bc007
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
2 changed files with 71 additions and 2 deletions

View file

@ -540,9 +540,34 @@ def location_validator(proto=None, other=False):
def archivename_validator():
    """Build and return a validator function for archive names.

    The returned function expands placeholders in the given text and makes sure
    that the result can be used as a directory name (for borg mount).

    The validator raises argparse.ArgumentTypeError if the expanded name:

    - is empty or longer than the maximum archive name length,
    - contains a slash, backslash, double quote, "<", "|", ">", "?" or "*",
    - contains "::",
    - contains control chars (ASCII < 32, including chr(0)),
    - starts or ends with a blank,
    - can not be encoded as strict UTF-8 (e.g. contains surrogate-escapes).

    Otherwise it returns the placeholder-expanded name.
    """
    # All limits and patterns are independent of the validated text, so they are
    # set up once per validator instead of once per validator call.
    max_path = 260  # Windows default. Since Win10, there is a registry setting LongPathsEnabled to get more.
    max_dirname = max_path - len("12345678.123")
    safety_margin = 48  # borgfs path: mountpoint / archivename / dir / dir / ... / file
    max_archivename = max_dirname - safety_margin
    # note: ":" is also an invalid path char on windows, but we can not blacklist it,
    # because e.g. our {now} placeholder creates ISO-8601 like output like 2022-12-10T20:47:42 .
    invalid_chars = r"/" + r"\"<|>?*"  # posix + windows
    invalid_chars_re = re.compile(f"[{re.escape(invalid_chars)}]")
    invalid_ctrl_chars_re = re.compile(r"[\x00-\x1f]")  # same set as chr(0) .. chr(31)

    def validator(text):
        assert isinstance(text, str)
        text = replace_placeholders(text)
        # The specific checks run first, so empty names and names containing "/"
        # get a diagnostic that tells the user what exactly is wrong.
        if not (0 < len(text) <= max_archivename):
            raise argparse.ArgumentTypeError(f'Invalid archive name: "{text}" [0 < length <= {max_archivename}]')
        if invalid_chars_re.search(text):
            raise argparse.ArgumentTypeError(
                f'Invalid archive name: "{text}" [invalid chars detected matching "{invalid_chars}"]'
            )
        if "::" in text:
            raise argparse.ArgumentTypeError(f'Invalid archive name: "{text}"')
        if invalid_ctrl_chars_re.search(text):
            raise argparse.ArgumentTypeError(
                f'Invalid archive name: "{text}" [invalid control chars detected, ASCII < 32]'
            )
        if text.startswith(" ") or text.endswith(" "):
            raise argparse.ArgumentTypeError(f'Invalid archive name: "{text}" [leading or trailing blanks]')
        try:
            text.encode("utf-8", errors="strict")
        except UnicodeEncodeError:
            # looks like text contains surrogate-escapes.
            # "from None": the encoding error is an implementation detail of this check.
            raise argparse.ArgumentTypeError(
                f'Invalid archive name: "{text}" [contains non-unicode characters]'
            ) from None
        return text

    return validator

View file

@ -32,6 +32,7 @@ from ..helpers import msgpack
from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
from ..helpers import StableDict, bin_to_hex
from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams
from ..helpers import archivename_validator
from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
from ..helpers import swidth_slice
from ..helpers import chunkit
@ -246,6 +247,49 @@ class TestLocationWithoutEnv:
Location("ssh://user@host:/path")
@pytest.mark.parametrize(
    "name",
    [
        "foobar",
        # a single ":" must stay allowed, the {now} placeholder generates such names
        "foo:bar",
        # blanks inside the name are fine, only leading/trailing blanks are rejected
        "foo bar",
        # longest accepted name: 260 - len("12345678.123") - 48 == 200 chars
        "x" * 200,
        # placeholders
        "foobar-{now}",
    ],
)
def test_archivename_ok(name):
    """Valid archive names (including the length boundary) must pass the validator."""
    validate = archivename_validator()
    validate(name)  # must not raise an exception
@pytest.mark.parametrize(
    "name",
    [
        # length out of range: empty / one char more than allowed
        "",
        "x" * 201,
        # path separators (posix / windows)
        "foo/bar",
        "foo\\bar",
        # other chars that are invalid in windows file names
        ">foo",
        "<foo",
        "|foo",
        'foo"bar',
        "foo?",
        "*bar",
        # control chars, including chr(0)
        "foo\nbar",
        "foo\0bar",
        # blank at the start / at the end
        " foo",
        "bar ",
        # surrogate-escapes (not encodable as strict utf-8)
        "foo\udc80bar",
        "foo\udcffbar",
    ],
)
def test_archivename_invalid(name):
    """Each of these archive names must be rejected with an ArgumentTypeError."""
    with pytest.raises(ArgumentTypeError):
        archivename_validator()(name)
class FormatTimedeltaTestCase(BaseTestCase):
def test(self):
t0 = datetime(2001, 1, 1, 10, 20, 3, 0)