mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-13 10:50:21 -04:00
parseformat: simplify Location parsing/validation, #9678
For sftp/http(s)/s3/b2/rclone repositories, borg only detects the scheme now and hands the raw URL to borgstore, which parses and validates it - removing the duplicate parsing borg used to do. Precise field extraction (user/host/port/path) is kept only for the protocols borg itself reads: file, rest and legacy ssh.

- drop http_re, s3_re, rclone_re and the sftp arm of the old ssh_or_sftp_re
- add a single scheme-detection pass-through against BORGSTORE_SCHEMES; reject unknown schemes (e.g. socket://) as before
- canonical_path() returns the processed URL for the delegated protocols, with embedded credentials stripped so secrets never reach the security state file or logs
- source local_path_re's scheme exclusions from BORGSTORE_SCHEMES
- create: use proto == "file" instead of "not location.host" for the local repo-dir inode skip

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
8509b3b7fa
commit
aa9f810453
4 changed files with 76 additions and 100 deletions
|
|
@ -201,6 +201,12 @@ Fixes:
|
|||
|
||||
Other changes:
|
||||
|
||||
- Location: simplify parsing/validation, #9678.
|
||||
For sftp/http(s)/s3/b2/rclone repositories, borg now only detects the scheme and hands the raw
|
||||
URL to borgstore, which parses and validates it (removing the duplicate parsing borg did before).
|
||||
Note: for these repositories the canonical location string changed slightly, so on the first run
|
||||
against an existing such repository borg may warn once that it "was previously located at ..." -
|
||||
this is harmless and can be confirmed.
|
||||
- keyfile: name key files by sha256(keyfile_contents).
|
||||
Existing legacy-named keyfiles continue to work.
|
||||
- repokey: use same format as with external keyfile
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ class CreateMixIn:
|
|||
except OSError:
|
||||
pass
|
||||
# Add local repository dir to inode_skip list
|
||||
if not args.location.host:
|
||||
if args.location.proto == "file":
|
||||
try:
|
||||
st = os.stat(args.location.path)
|
||||
skip_inodes.add((st.st_ino, st.st_dev))
|
||||
|
|
|
|||
|
|
@ -503,6 +503,15 @@ def parse_stringified_list(s):
|
|||
return [item for item in items if item != ""]
|
||||
|
||||
|
||||
def _redact_url_credentials(url):
|
||||
"""Remove embedded credentials from a repository URL for safe display/identity use."""
|
||||
# netloc style: scheme://user[:pass]@host... -> scheme://host...
|
||||
url = re.sub(r"(://)[^/@]+@", r"\1", url)
|
||||
# s3/b2 style: (s3|b2):profile@... or (s3|b2):key:secret@... -> (s3|b2):...
|
||||
url = re.sub(r"^((?:s3|b2):)[^@/]+@", r"\1", url)
|
||||
return url
|
||||
|
||||
|
||||
class Location:
|
||||
"""Object representing a repository location"""
|
||||
|
||||
|
|
@ -528,12 +537,14 @@ class Location:
|
|||
# :port (optional)
|
||||
optional_port_re = r"(?::(?P<port>\d+))?"
|
||||
|
||||
# path may contain any chars. to avoid ambiguities with other regexes,
|
||||
# it must not start with "//" nor with "scheme://" nor with "rclone:".
|
||||
local_path_re = r"""
|
||||
(?!(//|(ssh|socket|sftp|file)://|(rclone|s3|b2):))
|
||||
(?P<path>.+)
|
||||
"""
|
||||
# locations that borgstore parses and validates itself - borg only detects the scheme and
|
||||
# passes the raw URL through. covers both "scheme://..." and opaque "scheme:..." forms.
|
||||
BORGSTORE_SCHEMES = ("sftp", "http", "https", "s3", "b2", "rclone")
|
||||
|
||||
# path may contain any chars. to avoid ambiguities with other regexes, it must not start with
|
||||
# "//", a "scheme://" or one of the borgstore "scheme:" specifiers (all of which are matched
|
||||
# before local_re in _parse). the borgstore scheme list is sourced from BORGSTORE_SCHEMES.
|
||||
local_path_re = r"(?!(//|(?:ssh|socket|file)://|(?:" + "|".join(BORGSTORE_SCHEMES) + r"):))" r"(?P<path>.+)"
|
||||
|
||||
# abs_path must start with a slash (or drive letter on Windows).
|
||||
abs_path_re = r"(?P<path>[A-Za-z]:/.+)" if is_win32 else r"(?P<path>/.+)"
|
||||
|
|
@ -541,9 +552,14 @@ class Location:
|
|||
# path may or may not start with a slash.
|
||||
abs_or_rel_path_re = r"(?P<path>.+)"
|
||||
|
||||
# regexes for misc. kinds of supported location specifiers:
|
||||
ssh_or_sftp_re = re.compile(
|
||||
r"(?P<proto>(ssh|sftp))://"
|
||||
# We only parse out individual fields (user/host/port/path) for the protocols where borg
|
||||
# itself needs them: legacy "ssh" (v1 repositories) and "rest" (for the ssh tunnel + FILE
|
||||
# backend), plus local "file" paths. Everything else (see BORGSTORE_SCHEMES) is handed to
|
||||
# borgstore as the raw URL and parsed/validated there - we only detect the scheme.
|
||||
|
||||
# ssh:// is only used for legacy borg 1.x repositories nowadays.
|
||||
ssh_re = re.compile(
|
||||
r"(?P<proto>ssh)://"
|
||||
+ optional_user_re
|
||||
+ host_re
|
||||
+ optional_port_re
|
||||
|
|
@ -565,39 +581,8 @@ class Location:
|
|||
re.VERBOSE,
|
||||
)
|
||||
|
||||
# BorgStore REST server
|
||||
# (http|https)://user:pass@host:port/
|
||||
http_re = re.compile(
|
||||
r"(?P<proto>http|https)://"
|
||||
+ r"((?P<user>[^:@]+):(?P<pass>[^@]+)@)?"
|
||||
+ host_re
|
||||
+ optional_port_re
|
||||
+ r"(?P<path>/)",
|
||||
re.VERBOSE,
|
||||
)
|
||||
|
||||
# (s3|b2):[(profile|(access_key_id:access_key_secret))@][scheme://hostname[:port]]/bucket/path
|
||||
s3_re = re.compile(
|
||||
r"""
|
||||
(?P<s3type>(s3|b2)):
|
||||
((
|
||||
(?P<profile>[^@:]+) # profile (no colons allowed)
|
||||
|
|
||||
(?P<access_key_id>[^:@]+):(?P<access_key_secret>[^@]+) # access key and secret
|
||||
)@)? # optional authentication
|
||||
(
|
||||
[^:/]+:// # scheme (often https)
|
||||
(?P<hostname>[^:/]+)
|
||||
(:(?P<port>\d+))?
|
||||
)? # optional endpoint
|
||||
/
|
||||
(?P<bucket>[^/]+)/ # bucket name
|
||||
(?P<path>.+) # path
|
||||
""",
|
||||
re.VERBOSE,
|
||||
)
|
||||
|
||||
rclone_re = re.compile(r"(?P<proto>rclone):(?P<path>(.*))", re.VERBOSE)
|
||||
# scheme detector for the borgstore-handled locations listed in BORGSTORE_SCHEMES above.
|
||||
scheme_re = re.compile(r"(?P<scheme>[a-zA-Z][a-zA-Z0-9+.\-]*):")
|
||||
|
||||
sl = "/" if is_win32 else ""
|
||||
file_re = re.compile(r"(?P<proto>file)://" + sl + abs_path_re, re.VERBOSE)
|
||||
|
|
@ -636,7 +621,7 @@ class Location:
|
|||
raise ValueError('Invalid location format: "%s"' % self.processed)
|
||||
|
||||
def _parse(self, text):
|
||||
m = self.ssh_or_sftp_re.match(text)
|
||||
m = self.ssh_re.match(text)
|
||||
if m:
|
||||
self.proto = m.group("proto")
|
||||
self.user = m.group("user")
|
||||
|
|
@ -652,33 +637,16 @@ class Location:
|
|||
self.port = m.group("port") and int(m.group("port")) or None
|
||||
self.path = os.path.normpath(m.group("path"))
|
||||
return True
|
||||
m = self.http_re.match(text)
|
||||
if m:
|
||||
self.proto = m.group("proto")
|
||||
self.user = m.group("user")
|
||||
self._pass = True if m.group("pass") else False
|
||||
self._host = m.group("host")
|
||||
self.port = m.group("port") and int(m.group("port")) or None
|
||||
self.path = m.group("path")
|
||||
return True
|
||||
m = self.rclone_re.match(text)
|
||||
if m:
|
||||
self.proto = m.group("proto")
|
||||
self.path = m.group("path")
|
||||
return True
|
||||
m = self.file_re.match(text)
|
||||
if m:
|
||||
self.proto = m.group("proto")
|
||||
self.path = os.path.normpath(m.group("path"))
|
||||
return True
|
||||
m = self.s3_re.match(text)
|
||||
if m:
|
||||
self.proto = m.group("s3type")
|
||||
self.user = m.group("profile") if m.group("profile") else m.group("access_key_id")
|
||||
self._pass = True if m.group("access_key_secret") else False
|
||||
self._host = m.group("hostname")
|
||||
self.port = m.group("port") and int(m.group("port")) or None
|
||||
self.path = m.group("bucket") + "/" + m.group("path")
|
||||
m = self.scheme_re.match(text)
|
||||
if m and m.group("scheme") in self.BORGSTORE_SCHEMES:
|
||||
# borgstore parses/validates these; we only detect the scheme and pass the raw
|
||||
# URL (self.processed) through to it - no fields are extracted here.
|
||||
self.proto = m.group("scheme")
|
||||
return True
|
||||
m = self.local_re.match(text)
|
||||
if m:
|
||||
|
|
@ -711,9 +679,7 @@ class Location:
|
|||
def canonical_path(self):
|
||||
if self.proto == "file":
|
||||
return self.path
|
||||
if self.proto == "rclone":
|
||||
return f"{self.proto}:{self.path}"
|
||||
if self.proto in ("rest", "sftp", "ssh", "s3", "b2", "http", "https"):
|
||||
if self.proto in ("rest", "ssh"):
|
||||
return (
|
||||
f"{self.proto}://"
|
||||
f"{(self.user + '@') if self.user else ''}"
|
||||
|
|
@ -721,6 +687,10 @@ class Location:
|
|||
f"{(':' + str(self.port)) if self.port else ''}/"
|
||||
f"{self.path}"
|
||||
)
|
||||
if self.proto in self.BORGSTORE_SCHEMES:
|
||||
# borgstore-handled locations: use the raw (processed) URL as given, but strip any
|
||||
# embedded credentials so we never write secrets to the security state file or logs.
|
||||
return _redact_url_credentials(self.processed)
|
||||
raise NotImplementedError(self.proto)
|
||||
|
||||
def with_timestamp(self, timestamp):
|
||||
|
|
|
|||
|
|
@ -198,51 +198,51 @@ class TestLocationWithoutEnv:
|
|||
== "Location(proto='rest', user=None, pass=None, host=None, port=None, path='/absolute/path')"
|
||||
)
|
||||
|
||||
# For the protocols handled (parsed + validated) by borgstore itself, borg only detects
|
||||
# the scheme and passes the raw URL through; it no longer extracts user/host/port/path.
|
||||
|
||||
def test_s3(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
assert (
|
||||
repr(Location("s3:/test/path"))
|
||||
== "Location(proto='s3', user=None, pass=None, host=None, port=None, path='test/path')"
|
||||
loc = Location("s3:/test/path")
|
||||
assert loc.proto == "s3"
|
||||
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
|
||||
assert loc.processed == "s3:/test/path"
|
||||
# credentials in the URL are stripped from canonical_path (security state file / logs)
|
||||
assert Location("s3:profile@http://172.28.52.116:9000/test/path").canonical_path() == (
|
||||
"s3:http://172.28.52.116:9000/test/path"
|
||||
)
|
||||
assert (
|
||||
repr(Location("s3:profile@http://172.28.52.116:9000/test/path"))
|
||||
== "Location(proto='s3', user='profile', pass=None, host='172.28.52.116', port=9000, path='test/path')" # noqa: E501
|
||||
assert Location("s3:user:pass@http://172.28.52.116:9000/test/path").canonical_path() == (
|
||||
"s3:http://172.28.52.116:9000/test/path"
|
||||
)
|
||||
assert (
|
||||
repr(Location("s3:user:pass@http://172.28.52.116:9000/test/path"))
|
||||
== "Location(proto='s3', user='user', pass='REDACTED', host='172.28.52.116', port=9000, path='test/path')" # noqa: E501
|
||||
)
|
||||
assert (
|
||||
repr(Location("b2:user:pass@https://s3.us-east-005.backblazeb2.com/test/path"))
|
||||
== "Location(proto='b2', user='user', pass='REDACTED', host='s3.us-east-005.backblazeb2.com', port=None, path='test/path')" # noqa: E501
|
||||
assert Location("b2:user:pass@https://s3.us-east-005.backblazeb2.com/test/path").canonical_path() == (
|
||||
"b2:https://s3.us-east-005.backblazeb2.com/test/path"
|
||||
)
|
||||
|
||||
def test_rclone(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
assert (
|
||||
repr(Location("rclone:remote:path"))
|
||||
== "Location(proto='rclone', user=None, pass=None, host=None, port=None, path='remote:path')"
|
||||
)
|
||||
loc = Location("rclone:remote:path")
|
||||
assert loc.proto == "rclone"
|
||||
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
|
||||
assert loc.processed == "rclone:remote:path"
|
||||
assert loc.canonical_path() == "rclone:remote:path"
|
||||
|
||||
def test_sftp(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
# relative path
|
||||
assert (
|
||||
repr(Location("sftp://user@host:1234/rel/path"))
|
||||
== "Location(proto='sftp', user='user', pass=None, host='host', port=1234, path='rel/path')"
|
||||
)
|
||||
# absolute path
|
||||
assert (
|
||||
repr(Location("sftp://user@host:1234//abs/path"))
|
||||
== "Location(proto='sftp', user='user', pass=None, host='host', port=1234, path='/abs/path')"
|
||||
)
|
||||
loc = Location("sftp://user@host:1234/rel/path")
|
||||
assert loc.proto == "sftp"
|
||||
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
|
||||
assert loc.processed == "sftp://user@host:1234/rel/path"
|
||||
# credentials stripped from canonical_path
|
||||
assert loc.canonical_path() == "sftp://host:1234/rel/path"
|
||||
|
||||
def test_http(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
assert (
|
||||
repr(Location("http://user:pass@host:1234/"))
|
||||
== "Location(proto='http', user='user', pass='REDACTED', host='host', port=1234, path='/')"
|
||||
)
|
||||
loc = Location("http://user:pass@host:1234/")
|
||||
assert loc.proto == "http"
|
||||
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
|
||||
assert loc.processed == "http://user:pass@host:1234/"
|
||||
# credentials stripped from canonical_path
|
||||
assert loc.canonical_path() == "http://host:1234/"
|
||||
|
||||
def test_socket(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
|
|
|
|||
Loading…
Reference in a new issue