mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-11 09:59:19 -04:00
Merge pull request #9754 from ThomasWaldmann/simplify-location-parsing
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OmniOS, false, omnios, r151056) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.8) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OmniOS, false, omnios, r151056) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.8) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run
Simplify location parsing, remove socket: remainders
This commit is contained in:
commit
c710edf0df
10 changed files with 84 additions and 148 deletions
|
|
@ -205,6 +205,14 @@ Fixes:
|
|||
|
||||
Other changes:
|
||||
|
||||
- Location: simplify parsing/validation, #9678.
|
||||
For sftp/http(s)/s3/b2/rclone repositories, borg now only detects the scheme and hands the raw
|
||||
URL to borgstore, which parses and validates it (removing the duplicate parsing borg did before).
|
||||
Note: for these repositories the canonical location string changed slightly, so on the first run
|
||||
against an existing such repository borg may warn once that it "was previously located at ..." -
|
||||
this is harmless and can be confirmed.
|
||||
- remove leftover socket: protocol code (the unix-socket transport and the --socket option were
|
||||
never part of a stable borg 2 release).
|
||||
- keyfile: name key files by sha256(keyfile_contents).
|
||||
Existing legacy-named keyfiles continue to work.
|
||||
- repokey: use same format as with external keyfile
|
||||
|
|
|
|||
|
|
@ -17,5 +17,4 @@
|
|||
--upload-buffer UPLOAD_BUFFER set network upload buffer size in MiB. (default: 0=no buffer)
|
||||
--debug-profile FILE Write execution profile in Borg format into FILE. For local use a Python-compatible file can be generated by suffixing FILE with ".pyprof".
|
||||
--rsh RSH Use this command to connect to the 'borg serve' process (default: 'ssh')
|
||||
--socket PATH Use UNIX DOMAIN (IPC) socket at PATH for client/server communication with socket: protocol.
|
||||
-r REPO, --repo REPO repository to use
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ class CreateMixIn:
|
|||
except OSError:
|
||||
pass
|
||||
# Add local repository dir to inode_skip list
|
||||
if not args.location.host:
|
||||
if args.location.proto == "file":
|
||||
try:
|
||||
st = os.stat(args.location.path)
|
||||
skip_inodes.add((st.st_ino, st.st_dev))
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from .errors import RTError, PathNotAllowed, modern_ec
|
|||
from .errors import BorgWarning, FileChangedWarning, BackupWarning, IncludePatternNeverMatchedWarning
|
||||
from .errors import BackupError, BackupOSError, BackupRaceConditionError, BackupItemExcluded
|
||||
from .errors import BackupPermissionError, BackupIOError, BackupFileNotFoundError
|
||||
from .fs import ensure_dir, join_base_dir, get_socket_filename
|
||||
from .fs import ensure_dir, join_base_dir
|
||||
from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
|
||||
from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder
|
||||
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount, slashify
|
||||
|
|
|
|||
|
|
@ -169,10 +169,6 @@ def get_runtime_dir(*, legacy=False, create=True):
|
|||
return runtime_dir
|
||||
|
||||
|
||||
def get_socket_filename():
|
||||
return str(Path(get_runtime_dir()) / "borg.sock")
|
||||
|
||||
|
||||
def get_cache_dir(*, legacy=False, create=True):
|
||||
"""Determine where to store Borg cache data."""
|
||||
|
||||
|
|
|
|||
|
|
@ -503,6 +503,15 @@ def parse_stringified_list(s):
|
|||
return [item for item in items if item != ""]
|
||||
|
||||
|
||||
def _redact_url_credentials(url):
|
||||
"""Remove embedded credentials from a repository URL for safe display/identity use."""
|
||||
# netloc style: scheme://user[:pass]@host... -> scheme://host...
|
||||
url = re.sub(r"(://)[^/@]+@", r"\1", url)
|
||||
# s3/b2 style: (s3|b2):profile@... or (s3|b2):key:secret@... -> (s3|b2):...
|
||||
url = re.sub(r"^((?:s3|b2):)[^@/]+@", r"\1", url)
|
||||
return url
|
||||
|
||||
|
||||
class Location:
|
||||
"""Object representing a repository location"""
|
||||
|
||||
|
|
@ -528,12 +537,14 @@ class Location:
|
|||
# :port (optional)
|
||||
optional_port_re = r"(?::(?P<port>\d+))?"
|
||||
|
||||
# path may contain any chars. to avoid ambiguities with other regexes,
|
||||
# it must not start with "//" nor with "scheme://" nor with "rclone:".
|
||||
local_path_re = r"""
|
||||
(?!(//|(ssh|socket|sftp|file)://|(rclone|s3|b2):))
|
||||
(?P<path>.+)
|
||||
"""
|
||||
# locations that borgstore parses and validates itself - borg only detects the scheme and
|
||||
# passes the raw URL through. covers both "scheme://..." and opaque "scheme:..." forms.
|
||||
BORGSTORE_SCHEMES = ("sftp", "http", "https", "s3", "b2", "rclone")
|
||||
|
||||
# path may contain any chars. to avoid ambiguities with other regexes, it must not start with
|
||||
# "//", a "scheme://" or one of the borgstore "scheme:" specifiers (all of which are matched
|
||||
# before local_re in _parse). the borgstore scheme list is sourced from BORGSTORE_SCHEMES.
|
||||
local_path_re = r"(?!(//|(?:ssh|file)://|(?:" + "|".join(BORGSTORE_SCHEMES) + r"):))" r"(?P<path>.+)"
|
||||
|
||||
# abs_path must start with a slash (or drive letter on Windows).
|
||||
abs_path_re = r"(?P<path>[A-Za-z]:/.+)" if is_win32 else r"(?P<path>/.+)"
|
||||
|
|
@ -541,9 +552,14 @@ class Location:
|
|||
# path may or may not start with a slash.
|
||||
abs_or_rel_path_re = r"(?P<path>.+)"
|
||||
|
||||
# regexes for misc. kinds of supported location specifiers:
|
||||
ssh_or_sftp_re = re.compile(
|
||||
r"(?P<proto>(ssh|sftp))://"
|
||||
# We only parse out individual fields (user/host/port/path) for the protocols where borg
|
||||
# itself needs them: legacy "ssh" (v1 repositories) and "rest" (for the ssh tunnel + FILE
|
||||
# backend), plus local "file" paths. Everything else (see BORGSTORE_SCHEMES) is handed to
|
||||
# borgstore as the raw URL and parsed/validated there - we only detect the scheme.
|
||||
|
||||
# ssh:// is only used for legacy borg 1.x repositories nowadays.
|
||||
ssh_re = re.compile(
|
||||
r"(?P<proto>ssh)://"
|
||||
+ optional_user_re
|
||||
+ host_re
|
||||
+ optional_port_re
|
||||
|
|
@ -565,39 +581,8 @@ class Location:
|
|||
re.VERBOSE,
|
||||
)
|
||||
|
||||
# BorgStore REST server
|
||||
# (http|https)://user:pass@host:port/
|
||||
http_re = re.compile(
|
||||
r"(?P<proto>http|https)://"
|
||||
+ r"((?P<user>[^:@]+):(?P<pass>[^@]+)@)?"
|
||||
+ host_re
|
||||
+ optional_port_re
|
||||
+ r"(?P<path>/)",
|
||||
re.VERBOSE,
|
||||
)
|
||||
|
||||
# (s3|b2):[(profile|(access_key_id:access_key_secret))@][scheme://hostname[:port]]/bucket/path
|
||||
s3_re = re.compile(
|
||||
r"""
|
||||
(?P<s3type>(s3|b2)):
|
||||
((
|
||||
(?P<profile>[^@:]+) # profile (no colons allowed)
|
||||
|
|
||||
(?P<access_key_id>[^:@]+):(?P<access_key_secret>[^@]+) # access key and secret
|
||||
)@)? # optional authentication
|
||||
(
|
||||
[^:/]+:// # scheme (often https)
|
||||
(?P<hostname>[^:/]+)
|
||||
(:(?P<port>\d+))?
|
||||
)? # optional endpoint
|
||||
/
|
||||
(?P<bucket>[^/]+)/ # bucket name
|
||||
(?P<path>.+) # path
|
||||
""",
|
||||
re.VERBOSE,
|
||||
)
|
||||
|
||||
rclone_re = re.compile(r"(?P<proto>rclone):(?P<path>(.*))", re.VERBOSE)
|
||||
# scheme detector for the borgstore-handled locations listed in BORGSTORE_SCHEMES above.
|
||||
scheme_re = re.compile(r"(?P<scheme>[a-zA-Z][a-zA-Z0-9+.\-]*):")
|
||||
|
||||
sl = "/" if is_win32 else ""
|
||||
file_re = re.compile(r"(?P<proto>file)://" + sl + abs_path_re, re.VERBOSE)
|
||||
|
|
@ -636,7 +621,7 @@ class Location:
|
|||
raise ValueError('Invalid location format: "%s"' % self.processed)
|
||||
|
||||
def _parse(self, text):
|
||||
m = self.ssh_or_sftp_re.match(text)
|
||||
m = self.ssh_re.match(text)
|
||||
if m:
|
||||
self.proto = m.group("proto")
|
||||
self.user = m.group("user")
|
||||
|
|
@ -652,33 +637,16 @@ class Location:
|
|||
self.port = m.group("port") and int(m.group("port")) or None
|
||||
self.path = os.path.normpath(m.group("path"))
|
||||
return True
|
||||
m = self.http_re.match(text)
|
||||
if m:
|
||||
self.proto = m.group("proto")
|
||||
self.user = m.group("user")
|
||||
self._pass = True if m.group("pass") else False
|
||||
self._host = m.group("host")
|
||||
self.port = m.group("port") and int(m.group("port")) or None
|
||||
self.path = m.group("path")
|
||||
return True
|
||||
m = self.rclone_re.match(text)
|
||||
if m:
|
||||
self.proto = m.group("proto")
|
||||
self.path = m.group("path")
|
||||
return True
|
||||
m = self.file_re.match(text)
|
||||
if m:
|
||||
self.proto = m.group("proto")
|
||||
self.path = os.path.normpath(m.group("path"))
|
||||
return True
|
||||
m = self.s3_re.match(text)
|
||||
if m:
|
||||
self.proto = m.group("s3type")
|
||||
self.user = m.group("profile") if m.group("profile") else m.group("access_key_id")
|
||||
self._pass = True if m.group("access_key_secret") else False
|
||||
self._host = m.group("hostname")
|
||||
self.port = m.group("port") and int(m.group("port")) or None
|
||||
self.path = m.group("bucket") + "/" + m.group("path")
|
||||
m = self.scheme_re.match(text)
|
||||
if m and m.group("scheme") in self.BORGSTORE_SCHEMES:
|
||||
# borgstore parses/validates these; we only detect the scheme and pass the raw
|
||||
# URL (self.processed) through to it - no fields are extracted here.
|
||||
self.proto = m.group("scheme")
|
||||
return True
|
||||
m = self.local_re.match(text)
|
||||
if m:
|
||||
|
|
@ -711,9 +679,7 @@ class Location:
|
|||
def canonical_path(self):
|
||||
if self.proto == "file":
|
||||
return self.path
|
||||
if self.proto == "rclone":
|
||||
return f"{self.proto}:{self.path}"
|
||||
if self.proto in ("rest", "sftp", "ssh", "s3", "b2", "http", "https"):
|
||||
if self.proto in ("rest", "ssh"):
|
||||
return (
|
||||
f"{self.proto}://"
|
||||
f"{(self.user + '@') if self.user else ''}"
|
||||
|
|
@ -721,6 +687,10 @@ class Location:
|
|||
f"{(':' + str(self.port)) if self.port else ''}/"
|
||||
f"{self.path}"
|
||||
)
|
||||
if self.proto in self.BORGSTORE_SCHEMES:
|
||||
# borgstore-handled locations: use the raw (processed) URL as given, but strip any
|
||||
# embedded credentials so we never write secrets to the security state file or logs.
|
||||
return _redact_url_credentials(self.processed)
|
||||
raise NotImplementedError(self.proto)
|
||||
|
||||
def with_timestamp(self, timestamp):
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import os
|
|||
import queue
|
||||
import select
|
||||
import shlex
|
||||
import socket
|
||||
import sys
|
||||
import textwrap
|
||||
import time
|
||||
|
|
@ -23,7 +22,6 @@ from ..helpers import replace_placeholders
|
|||
from ..helpers import sysinfo
|
||||
from ..helpers import format_file_size
|
||||
from ..helpers import prepare_subprocess_env, ignore_sigint
|
||||
from ..helpers import get_socket_filename
|
||||
from ..fslocking import LockTimeout, NotLocked, NotMyLock, LockFailed
|
||||
from ..logger import create_logger, borg_serve_log_queue
|
||||
from ..helpers import msgpack
|
||||
|
|
@ -255,7 +253,7 @@ class LegacyRemoteRepository:
|
|||
self.upload_buffer_size_limit = args.upload_buffer * 1024 * 1024 if args and args.upload_buffer else 0
|
||||
self.unpacker = get_limited_unpacker("client")
|
||||
self.server_version = None # we update this after server sends its version
|
||||
self.p = self.sock = None
|
||||
self.p = None
|
||||
self._args = args
|
||||
if self.location.proto == "ssh":
|
||||
testing = location.host == "__testsuite__"
|
||||
|
|
@ -281,25 +279,6 @@ class LegacyRemoteRepository:
|
|||
self.stderr_fd = self.p.stderr.fileno()
|
||||
self.r_fds = [self.stdout_fd, self.stderr_fd]
|
||||
self.x_fds = [self.stdin_fd, self.stdout_fd, self.stderr_fd]
|
||||
elif self.location.proto == "socket":
|
||||
if args.use_socket is False or args.use_socket is True: # nothing or --socket
|
||||
socket_path = get_socket_filename()
|
||||
else: # --socket=/some/path
|
||||
socket_path = args.use_socket
|
||||
self.sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM)
|
||||
try:
|
||||
self.sock.connect(socket_path) # note: socket_path length is rather limited.
|
||||
except FileNotFoundError:
|
||||
self.sock = None
|
||||
raise Error(f"The socket file {socket_path} does not exist.")
|
||||
except ConnectionRefusedError:
|
||||
self.sock = None
|
||||
raise Error(f"There is no borg serve running for the socket file {socket_path}.")
|
||||
self.stdin_fd = self.sock.makefile("wb").fileno()
|
||||
self.stdout_fd = self.sock.makefile("rb").fileno()
|
||||
self.stderr_fd = None
|
||||
self.r_fds = [self.stdout_fd]
|
||||
self.x_fds = [self.stdin_fd, self.stdout_fd]
|
||||
else:
|
||||
raise Error(f"Unsupported protocol {location.proto}")
|
||||
|
||||
|
|
@ -339,7 +318,7 @@ class LegacyRemoteRepository:
|
|||
def __del__(self):
|
||||
if len(self.responses):
|
||||
logging.debug("still %d cached responses left in LegacyRemoteRepository" % (len(self.responses),))
|
||||
if self.p or self.sock:
|
||||
if self.p:
|
||||
self.close()
|
||||
assert False, "cleanup happened in LegacyRemoteRepository.__del__"
|
||||
|
||||
|
|
@ -661,21 +640,12 @@ class LegacyRemoteRepository:
|
|||
"""actual remoting is done via self.call in the @api decorator"""
|
||||
|
||||
def close(self):
|
||||
if self.p or self.sock:
|
||||
self.call("close", {}, wait=True)
|
||||
if self.p:
|
||||
self.call("close", {}, wait=True)
|
||||
self.p.stdin.close()
|
||||
self.p.stdout.close()
|
||||
self.p.wait()
|
||||
self.p = None
|
||||
if self.sock:
|
||||
try:
|
||||
self.sock.shutdown(socket.SHUT_RDWR)
|
||||
except OSError as e:
|
||||
if e.errno != errno.ENOTCONN:
|
||||
raise
|
||||
self.sock.close()
|
||||
self.sock = None
|
||||
|
||||
def async_response(self, wait=True):
|
||||
for resp in self.call_many("async_responses", calls=[], wait=True, async_wait=wait):
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ Logging setup in Borg needs to work under various conditions:
|
|||
- Tests: potentially running in parallel via pytest-xdist, capturing Borg output into a
|
||||
given stream.
|
||||
- Logging might be short-lived (e.g., when invoking a single Borg command via the CLI)
|
||||
or long-lived (e.g., borg serve --socket or when running the tests).
|
||||
or long-lived (e.g., borg serve or when running the tests).
|
||||
- Logging is global and exists only once per process.
|
||||
"""
|
||||
|
||||
|
|
|
|||
|
|
@ -198,58 +198,51 @@ class TestLocationWithoutEnv:
|
|||
== "Location(proto='rest', user=None, pass=None, host=None, port=None, path='/absolute/path')"
|
||||
)
|
||||
|
||||
# For the protocols handled (parsed + validated) by borgstore itself, borg only detects
|
||||
# the scheme and passes the raw URL through; it no longer extracts user/host/port/path.
|
||||
|
||||
def test_s3(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
assert (
|
||||
repr(Location("s3:/test/path"))
|
||||
== "Location(proto='s3', user=None, pass=None, host=None, port=None, path='test/path')"
|
||||
loc = Location("s3:/test/path")
|
||||
assert loc.proto == "s3"
|
||||
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
|
||||
assert loc.processed == "s3:/test/path"
|
||||
# credentials in the URL are stripped from canonical_path (security state file / logs)
|
||||
assert Location("s3:profile@http://172.28.52.116:9000/test/path").canonical_path() == (
|
||||
"s3:http://172.28.52.116:9000/test/path"
|
||||
)
|
||||
assert (
|
||||
repr(Location("s3:profile@http://172.28.52.116:9000/test/path"))
|
||||
== "Location(proto='s3', user='profile', pass=None, host='172.28.52.116', port=9000, path='test/path')" # noqa: E501
|
||||
assert Location("s3:user:pass@http://172.28.52.116:9000/test/path").canonical_path() == (
|
||||
"s3:http://172.28.52.116:9000/test/path"
|
||||
)
|
||||
assert (
|
||||
repr(Location("s3:user:pass@http://172.28.52.116:9000/test/path"))
|
||||
== "Location(proto='s3', user='user', pass='REDACTED', host='172.28.52.116', port=9000, path='test/path')" # noqa: E501
|
||||
)
|
||||
assert (
|
||||
repr(Location("b2:user:pass@https://s3.us-east-005.backblazeb2.com/test/path"))
|
||||
== "Location(proto='b2', user='user', pass='REDACTED', host='s3.us-east-005.backblazeb2.com', port=None, path='test/path')" # noqa: E501
|
||||
assert Location("b2:user:pass@https://s3.us-east-005.backblazeb2.com/test/path").canonical_path() == (
|
||||
"b2:https://s3.us-east-005.backblazeb2.com/test/path"
|
||||
)
|
||||
|
||||
def test_rclone(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
assert (
|
||||
repr(Location("rclone:remote:path"))
|
||||
== "Location(proto='rclone', user=None, pass=None, host=None, port=None, path='remote:path')"
|
||||
)
|
||||
loc = Location("rclone:remote:path")
|
||||
assert loc.proto == "rclone"
|
||||
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
|
||||
assert loc.processed == "rclone:remote:path"
|
||||
assert loc.canonical_path() == "rclone:remote:path"
|
||||
|
||||
def test_sftp(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
# relative path
|
||||
assert (
|
||||
repr(Location("sftp://user@host:1234/rel/path"))
|
||||
== "Location(proto='sftp', user='user', pass=None, host='host', port=1234, path='rel/path')"
|
||||
)
|
||||
# absolute path
|
||||
assert (
|
||||
repr(Location("sftp://user@host:1234//abs/path"))
|
||||
== "Location(proto='sftp', user='user', pass=None, host='host', port=1234, path='/abs/path')"
|
||||
)
|
||||
loc = Location("sftp://user@host:1234/rel/path")
|
||||
assert loc.proto == "sftp"
|
||||
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
|
||||
assert loc.processed == "sftp://user@host:1234/rel/path"
|
||||
# credentials stripped from canonical_path
|
||||
assert loc.canonical_path() == "sftp://host:1234/rel/path"
|
||||
|
||||
def test_http(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
assert (
|
||||
repr(Location("http://user:pass@host:1234/"))
|
||||
== "Location(proto='http', user='user', pass='REDACTED', host='host', port=1234, path='/')"
|
||||
)
|
||||
|
||||
def test_socket(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
# socket:// is no longer supported and must be rejected as an invalid location.
|
||||
url = "socket:///c:/repo/path" if is_win32 else "socket:///repo/path"
|
||||
with pytest.raises(ValueError):
|
||||
Location(url)
|
||||
loc = Location("http://user:pass@host:1234/")
|
||||
assert loc.proto == "http"
|
||||
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
|
||||
assert loc.processed == "http://user:pass@host:1234/"
|
||||
# credentials stripped from canonical_path
|
||||
assert loc.canonical_path() == "http://host:1234/"
|
||||
|
||||
def test_file(self, monkeypatch):
|
||||
monkeypatch.delenv("BORG_REPO", raising=False)
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ def reopen(repository, exclusive: bool | None = True, create=False):
|
|||
return LegacyRepository(repository.path, exclusive=exclusive, create=create)
|
||||
|
||||
if isinstance(repository, LegacyRemoteRepository):
|
||||
if repository.p is not None or repository.sock is not None:
|
||||
if repository.p is not None:
|
||||
raise RuntimeError("Remote repo must be closed before a reopen. Cannot support nested repository contexts.")
|
||||
return LegacyRemoteRepository(repository.location, exclusive=exclusive, create=create)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue