Merge pull request #9754 from ThomasWaldmann/simplify-location-parsing
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OmniOS, false, omnios, r151056) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.8) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run

Simplify location parsing, remove socket: remainders
This commit is contained in:
TW 2026-06-11 10:48:23 +02:00 committed by GitHub
commit c710edf0df
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 84 additions and 148 deletions

View file

@ -205,6 +205,14 @@ Fixes:
Other changes:
- Location: simplify parsing/validation, #9678.
For sftp/http(s)/s3/b2/rclone repositories, borg now only detects the scheme and hands the raw
URL to borgstore, which parses and validates it (removing the duplicate parsing borg did before).
Note: for these repositories the canonical location string changed slightly, so on the first run
against an existing such repository borg may warn once that it "was previously located at ..." -
this is harmless and can be confirmed.
- remove leftover socket: protocol code (the unix-socket transport and the --socket option were
never part of a stable borg 2 release).
- keyfile: name key files by sha256(keyfile_contents).
Existing legacy-named keyfiles continue to work.
- repokey: use same format as with external keyfile

View file

@ -17,5 +17,4 @@
--upload-buffer UPLOAD_BUFFER set network upload buffer size in MiB. (default: 0=no buffer)
--debug-profile FILE Write execution profile in Borg format into FILE. For local use a Python-compatible file can be generated by suffixing FILE with ".pyprof".
--rsh RSH Use this command to connect to the 'borg serve' process (default: 'ssh')
--socket PATH Use UNIX DOMAIN (IPC) socket at PATH for client/server communication with socket: protocol.
-r REPO, --repo REPO repository to use

View file

@ -56,7 +56,7 @@ class CreateMixIn:
except OSError:
pass
# Add local repository dir to inode_skip list
if not args.location.host:
if args.location.proto == "file":
try:
st = os.stat(args.location.path)
skip_inodes.add((st.st_ino, st.st_dev))

View file

@ -17,7 +17,7 @@ from .errors import RTError, PathNotAllowed, modern_ec
from .errors import BorgWarning, FileChangedWarning, BackupWarning, IncludePatternNeverMatchedWarning
from .errors import BackupError, BackupOSError, BackupRaceConditionError, BackupItemExcluded
from .errors import BackupPermissionError, BackupIOError, BackupFileNotFoundError
from .fs import ensure_dir, join_base_dir, get_socket_filename
from .fs import ensure_dir, join_base_dir
from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder
from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, get_strip_prefix, umount, slashify

View file

@ -169,10 +169,6 @@ def get_runtime_dir(*, legacy=False, create=True):
return runtime_dir
def get_socket_filename():
return str(Path(get_runtime_dir()) / "borg.sock")
def get_cache_dir(*, legacy=False, create=True):
"""Determine where to store Borg cache data."""

View file

@ -503,6 +503,15 @@ def parse_stringified_list(s):
return [item for item in items if item != ""]
def _redact_url_credentials(url):
"""Remove embedded credentials from a repository URL for safe display/identity use."""
# netloc style: scheme://user[:pass]@host... -> scheme://host...
url = re.sub(r"(://)[^/@]+@", r"\1", url)
# s3/b2 style: (s3|b2):profile@... or (s3|b2):key:secret@... -> (s3|b2):...
url = re.sub(r"^((?:s3|b2):)[^@/]+@", r"\1", url)
return url
class Location:
"""Object representing a repository location"""
@ -528,12 +537,14 @@ class Location:
# :port (optional)
optional_port_re = r"(?::(?P<port>\d+))?"
# path may contain any chars. to avoid ambiguities with other regexes,
# it must not start with "//" nor with "scheme://" nor with "rclone:".
local_path_re = r"""
(?!(//|(ssh|socket|sftp|file)://|(rclone|s3|b2):))
(?P<path>.+)
"""
# locations that borgstore parses and validates itself - borg only detects the scheme and
# passes the raw URL through. covers both "scheme://..." and opaque "scheme:..." forms.
BORGSTORE_SCHEMES = ("sftp", "http", "https", "s3", "b2", "rclone")
# path may contain any chars. to avoid ambiguities with other regexes, it must not start with
# "//", a "scheme://" or one of the borgstore "scheme:" specifiers (all of which are matched
# before local_re in _parse). the borgstore scheme list is sourced from BORGSTORE_SCHEMES.
local_path_re = r"(?!(//|(?:ssh|file)://|(?:" + "|".join(BORGSTORE_SCHEMES) + r"):))" r"(?P<path>.+)"
# abs_path must start with a slash (or drive letter on Windows).
abs_path_re = r"(?P<path>[A-Za-z]:/.+)" if is_win32 else r"(?P<path>/.+)"
@ -541,9 +552,14 @@ class Location:
# path may or may not start with a slash.
abs_or_rel_path_re = r"(?P<path>.+)"
# regexes for misc. kinds of supported location specifiers:
ssh_or_sftp_re = re.compile(
r"(?P<proto>(ssh|sftp))://"
# We only parse out individual fields (user/host/port/path) for the protocols where borg
# itself needs them: legacy "ssh" (v1 repositories) and "rest" (for the ssh tunnel + FILE
# backend), plus local "file" paths. Everything else (see BORGSTORE_SCHEMES) is handed to
# borgstore as the raw URL and parsed/validated there - we only detect the scheme.
# ssh:// is only used for legacy borg 1.x repositories nowadays.
ssh_re = re.compile(
r"(?P<proto>ssh)://"
+ optional_user_re
+ host_re
+ optional_port_re
@ -565,39 +581,8 @@ class Location:
re.VERBOSE,
)
# BorgStore REST server
# (http|https)://user:pass@host:port/
http_re = re.compile(
r"(?P<proto>http|https)://"
+ r"((?P<user>[^:@]+):(?P<pass>[^@]+)@)?"
+ host_re
+ optional_port_re
+ r"(?P<path>/)",
re.VERBOSE,
)
# (s3|b2):[(profile|(access_key_id:access_key_secret))@][scheme://hostname[:port]]/bucket/path
s3_re = re.compile(
r"""
(?P<s3type>(s3|b2)):
((
(?P<profile>[^@:]+) # profile (no colons allowed)
|
(?P<access_key_id>[^:@]+):(?P<access_key_secret>[^@]+) # access key and secret
)@)? # optional authentication
(
[^:/]+:// # scheme (often https)
(?P<hostname>[^:/]+)
(:(?P<port>\d+))?
)? # optional endpoint
/
(?P<bucket>[^/]+)/ # bucket name
(?P<path>.+) # path
""",
re.VERBOSE,
)
rclone_re = re.compile(r"(?P<proto>rclone):(?P<path>(.*))", re.VERBOSE)
# scheme detector for the borgstore-handled locations listed in BORGSTORE_SCHEMES above.
scheme_re = re.compile(r"(?P<scheme>[a-zA-Z][a-zA-Z0-9+.\-]*):")
sl = "/" if is_win32 else ""
file_re = re.compile(r"(?P<proto>file)://" + sl + abs_path_re, re.VERBOSE)
@ -636,7 +621,7 @@ class Location:
raise ValueError('Invalid location format: "%s"' % self.processed)
def _parse(self, text):
m = self.ssh_or_sftp_re.match(text)
m = self.ssh_re.match(text)
if m:
self.proto = m.group("proto")
self.user = m.group("user")
@ -652,33 +637,16 @@ class Location:
self.port = m.group("port") and int(m.group("port")) or None
self.path = os.path.normpath(m.group("path"))
return True
m = self.http_re.match(text)
if m:
self.proto = m.group("proto")
self.user = m.group("user")
self._pass = True if m.group("pass") else False
self._host = m.group("host")
self.port = m.group("port") and int(m.group("port")) or None
self.path = m.group("path")
return True
m = self.rclone_re.match(text)
if m:
self.proto = m.group("proto")
self.path = m.group("path")
return True
m = self.file_re.match(text)
if m:
self.proto = m.group("proto")
self.path = os.path.normpath(m.group("path"))
return True
m = self.s3_re.match(text)
if m:
self.proto = m.group("s3type")
self.user = m.group("profile") if m.group("profile") else m.group("access_key_id")
self._pass = True if m.group("access_key_secret") else False
self._host = m.group("hostname")
self.port = m.group("port") and int(m.group("port")) or None
self.path = m.group("bucket") + "/" + m.group("path")
m = self.scheme_re.match(text)
if m and m.group("scheme") in self.BORGSTORE_SCHEMES:
# borgstore parses/validates these; we only detect the scheme and pass the raw
# URL (self.processed) through to it - no fields are extracted here.
self.proto = m.group("scheme")
return True
m = self.local_re.match(text)
if m:
@ -711,9 +679,7 @@ class Location:
def canonical_path(self):
if self.proto == "file":
return self.path
if self.proto == "rclone":
return f"{self.proto}:{self.path}"
if self.proto in ("rest", "sftp", "ssh", "s3", "b2", "http", "https"):
if self.proto in ("rest", "ssh"):
return (
f"{self.proto}://"
f"{(self.user + '@') if self.user else ''}"
@ -721,6 +687,10 @@ class Location:
f"{(':' + str(self.port)) if self.port else ''}/"
f"{self.path}"
)
if self.proto in self.BORGSTORE_SCHEMES:
# borgstore-handled locations: use the raw (processed) URL as given, but strip any
# embedded credentials so we never write secrets to the security state file or logs.
return _redact_url_credentials(self.processed)
raise NotImplementedError(self.proto)
def with_timestamp(self, timestamp):

View file

@ -6,7 +6,6 @@ import os
import queue
import select
import shlex
import socket
import sys
import textwrap
import time
@ -23,7 +22,6 @@ from ..helpers import replace_placeholders
from ..helpers import sysinfo
from ..helpers import format_file_size
from ..helpers import prepare_subprocess_env, ignore_sigint
from ..helpers import get_socket_filename
from ..fslocking import LockTimeout, NotLocked, NotMyLock, LockFailed
from ..logger import create_logger, borg_serve_log_queue
from ..helpers import msgpack
@ -255,7 +253,7 @@ class LegacyRemoteRepository:
self.upload_buffer_size_limit = args.upload_buffer * 1024 * 1024 if args and args.upload_buffer else 0
self.unpacker = get_limited_unpacker("client")
self.server_version = None # we update this after server sends its version
self.p = self.sock = None
self.p = None
self._args = args
if self.location.proto == "ssh":
testing = location.host == "__testsuite__"
@ -281,25 +279,6 @@ class LegacyRemoteRepository:
self.stderr_fd = self.p.stderr.fileno()
self.r_fds = [self.stdout_fd, self.stderr_fd]
self.x_fds = [self.stdin_fd, self.stdout_fd, self.stderr_fd]
elif self.location.proto == "socket":
if args.use_socket is False or args.use_socket is True: # nothing or --socket
socket_path = get_socket_filename()
else: # --socket=/some/path
socket_path = args.use_socket
self.sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM)
try:
self.sock.connect(socket_path) # note: socket_path length is rather limited.
except FileNotFoundError:
self.sock = None
raise Error(f"The socket file {socket_path} does not exist.")
except ConnectionRefusedError:
self.sock = None
raise Error(f"There is no borg serve running for the socket file {socket_path}.")
self.stdin_fd = self.sock.makefile("wb").fileno()
self.stdout_fd = self.sock.makefile("rb").fileno()
self.stderr_fd = None
self.r_fds = [self.stdout_fd]
self.x_fds = [self.stdin_fd, self.stdout_fd]
else:
raise Error(f"Unsupported protocol {location.proto}")
@ -339,7 +318,7 @@ class LegacyRemoteRepository:
def __del__(self):
if len(self.responses):
logging.debug("still %d cached responses left in LegacyRemoteRepository" % (len(self.responses),))
if self.p or self.sock:
if self.p:
self.close()
assert False, "cleanup happened in LegacyRemoteRepository.__del__"
@ -661,21 +640,12 @@ class LegacyRemoteRepository:
"""actual remoting is done via self.call in the @api decorator"""
def close(self):
if self.p or self.sock:
self.call("close", {}, wait=True)
if self.p:
self.call("close", {}, wait=True)
self.p.stdin.close()
self.p.stdout.close()
self.p.wait()
self.p = None
if self.sock:
try:
self.sock.shutdown(socket.SHUT_RDWR)
except OSError as e:
if e.errno != errno.ENOTCONN:
raise
self.sock.close()
self.sock = None
def async_response(self, wait=True):
for resp in self.call_many("async_responses", calls=[], wait=True, async_wait=wait):

View file

@ -44,7 +44,7 @@ Logging setup in Borg needs to work under various conditions:
- Tests: potentially running in parallel via pytest-xdist, capturing Borg output into a
given stream.
- Logging might be short-lived (e.g., when invoking a single Borg command via the CLI)
or long-lived (e.g., borg serve --socket or when running the tests).
or long-lived (e.g., borg serve or when running the tests).
- Logging is global and exists only once per process.
"""

View file

@ -198,58 +198,51 @@ class TestLocationWithoutEnv:
== "Location(proto='rest', user=None, pass=None, host=None, port=None, path='/absolute/path')"
)
# For the protocols handled (parsed + validated) by borgstore itself, borg only detects
# the scheme and passes the raw URL through; it no longer extracts user/host/port/path.
def test_s3(self, monkeypatch):
monkeypatch.delenv("BORG_REPO", raising=False)
assert (
repr(Location("s3:/test/path"))
== "Location(proto='s3', user=None, pass=None, host=None, port=None, path='test/path')"
loc = Location("s3:/test/path")
assert loc.proto == "s3"
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
assert loc.processed == "s3:/test/path"
# credentials in the URL are stripped from canonical_path (security state file / logs)
assert Location("s3:profile@http://172.28.52.116:9000/test/path").canonical_path() == (
"s3:http://172.28.52.116:9000/test/path"
)
assert (
repr(Location("s3:profile@http://172.28.52.116:9000/test/path"))
== "Location(proto='s3', user='profile', pass=None, host='172.28.52.116', port=9000, path='test/path')" # noqa: E501
assert Location("s3:user:pass@http://172.28.52.116:9000/test/path").canonical_path() == (
"s3:http://172.28.52.116:9000/test/path"
)
assert (
repr(Location("s3:user:pass@http://172.28.52.116:9000/test/path"))
== "Location(proto='s3', user='user', pass='REDACTED', host='172.28.52.116', port=9000, path='test/path')" # noqa: E501
)
assert (
repr(Location("b2:user:pass@https://s3.us-east-005.backblazeb2.com/test/path"))
== "Location(proto='b2', user='user', pass='REDACTED', host='s3.us-east-005.backblazeb2.com', port=None, path='test/path')" # noqa: E501
assert Location("b2:user:pass@https://s3.us-east-005.backblazeb2.com/test/path").canonical_path() == (
"b2:https://s3.us-east-005.backblazeb2.com/test/path"
)
def test_rclone(self, monkeypatch):
monkeypatch.delenv("BORG_REPO", raising=False)
assert (
repr(Location("rclone:remote:path"))
== "Location(proto='rclone', user=None, pass=None, host=None, port=None, path='remote:path')"
)
loc = Location("rclone:remote:path")
assert loc.proto == "rclone"
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
assert loc.processed == "rclone:remote:path"
assert loc.canonical_path() == "rclone:remote:path"
def test_sftp(self, monkeypatch):
monkeypatch.delenv("BORG_REPO", raising=False)
# relative path
assert (
repr(Location("sftp://user@host:1234/rel/path"))
== "Location(proto='sftp', user='user', pass=None, host='host', port=1234, path='rel/path')"
)
# absolute path
assert (
repr(Location("sftp://user@host:1234//abs/path"))
== "Location(proto='sftp', user='user', pass=None, host='host', port=1234, path='/abs/path')"
)
loc = Location("sftp://user@host:1234/rel/path")
assert loc.proto == "sftp"
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
assert loc.processed == "sftp://user@host:1234/rel/path"
# credentials stripped from canonical_path
assert loc.canonical_path() == "sftp://host:1234/rel/path"
def test_http(self, monkeypatch):
monkeypatch.delenv("BORG_REPO", raising=False)
assert (
repr(Location("http://user:pass@host:1234/"))
== "Location(proto='http', user='user', pass='REDACTED', host='host', port=1234, path='/')"
)
def test_socket(self, monkeypatch):
monkeypatch.delenv("BORG_REPO", raising=False)
# socket:// is no longer supported and must be rejected as an invalid location.
url = "socket:///c:/repo/path" if is_win32 else "socket:///repo/path"
with pytest.raises(ValueError):
Location(url)
loc = Location("http://user:pass@host:1234/")
assert loc.proto == "http"
assert (loc.user, loc.host, loc.port, loc.path) == (None, None, None, None)
assert loc.processed == "http://user:pass@host:1234/"
# credentials stripped from canonical_path
assert loc.canonical_path() == "http://host:1234/"
def test_file(self, monkeypatch):
monkeypatch.delenv("BORG_REPO", raising=False)

View file

@ -51,7 +51,7 @@ def reopen(repository, exclusive: bool | None = True, create=False):
return LegacyRepository(repository.path, exclusive=exclusive, create=create)
if isinstance(repository, LegacyRemoteRepository):
if repository.p is not None or repository.sock is not None:
if repository.p is not None:
raise RuntimeError("Remote repo must be closed before a reopen. Cannot support nested repository contexts.")
return LegacyRemoteRepository(repository.location, exclusive=exclusive, create=create)