redo stale lock handling, fixes #3986

drop BORG_HOSTNAME_IS_UNIQUE (please use BORG_HOST_ID if needed) borg now always assumes it has a unique hostid - either automatically from fqdn plus uuid.getnode() or overridden via BORG_HOST_ID.
2026-05-28 04:03:21 -04:00 · 2019-03-03 15:51:40 +01:00 · 2019-03-03 15:51:40 +01:00 · 7ad5290501
commit 7ad5290501
parent 92e11833db
9 changed files with 32 additions and 30 deletions
--- a/docs/changes.rst
+++ b/docs/changes.rst
@ -205,6 +205,13 @@ Compatibility notes:
  - option "--list-format" (2017-10), use "--format"
  - option "--ignore-inode" (2017-09), use "--files-cache" w/o "inode"
  - option "--no-files-cache" (2017-09), use "--files-cache=disabled"
+- removed BORG_HOSTNAME_IS_UNIQUE env var.
+  to use borg you must implement one of these 2 scenarios:
+  - 1) the combination of FQDN and result of uuid.getnode() must be unique
+       and stable (this should be the case for almost everybody, except when
+       having duplicate FQDN *and* MAC address or all-zero MAC address)
+  - 2) if you are aware that 1) is not the case for you, you must set
+       BORG_HOST_ID env var to something unique.

 Fixes:

--- a/docs/usage/serve.rst
+++ b/docs/usage/serve.rst
@ -12,7 +12,7 @@ that it is also ``borg serve`` and enforce path restriction(s) as given by the
 forced command. That way, other options given by the client (like ``--info`` or
 ``--umask``) are preserved (and are not fixed by the forced command).

-Environment variables (such as BORG_HOSTNAME_IS_UNIQUE) contained in the original
+Environment variables (such as BORG_XXX) contained in the original
 command sent by the client are *not* interpreted, but ignored. If BORG_XXX environment
 variables should be set on the ``borg serve`` side, then these must be set in system-specific
 locations like ``/etc/environment`` or in the forced command itself (example below).
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@ -19,7 +19,7 @@ from .helpers import get_cache_dir, get_security_dir
 from .helpers import int_to_bigint, bigint_to_int, bin_to_hex, parse_stringified_list
 from .helpers import format_file_size
 from .helpers import safe_ns
-from .helpers import yes, hostname_is_unique
+from .helpers import yes
 from .helpers import remove_surrogates
 from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
 from .helpers import set_ec, EXIT_WARNING
@ -256,8 +256,7 @@ class CacheConfig:
            config.write(fd)

    def open(self):
-        self.lock = Lock(os.path.join(self.path, 'lock'), exclusive=True, timeout=self.lock_wait,
-                         kill_stale_locks=hostname_is_unique()).acquire()
+        self.lock = Lock(os.path.join(self.path, 'lock'), exclusive=True, timeout=self.lock_wait).acquire()
        self.load()

    def load(self):
--- a/src/borg/helpers/yes.py
+++ b/src/borg/helpers/yes.py
@ -102,7 +102,3 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None,
            output(retry_msg, 'prompt_retry', is_prompt=True)
        # in case we used an environment variable and it gave an invalid answer, do not use it again:
        env_var_override = None
-
-
-def hostname_is_unique():
-    return yes(env_var_override='BORG_HOSTNAME_IS_UNIQUE', prompt=False, env_msg=None, default=True)
--- a/src/borg/locking.py
+++ b/src/borg/locking.py
@ -101,13 +101,13 @@ class ExclusiveLock:
    This makes sure the lock is released again if the block is left, no
    matter how (e.g. if an exception occurred).
    """
-    def __init__(self, path, timeout=None, sleep=None, id=None, kill_stale_locks=False):
+    def __init__(self, path, timeout=None, sleep=None, id=None):
        self.timeout = timeout
        self.sleep = sleep
        self.path = os.path.abspath(path)
        self.id = id or platform.get_process_id()
        self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id)
-        self.kill_stale_locks = kill_stale_locks
+        self.kill_stale_locks = True
        self.stale_warning_printed = False

    def __enter__(self):
@ -173,7 +173,7 @@ class ExclusiveLock:
            if not self.kill_stale_locks:
                if not self.stale_warning_printed:
                    # Log this at warning level to hint the user at the ability
-                    logger.warning("Found stale lock %s, but not deleting because BORG_HOSTNAME_IS_UNIQUE is False.", name)
+                    logger.warning("Found stale lock %s, but not deleting because self.kill_stale_locks = False.", name)
                    self.stale_warning_printed = True
                return False

@ -223,10 +223,10 @@ class LockRoster:
    Note: you usually should call the methods with an exclusive lock held,
    to avoid conflicting access by multiple threads/processes/machines.
    """
-    def __init__(self, path, id=None, kill_stale_locks=False):
+    def __init__(self, path, id=None):
        self.path = path
        self.id = id or platform.get_process_id()
-        self.kill_stale_locks = kill_stale_locks
+        self.kill_stale_locks = True

    def load(self):
        try:
@ -320,18 +320,18 @@ class Lock:
    This makes sure the lock is released again if the block is left, no
    matter how (e.g. if an exception occurred).
    """
-    def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None, kill_stale_locks=False):
+    def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None):
        self.path = path
        self.is_exclusive = exclusive
        self.sleep = sleep
        self.timeout = timeout
        self.id = id or platform.get_process_id()
        # globally keeping track of shared and exclusive lockers:
-        self._roster = LockRoster(path + '.roster', id=id, kill_stale_locks=kill_stale_locks)
+        self._roster = LockRoster(path + '.roster', id=id)
        # an exclusive lock, used for:
        # - holding while doing roster queries / updates
        # - holding while the Lock itself is exclusive
-        self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout, kill_stale_locks=kill_stale_locks)
+        self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout)

    def __enter__(self):
        return self.acquire()
--- a/src/borg/remote.py
+++ b/src/borg/remote.py
@ -22,7 +22,6 @@ from .helpers import Error, IntegrityError
 from .helpers import bin_to_hex
 from .helpers import get_base_dir
 from .helpers import get_limited_unpacker
-from .helpers import hostname_is_unique
 from .helpers import replace_placeholders
 from .helpers import sysinfo
 from .helpers import format_file_size
@ -679,8 +678,6 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
            if 'storage_quota' in args and args.storage_quota:
                opts.append('--storage-quota=%s' % args.storage_quota)
        env_vars = []
-        if not hostname_is_unique():
-            env_vars.append('BORG_HOSTNAME_IS_UNIQUE=no')
        if testing:
            return env_vars + [sys.executable, '-m', 'borg.archiver', 'serve'] + opts + self.extra_test_args
        else:  # pragma: no cover
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@ -17,7 +17,6 @@ from .helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size
 from .helpers import Location
 from .helpers import ProgressIndicatorPercent
 from .helpers import bin_to_hex
-from .helpers import hostname_is_unique
 from .helpers import secure_erase, truncate_and_unlink
 from .helpers import Manifest
 from .helpers import msgpack
@ -390,7 +389,7 @@ class Repository:
        if not os.path.isdir(path):
            raise self.DoesNotExist(path)
        if lock:
-            self.lock = Lock(os.path.join(path, 'lock'), exclusive, timeout=lock_wait, kill_stale_locks=hostname_is_unique()).acquire()
+            self.lock = Lock(os.path.join(path, 'lock'), exclusive, timeout=lock_wait).acquire()
        else:
            self.lock = None
        self.config = ConfigParser(interpolation=None)
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@ -3640,7 +3640,7 @@ def test_get_args():
    # were not forced, environment variables would be interpreted by the shell, but this does not
    # happen for forced commands - we get the verbatim command line and need to deal with env vars.
    args = archiver.get_args(['borg', 'serve', ],
-                             'BORG_HOSTNAME_IS_UNIQUE=yes borg serve --info')
+                             'BORG_FOO=bar borg serve --info')
    assert args.func == archiver.do_serve


--- a/src/borg/testsuite/locking.py
+++ b/src/borg/testsuite/locking.py
@ -67,7 +67,7 @@ class TestExclusiveLock:
        cant_know_if_dead_id = 'foo.bar.example.net', 1, 2

        dead_lock = ExclusiveLock(lockpath, id=dead_id).acquire()
-        with ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True):
+        with ExclusiveLock(lockpath, id=our_id):
            with pytest.raises(NotMyLock):
                dead_lock.release()
        with pytest.raises(NotLocked):
@ -75,7 +75,7 @@ class TestExclusiveLock:

        with ExclusiveLock(lockpath, id=cant_know_if_dead_id):
            with pytest.raises(LockTimeout):
-                ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire()
+                ExclusiveLock(lockpath, id=our_id, timeout=0.1).acquire()

    def test_migrate_lock(self, lockpath):
        old_id, new_id = ID1, ID2
@ -157,7 +157,7 @@ class TestLock:

        dead_lock = Lock(lockpath, id=dead_id, exclusive=True).acquire()
        roster = dead_lock._roster
-        with Lock(lockpath, id=our_id, kill_stale_locks=True):
+        with Lock(lockpath, id=our_id):
            assert roster.get(EXCLUSIVE) == set()
            assert roster.get(SHARED) == {our_id}
        assert roster.get(EXCLUSIVE) == set()
@ -167,7 +167,7 @@ class TestLock:

        with Lock(lockpath, id=cant_know_if_dead_id, exclusive=True):
            with pytest.raises(LockTimeout):
-                Lock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire()
+                Lock(lockpath, id=our_id, timeout=0.1).acquire()

    def test_migrate_lock(self, lockpath):
        old_id, new_id = ID1, ID2
@ -217,25 +217,29 @@ class TestLockRoster:
        host, pid, tid = our_id = get_process_id()
        dead_id = host, free_pid, tid

+        # put a dead local process lock into roster
        roster1 = LockRoster(rosterpath, id=dead_id)
+        roster1.kill_stale_locks = False
        assert roster1.get(SHARED) == set()
        roster1.modify(SHARED, ADD)
        assert roster1.get(SHARED) == {dead_id}

+        # put a unknown-state remote process lock into roster
        cant_know_if_dead_id = 'foo.bar.example.net', 1, 2
        roster1 = LockRoster(rosterpath, id=cant_know_if_dead_id)
+        roster1.kill_stale_locks = False
        assert roster1.get(SHARED) == {dead_id}
        roster1.modify(SHARED, ADD)
        assert roster1.get(SHARED) == {dead_id, cant_know_if_dead_id}

-        killer_roster = LockRoster(rosterpath, kill_stale_locks=True)
-        # Did kill the dead processes lock (which was alive ... I guess?!)
+        killer_roster = LockRoster(rosterpath)
+        # Active kill_stale_locks here - does it kill the dead_id lock?
        assert killer_roster.get(SHARED) == {cant_know_if_dead_id}
        killer_roster.modify(SHARED, ADD)
        assert killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}

-        other_killer_roster = LockRoster(rosterpath, kill_stale_locks=True)
-        # Did not kill us, since we're alive
+        other_killer_roster = LockRoster(rosterpath)
+        # Active kill_stale_locks here - must not kill our_id lock since we're alive.
        assert other_killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}

    def test_migrate_lock(self, rosterpath):