redo stale lock handling, fixes #3986

drop BORG_HOSTNAME_IS_UNIQUE (please use BORG_HOST_ID if needed)

borg now always assumes it has a unique hostid - either automatically
from fqdn plus uuid.getnode() or overridden via BORG_HOST_ID.
This commit is contained in:
Thomas Waldmann 2019-03-03 15:51:40 +01:00
parent 92e11833db
commit 7ad5290501
9 changed files with 32 additions and 30 deletions

View file

@ -205,6 +205,13 @@ Compatibility notes:
- option "--list-format" (2017-10), use "--format"
- option "--ignore-inode" (2017-09), use "--files-cache" w/o "inode"
- option "--no-files-cache" (2017-09), use "--files-cache=disabled"
- removed BORG_HOSTNAME_IS_UNIQUE env var.
to use borg you must implement one of these 2 scenarios:
- 1) the combination of FQDN and result of uuid.getnode() must be unique
and stable (this should be the case for almost everybody, except when
having duplicate FQDN *and* MAC address or all-zero MAC address)
- 2) if you are aware that 1) is not the case for you, you must set
BORG_HOST_ID env var to something unique.
Fixes:

View file

@ -12,7 +12,7 @@ that it is also ``borg serve`` and enforce path restriction(s) as given by the
forced command. That way, other options given by the client (like ``--info`` or
``--umask``) are preserved (and are not fixed by the forced command).
Environment variables (such as BORG_HOSTNAME_IS_UNIQUE) contained in the original
Environment variables (such as BORG_XXX) contained in the original
command sent by the client are *not* interpreted, but ignored. If BORG_XXX environment
variables should be set on the ``borg serve`` side, then these must be set in system-specific
locations like ``/etc/environment`` or in the forced command itself (example below).

View file

@ -19,7 +19,7 @@ from .helpers import get_cache_dir, get_security_dir
from .helpers import int_to_bigint, bigint_to_int, bin_to_hex, parse_stringified_list
from .helpers import format_file_size
from .helpers import safe_ns
from .helpers import yes, hostname_is_unique
from .helpers import yes
from .helpers import remove_surrogates
from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
from .helpers import set_ec, EXIT_WARNING
@ -256,8 +256,7 @@ class CacheConfig:
config.write(fd)
def open(self):
self.lock = Lock(os.path.join(self.path, 'lock'), exclusive=True, timeout=self.lock_wait,
kill_stale_locks=hostname_is_unique()).acquire()
self.lock = Lock(os.path.join(self.path, 'lock'), exclusive=True, timeout=self.lock_wait).acquire()
self.load()
def load(self):

View file

@ -102,7 +102,3 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None,
output(retry_msg, 'prompt_retry', is_prompt=True)
# in case we used an environment variable and it gave an invalid answer, do not use it again:
env_var_override = None
def hostname_is_unique():
return yes(env_var_override='BORG_HOSTNAME_IS_UNIQUE', prompt=False, env_msg=None, default=True)

View file

@ -101,13 +101,13 @@ class ExclusiveLock:
This makes sure the lock is released again if the block is left, no
matter how (e.g. if an exception occurred).
"""
def __init__(self, path, timeout=None, sleep=None, id=None, kill_stale_locks=False):
def __init__(self, path, timeout=None, sleep=None, id=None):
self.timeout = timeout
self.sleep = sleep
self.path = os.path.abspath(path)
self.id = id or platform.get_process_id()
self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id)
self.kill_stale_locks = kill_stale_locks
self.kill_stale_locks = True
self.stale_warning_printed = False
def __enter__(self):
@ -173,7 +173,7 @@ class ExclusiveLock:
if not self.kill_stale_locks:
if not self.stale_warning_printed:
# Log this at warning level to hint the user at the ability
logger.warning("Found stale lock %s, but not deleting because BORG_HOSTNAME_IS_UNIQUE is False.", name)
logger.warning("Found stale lock %s, but not deleting because self.kill_stale_locks = False.", name)
self.stale_warning_printed = True
return False
@ -223,10 +223,10 @@ class LockRoster:
Note: you usually should call the methods with an exclusive lock held,
to avoid conflicting access by multiple threads/processes/machines.
"""
def __init__(self, path, id=None, kill_stale_locks=False):
def __init__(self, path, id=None):
self.path = path
self.id = id or platform.get_process_id()
self.kill_stale_locks = kill_stale_locks
self.kill_stale_locks = True
def load(self):
try:
@ -320,18 +320,18 @@ class Lock:
This makes sure the lock is released again if the block is left, no
matter how (e.g. if an exception occurred).
"""
def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None, kill_stale_locks=False):
def __init__(self, path, exclusive=False, sleep=None, timeout=None, id=None):
self.path = path
self.is_exclusive = exclusive
self.sleep = sleep
self.timeout = timeout
self.id = id or platform.get_process_id()
# globally keeping track of shared and exclusive lockers:
self._roster = LockRoster(path + '.roster', id=id, kill_stale_locks=kill_stale_locks)
self._roster = LockRoster(path + '.roster', id=id)
# an exclusive lock, used for:
# - holding while doing roster queries / updates
# - holding while the Lock itself is exclusive
self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout, kill_stale_locks=kill_stale_locks)
self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout)
def __enter__(self):
return self.acquire()

View file

@ -22,7 +22,6 @@ from .helpers import Error, IntegrityError
from .helpers import bin_to_hex
from .helpers import get_base_dir
from .helpers import get_limited_unpacker
from .helpers import hostname_is_unique
from .helpers import replace_placeholders
from .helpers import sysinfo
from .helpers import format_file_size
@ -679,8 +678,6 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
if 'storage_quota' in args and args.storage_quota:
opts.append('--storage-quota=%s' % args.storage_quota)
env_vars = []
if not hostname_is_unique():
env_vars.append('BORG_HOSTNAME_IS_UNIQUE=no')
if testing:
return env_vars + [sys.executable, '-m', 'borg.archiver', 'serve'] + opts + self.extra_test_args
else: # pragma: no cover

View file

@ -17,7 +17,6 @@ from .helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size
from .helpers import Location
from .helpers import ProgressIndicatorPercent
from .helpers import bin_to_hex
from .helpers import hostname_is_unique
from .helpers import secure_erase, truncate_and_unlink
from .helpers import Manifest
from .helpers import msgpack
@ -390,7 +389,7 @@ class Repository:
if not os.path.isdir(path):
raise self.DoesNotExist(path)
if lock:
self.lock = Lock(os.path.join(path, 'lock'), exclusive, timeout=lock_wait, kill_stale_locks=hostname_is_unique()).acquire()
self.lock = Lock(os.path.join(path, 'lock'), exclusive, timeout=lock_wait).acquire()
else:
self.lock = None
self.config = ConfigParser(interpolation=None)

View file

@ -3640,7 +3640,7 @@ def test_get_args():
# were not forced, environment variables would be interpreted by the shell, but this does not
# happen for forced commands - we get the verbatim command line and need to deal with env vars.
args = archiver.get_args(['borg', 'serve', ],
'BORG_HOSTNAME_IS_UNIQUE=yes borg serve --info')
'BORG_FOO=bar borg serve --info')
assert args.func == archiver.do_serve

View file

@ -67,7 +67,7 @@ class TestExclusiveLock:
cant_know_if_dead_id = 'foo.bar.example.net', 1, 2
dead_lock = ExclusiveLock(lockpath, id=dead_id).acquire()
with ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True):
with ExclusiveLock(lockpath, id=our_id):
with pytest.raises(NotMyLock):
dead_lock.release()
with pytest.raises(NotLocked):
@ -75,7 +75,7 @@ class TestExclusiveLock:
with ExclusiveLock(lockpath, id=cant_know_if_dead_id):
with pytest.raises(LockTimeout):
ExclusiveLock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire()
ExclusiveLock(lockpath, id=our_id, timeout=0.1).acquire()
def test_migrate_lock(self, lockpath):
old_id, new_id = ID1, ID2
@ -157,7 +157,7 @@ class TestLock:
dead_lock = Lock(lockpath, id=dead_id, exclusive=True).acquire()
roster = dead_lock._roster
with Lock(lockpath, id=our_id, kill_stale_locks=True):
with Lock(lockpath, id=our_id):
assert roster.get(EXCLUSIVE) == set()
assert roster.get(SHARED) == {our_id}
assert roster.get(EXCLUSIVE) == set()
@ -167,7 +167,7 @@ class TestLock:
with Lock(lockpath, id=cant_know_if_dead_id, exclusive=True):
with pytest.raises(LockTimeout):
Lock(lockpath, id=our_id, kill_stale_locks=True, timeout=0.1).acquire()
Lock(lockpath, id=our_id, timeout=0.1).acquire()
def test_migrate_lock(self, lockpath):
old_id, new_id = ID1, ID2
@ -217,25 +217,29 @@ class TestLockRoster:
host, pid, tid = our_id = get_process_id()
dead_id = host, free_pid, tid
# put a dead local process lock into roster
roster1 = LockRoster(rosterpath, id=dead_id)
roster1.kill_stale_locks = False
assert roster1.get(SHARED) == set()
roster1.modify(SHARED, ADD)
assert roster1.get(SHARED) == {dead_id}
# put a unknown-state remote process lock into roster
cant_know_if_dead_id = 'foo.bar.example.net', 1, 2
roster1 = LockRoster(rosterpath, id=cant_know_if_dead_id)
roster1.kill_stale_locks = False
assert roster1.get(SHARED) == {dead_id}
roster1.modify(SHARED, ADD)
assert roster1.get(SHARED) == {dead_id, cant_know_if_dead_id}
killer_roster = LockRoster(rosterpath, kill_stale_locks=True)
# Did kill the dead processes lock (which was alive ... I guess?!)
killer_roster = LockRoster(rosterpath)
# Active kill_stale_locks here - does it kill the dead_id lock?
assert killer_roster.get(SHARED) == {cant_know_if_dead_id}
killer_roster.modify(SHARED, ADD)
assert killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}
other_killer_roster = LockRoster(rosterpath, kill_stale_locks=True)
# Did not kill us, since we're alive
other_killer_roster = LockRoster(rosterpath)
# Active kill_stale_locks here - must not kill our_id lock since we're alive.
assert other_killer_roster.get(SHARED) == {our_id, cant_know_if_dead_id}
def test_migrate_lock(self, rosterpath):