# Multi-file git patch (unified diff) whose original line breaks were collapsed:
# each physical line below holds many diff rows joined by single spaces, so the
# text has to be re-split into one diff row per line before a tool can apply it.
#
# Files touched and what each change does, as visible in the patch text:
#   docs/misc/logging.conf      new file: INI logging config with a root logger
#                               (level NOTSET) and one FileHandler (level INFO)
#                               created with args ('borg.log', 'w').
#   docs/usage_general.rst.inc  the BORG_LOGGING_CONF entry now points at that
#                               example file.
#   src/borg/constants.py       ARCHIVE_KEYS gains 'recreate_partial_chunks';
#                               adds FD_MAX_AGE = 4 * 60 (commented as 4 minutes).
#   src/borg/helpers.py         expected item.API_VERSION goes '1.1_02' -> '1.1_03'.
#   src/borg/item.pyx           API_VERSION bumped to '1.1_03'; ArchiveItem.VALID_KEYS
#                               becomes ARCHIVE_KEYS (a frozenset) instead of its own
#                               set literal; recreate_source_id property is moved
#                               below a new comment.
#   src/borg/lrucache.py        adds LRUCache.upd(key, value), which overwrites the
#                               cached value of an existing key without disposing it.
#   src/borg/repository.py      imports time; LoggedIO caches (timestamp, fd) tuples;
#                               close_fd is renamed _close_fd and unpacks the tuple;
#                               get_fd() deletes entries older than FD_MAX_AGE (the
#                               sweep runs at most once per FD_MAX_AGE // 8 seconds
#                               of time.monotonic()) and refreshes the timestamp of
#                               the entry it returns.
#
# NOTE(review): upd() checks key existence with `assert`; asserts are removed when
#   Python runs with -O, in which case a missing key would simply be stored.
# NOTE(review): repository.py uses FD_MAX_AGE and `del self.fds[k]`, but neither the
#   import of FD_MAX_AGE nor LRUCache item deletion is shown in this patch -- confirm
#   both exist in the target tree.
# NOTE(review): because the sweep is throttled, a cache hit can return an fd whose
#   timestamp is up to roughly FD_MAX_AGE + FD_MAX_AGE // 8 seconds old, so the
#   "we only have fresh enough stuff here" comment holds only within that margin.
# NOTE(review): presumably args=('borg.log', 'w') means FileHandler(filename, mode),
#   i.e. the log is truncated on every start and created in the current working
#   directory -- confirm that is intended for the example.
diff --git a/docs/misc/logging.conf b/docs/misc/logging.conf new file mode 100644 index 000000000..be8fb9cdc --- /dev/null +++ b/docs/misc/logging.conf @@ -0,0 +1,23 @@ +[loggers] +keys=root + +[handlers] +keys=logfile + +[formatters] +keys=logfile + +[logger_root] +level=NOTSET +handlers=logfile + +[handler_logfile] +class=FileHandler +level=INFO +formatter=logfile +args=('borg.log', 'w') + +[formatter_logfile] +format=%(asctime)s %(levelname)s %(message)s +datefmt= +class=logging.Formatter diff --git a/docs/usage_general.rst.inc b/docs/usage_general.rst.inc index d2d7d5c27..2d4c05333 100644 --- a/docs/usage_general.rst.inc +++ b/docs/usage_general.rst.inc @@ -202,6 +202,7 @@ General: use fqdn@uniqueid. BORG_LOGGING_CONF When set, use the given filename as INI_-style logging configuration. + A basic example conf can be found at ``docs/misc/logging.conf``. BORG_RSH When set, use this command instead of ``ssh``. This can be used to specify ssh options, such as a custom identity file ``ssh -i /path/to/private/key``. See ``man ssh`` for other options. Using diff --git a/src/borg/constants.py b/src/borg/constants.py index 5a67da54a..0de72b044 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -10,7 +10,9 @@ REQUIRED_ITEM_KEYS = frozenset(['path', 'mtime', ]) # this set must be kept complete, otherwise rebuild_manifest might malfunction: ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end', 'comment', 'chunker_params', - 'recreate_cmdline', 'recreate_source_id', 'recreate_args']) + 'recreate_cmdline', + 'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', # used in 1.1.0b1 .. 
b2 + ]) # this is the set of keys that are always present in archives: REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ]) @@ -52,6 +54,8 @@ LIST_SCAN_LIMIT = 100000 DEFAULT_SEGMENTS_PER_DIR = 1000 +FD_MAX_AGE = 4 * 60 # 4 minutes + CHUNK_MIN_EXP = 19 # 2**19 == 512kiB CHUNK_MAX_EXP = 23 # 2**23 == 8MiB HASH_WINDOW_SIZE = 0xfff # 4095B diff --git a/src/borg/helpers.py b/src/borg/helpers.py index ed6b3afeb..f7a02e2d9 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -141,7 +141,7 @@ def check_extension_modules(): raise ExtensionModuleError if platform.API_VERSION != platform.OS_API_VERSION or platform.API_VERSION != '1.1_04': raise ExtensionModuleError - if item.API_VERSION != '1.1_02': + if item.API_VERSION != '1.1_03': raise ExtensionModuleError diff --git a/src/borg/item.pyx b/src/borg/item.pyx index 1dc0b1f61..6b3cc70ce 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -3,12 +3,12 @@ import stat from collections import namedtuple -from .constants import ITEM_KEYS +from .constants import ITEM_KEYS, ARCHIVE_KEYS from .helpers import safe_encode, safe_decode from .helpers import bigint_to_int, int_to_bigint from .helpers import StableDict -API_VERSION = '1.1_02' +API_VERSION = '1.1_03' class PropDict: @@ -292,10 +292,7 @@ class ArchiveItem(PropDict): If a ArchiveItem shall be serialized, give as_dict() method output to msgpack packer. 
""" - VALID_KEYS = {'version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end', - 'comment', 'chunker_params', - 'recreate_cmdline', 'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', - } # str-typed keys + VALID_KEYS = ARCHIVE_KEYS # str-typed keys __slots__ = ("_dict", ) # avoid setting attributes not supported by properties @@ -309,8 +306,9 @@ class ArchiveItem(PropDict): time_end = PropDict._make_property('time_end', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) comment = PropDict._make_property('comment', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) chunker_params = PropDict._make_property('chunker_params', tuple) - recreate_source_id = PropDict._make_property('recreate_source_id', bytes) recreate_cmdline = PropDict._make_property('recreate_cmdline', list) # list of s-e-str + # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2 + recreate_source_id = PropDict._make_property('recreate_source_id', bytes) recreate_args = PropDict._make_property('recreate_args', list) # list of s-e-str recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list) # list of tuples diff --git a/src/borg/lrucache.py b/src/borg/lrucache.py index 492e18b62..4f7f1f829 100644 --- a/src/borg/lrucache.py +++ b/src/borg/lrucache.py @@ -39,12 +39,17 @@ class LRUCache: self._lru.append(key) return value + def upd(self, key, value): + # special use only: update the value for an existing key without having to dispose it first + # this method complements __setitem__ which should be used for the normal use case. + assert key in self._cache, "Unexpected attempt to update a non-existing item." 
+ self._cache[key] = value + def clear(self): for value in self._cache.values(): self._dispose(value) self._cache.clear() - # useful for testing def items(self): return self._cache.items() diff --git a/src/borg/repository.py b/src/borg/repository.py index afa6a3f91..ef70ccd69 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -3,6 +3,7 @@ import mmap import os import shutil import struct +import time from binascii import hexlify, unhexlify from collections import defaultdict from configparser import ConfigParser @@ -1164,20 +1165,21 @@ class LoggedIO: def __init__(self, path, limit, segments_per_dir, capacity=90): self.path = path - self.fds = LRUCache(capacity, - dispose=self.close_fd) + self.fds = LRUCache(capacity, dispose=self._close_fd) self.segment = 0 self.limit = limit self.segments_per_dir = segments_per_dir self.offset = 0 self._write_fd = None + self._fds_cleaned = 0 def close(self): self.close_segment() self.fds.clear() self.fds = None # Just to make sure we're disabled - def close_fd(self, fd): + def _close_fd(self, ts_fd): + ts, fd = ts_fd safe_fadvise(fd.fileno(), 0, 0, 'DONTNEED') fd.close() @@ -1291,13 +1293,37 @@ class LoggedIO: return self._write_fd def get_fd(self, segment): - try: - return self.fds[segment] - except KeyError: + # note: get_fd() returns a fd with undefined file pointer position, + # so callers must always seek() to desired position afterwards. + now = time.monotonic() + + def open_fd(): fd = open(self.segment_filename(segment), 'rb') - self.fds[segment] = fd + self.fds[segment] = (now, fd) return fd + def clean_old(): + # we regularly get rid of all old FDs here: + if now - self._fds_cleaned > FD_MAX_AGE // 8: + self._fds_cleaned = now + for k, ts_fd in list(self.fds.items()): + ts, fd = ts_fd + if now - ts > FD_MAX_AGE: + # we do not want to touch long-unused file handles to + # avoid ESTALE issues (e.g. on network filesystems). 
+ del self.fds[k] + + clean_old() + try: + ts, fd = self.fds[segment] + except KeyError: + fd = open_fd() + else: + # we only have fresh enough stuff here. + # update the timestamp of the lru cache entry. + self.fds.upd(segment, (now, fd)) + return fd + def close_segment(self): # set self._write_fd to None early to guard against reentry from error handling code paths: fd, self._write_fd = self._write_fd, None