From 3e6a9d677fc5826002e260d2c003d11409e1caee Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 10 Aug 2025 15:51:29 +0200 Subject: [PATCH 01/16] code: fix typos and grammar --- src/borg/__init__.py | 2 +- src/borg/__main__.py | 10 ++-- src/borg/algorithms/__init__.py | 2 +- src/borg/archive.py | 22 +++---- src/borg/archiver.py | 14 ++--- src/borg/cache.py | 12 ++-- src/borg/constants.py | 42 +++++++------- src/borg/crypto/file_integrity.py | 20 +++---- src/borg/crypto/key.py | 27 +++++---- src/borg/crypto/keymanager.py | 2 +- src/borg/crypto/nonces.py | 20 +++---- src/borg/fuse.py | 4 +- src/borg/fuse_impl.py | 2 +- src/borg/helpers/__init__.py | 37 ++++++------ src/borg/helpers/datastruct.py | 38 ++++++------- src/borg/helpers/errors.py | 10 ++-- src/borg/helpers/fs.py | 29 +++++----- src/borg/helpers/manifest.py | 16 +++--- src/borg/helpers/misc.py | 28 ++++----- src/borg/helpers/msgpack.py | 4 +- src/borg/helpers/parseformat.py | 10 ++-- src/borg/helpers/process.py | 4 +- src/borg/helpers/progress.py | 12 ++-- src/borg/helpers/time.py | 30 +++++----- src/borg/helpers/yes.py | 56 +++++++++--------- src/borg/locking.py | 34 +++++------ src/borg/logger.py | 58 +++++++++---------- src/borg/lrucache.py | 4 +- src/borg/nanorst.py | 10 ++-- src/borg/patterns.py | 20 +++---- src/borg/platform/__init__.py | 10 ++-- src/borg/platform/base.py | 42 +++++++------- src/borg/platform/xattr.py | 18 +++--- src/borg/platformflags.py | 4 +- src/borg/remote.py | 14 ++--- src/borg/repository.py | 16 +++--- src/borg/selftest.py | 8 +-- src/borg/shellpattern.py | 4 +- src/borg/upgrader.py | 95 ++++++++++++++++--------------- src/borg/version.py | 4 +- src/borg/xattr.py | 27 +++++---- 41 files changed, 409 insertions(+), 412 deletions(-) diff --git a/src/borg/__init__.py b/src/borg/__init__.py index 9f13c7d64..986485b2e 100644 --- a/src/borg/__init__.py +++ b/src/borg/__init__.py @@ -1,6 +1,6 @@ from packaging.version import parse as parse_version -# IMPORTANT keep imports 
from borg here to a minimum because our testsuite depends on +# IMPORTANT keep imports from borg here to a minimum because our test suite depends on # being able to import borg.constants and then monkey patching borg.constants.PBKDF2_ITERATIONS from ._version import version as __version__ diff --git a/src/borg/__main__.py b/src/borg/__main__.py index 73a2187de..cb76b7bf9 100644 --- a/src/borg/__main__.py +++ b/src/borg/__main__.py @@ -1,12 +1,12 @@ import sys import os -# On windows loading the bundled libcrypto dll fails if the folder -# containing the dll is not in the search path. The dll is shipped -# with python in the "DLLs" folder, so let's add this folder -# to the path. The folder is always in sys.path, get it from there. +# On Windows, loading the bundled libcrypto DLL fails if the folder +# containing the DLL is not in the search path. The DLL is shipped +# with Python in the "DLLs" folder, so let's add this folder +# to the PATH. The folder is always in sys.path; get it from there. if sys.platform.startswith('win32'): - # Keep it an iterable to support multiple folder which contain "DLLs". + # Keep it as an iterable to support multiple folders that contain "DLLs". dll_path = (p for p in sys.path if 'DLLs' in os.path.normpath(p).split(os.path.sep)) os.environ['PATH'] = os.pathsep.join(dll_path) + os.pathsep + os.environ['PATH'] diff --git a/src/borg/algorithms/__init__.py b/src/borg/algorithms/__init__.py index 32a0cfdbf..1cf662e3e 100644 --- a/src/borg/algorithms/__init__.py +++ b/src/borg/algorithms/__init__.py @@ -8,6 +8,6 @@ Ideally these would be sourced from existing libraries, but: - are frequently not available yet (lz4, zstd), -- are available but in poor form (crc32), or +- are available but in poor form (CRC32), or - don't really make sense as a library (xxHash). 
""" diff --git a/src/borg/archive.py b/src/borg/archive.py index 0ec5d7b9e..ad17dc9be 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -219,20 +219,20 @@ def backup_io_iter(iterator): def stat_update_check(st_old, st_curr): """ - this checks for some race conditions between the first filename-based stat() - we did before dispatching to the (hopefully correct) file type backup handler - and the (hopefully) fd-based fstat() we did in the handler. + This checks for race conditions between the first filename-based stat() + performed before dispatching to the (hopefully correct) file type backup handler + and the (hopefully) fd-based fstat() performed in the handler. - if there is a problematic difference (e.g. file type changed), we rather - skip the file than being tricked into a security problem. + If there is a problematic difference (e.g., the file type changed), we would rather + skip the file than risk a security problem. - such races should only happen if: - - we are backing up a live filesystem (no snapshot, not inactive) - - if files change due to normal fs activity at an unfortunate time - - if somebody is doing an attack against us + Such races should only happen if: + - We are backing up a live filesystem (no snapshot, not inactive). + - Files change due to normal fs activity at an unfortunate time. + - Somebody is performing an attack against us. 
""" - # assuming that a file type change implicates a different inode change AND that inode numbers - # are not duplicate in a short timeframe, this check is redundant and solved by the ino check: + # assuming that a file type change implies a different inode change AND that inode numbers + # are not duplicated in a short timeframe, this check is redundant and solved by the ino check: if stat.S_IFMT(st_old.st_mode) != stat.S_IFMT(st_curr.st_mode): # in this case, we dispatched to wrong handler - abort raise BackupRaceConditionError('file type changed (race condition), skipping file') diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 498d87b51..0720af034 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -1,9 +1,9 @@ -# borg cli interface / toplevel archiver code +# Borg CLI interface / top-level archiver code import sys import traceback -# quickfix to disallow running borg with assertions switched off +# Quick fix to disallow running Borg with assertions switched off try: assert False except AssertionError: @@ -98,9 +98,9 @@ try: from .selftest import selftest from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader except BaseException: - # an unhandled exception in the try-block would cause the borg cli command to exit with rc 1 due to python's - # default behavior, see issue #4424. - # as borg defines rc 1 as WARNING, this would be a mismatch, because a crash should be an ERROR (rc 2). + # An unhandled exception in the try-block would cause the Borg CLI command to exit with rc 1 due to Python's + # default behavior; see issue #4424. + # As Borg defines rc 1 as WARNING, this would be a mismatch, because a crash should be an ERROR (rc 2). traceback.print_exc() sys.exit(2) # == EXIT_ERROR @@ -160,7 +160,7 @@ def with_repository(fake=False, invert_fake=False, create=False, lock=True, # `nonlocal` statement to access `lock` as modifications would also # affect the scope outside of `wrapper`. 
Subsequent calls would # only see the overwritten value of `lock`, not the original one. - # The solution is to define a place holder variable `_lock` to + # The solution is to define a placeholder variable `_lock` to # propagate the value into `wrapper`. _lock = lock @@ -5316,7 +5316,7 @@ class Archiver: selftest(logger) def _setup_implied_logging(self, args): - """ turn on INFO level logging for args that imply that they will produce output """ + """Turn on INFO level logging for args that imply that they will produce output.""" # map of option name to name of logger for that option option_logger = { 'output_list': 'borg.output.list', diff --git a/src/borg/cache.py b/src/borg/cache.py index 57830f5c6..54874ecc8 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -35,7 +35,7 @@ from .platform import SaveFile from .remote import cache_if_remote from .repository import LIST_SCAN_LIMIT -# note: cmtime might me either a ctime or a mtime timestamp +# note: cmtime might be either a ctime or an mtime timestamp FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size cmtime chunk_ids') @@ -69,7 +69,7 @@ class SecurityManager: @staticmethod def destroy(repository, path=None): - """destroy the security dir for ``repository`` or at ``path``""" + """Destroy the security dir for ``repository`` or at ``path``.""" path = path or get_security_dir(repository.id_str) if os.path.exists(path): shutil.rmtree(path) @@ -338,7 +338,7 @@ class CacheConfig: class Cache: - """Client Side cache + """Client-side cache. 
""" class CacheInitAbortedError(Error): """Cache initialization aborted""" @@ -367,7 +367,7 @@ class Cache: @staticmethod def destroy(repository, path=None): - """destroy the cache for ``repository`` or at ``path``""" + """Destroy the cache for ``repository`` or at ``path``.""" path = path or os.path.join(get_cache_dir(), repository.id_str) config = os.path.join(path, 'config') if os.path.exists(config): @@ -416,7 +416,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" def __init__(self, iec=False): self.iec = iec - self.pre12_meta = {} # here we cache archive metadata for borg < 1.2 + self.pre12_meta = {} # Here we cache archive metadata for Borg < 1.2. def __str__(self): return self.str_format.format(self.format_tuple()) @@ -892,7 +892,7 @@ class LocalCache(CacheStatsMixin): return chunk_idx def legacy_cleanup(): - """bring old cache dirs into the desired state (cleanup and adapt)""" + """Bring old cache dirs into the desired state (cleanup and adapt).""" try: os.unlink(os.path.join(self.path, 'chunks.archive')) except: diff --git a/src/borg/constants.py b/src/borg/constants.py index 322cc5579..f5fe38630 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -1,27 +1,27 @@ -# this set must be kept complete, otherwise the RobustUnpacker might malfunction: +# This set must be kept complete; otherwise the RobustUnpacker might malfunction: ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master', 'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'birthtime', 'size', 'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', 'part']) -# this is the set of keys that are always present in items: +# This is the set of keys that are always present in items: REQUIRED_ITEM_KEYS = frozenset(['path', 'mtime', ]) -# this set must be kept complete, otherwise rebuild_manifest might malfunction: +# This set must be kept complete; otherwise rebuild_manifest might malfunction: 
ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end', 'comment', 'chunker_params', 'recreate_cmdline', 'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', # used in 1.1.0b1 .. b2 'size', 'csize', 'nfiles', 'size_parts', 'csize_parts', 'nfiles_parts', ]) -# this is the set of keys that are always present in archives: +# This is the set of keys that are always present in archives: REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ]) -# default umask, overridden by --umask, defaults to read/write only for owner +# Default umask, overridden by --umask; defaults to read/write only for owner UMASK_DEFAULT = 0o077 -# default file mode to store stdin data, defaults to read/write for owner and group -# forcing to 0o100XXX later +# Default file mode to store stdin data; defaults to read/write for owner and group. +# Forcing to 0o100XXX later. STDIN_MODE_DEFAULT = 0o660 CACHE_TAG_NAME = 'CACHEDIR.TAG' @@ -34,7 +34,7 @@ CACHE_TAG_CONTENTS = b'Signature: 8a477f597d28d172789f06886806bc55' DEFAULT_MAX_SEGMENT_SIZE = 500 * 1024 * 1024 # 20 MiB minus 41 bytes for a Repository header (because the "size" field in the Repository includes -# the header, and the total size was set to 20 MiB). +# the header, and the total size is set to 20 MiB). MAX_DATA_SIZE = 20971479 # MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of @@ -52,12 +52,12 @@ zeros = bytes(MAX_DATA_SIZE) # borg.remote read() buffer size BUFSIZE = 10 * 1024 * 1024 -# to use a safe, limited unpacker, we need to set a upper limit to the archive count in the manifest. -# this does not mean that you can always really reach that number, because it also needs to be less than -# MAX_DATA_SIZE or it will trigger the check for that. +# To use a safe, limited unpacker, we need to set an upper limit for the archive count in the manifest. 
+# This does not mean that you can always reach that number, because it also needs to be less than +# MAX_DATA_SIZE, otherwise it will trigger the check for that. MAX_ARCHIVES = 400000 -# repo.list() / .scan() result count limit the borg client uses +# repo.list()/.scan() result count limit used by the Borg client LIST_SCAN_LIMIT = 100000 DEFAULT_SEGMENTS_PER_DIR = 1000 @@ -70,10 +70,10 @@ MAX_SEGMENT_DIR_INDEX = 2**32 - 1 FD_MAX_AGE = 4 * 60 # 4 minutes -CHUNK_MIN_EXP = 19 # 2**19 == 512kiB -CHUNK_MAX_EXP = 23 # 2**23 == 8MiB -HASH_WINDOW_SIZE = 0xfff # 4095B -HASH_MASK_BITS = 21 # results in ~2MiB chunks statistically +CHUNK_MIN_EXP = 19 # 2**19 == 512 KiB +CHUNK_MAX_EXP = 23 # 2**23 == 8 MiB +HASH_WINDOW_SIZE = 0xfff # 4095 B +HASH_MASK_BITS = 21 # Results in ~2 MiB chunks statistically # chunker algorithms CH_BUZHASH = 'buzhash' @@ -88,9 +88,9 @@ ITEMS_CHUNKER_PARAMS = (CH_BUZHASH, 15, 19, 17, HASH_WINDOW_SIZE) # normal on-disk data, allocated (but not written, all zeros), not allocated hole (all zeros) CH_DATA, CH_ALLOC, CH_HOLE = 0, 1, 2 -# operating mode of the files cache (for fast skipping of unchanged files) +# Operating mode of the files cache (for fast skipping of unchanged files) FILES_CACHE_MODE_UI_DEFAULT = 'ctime,size,inode' # default for "borg create" command (CLI UI) -FILES_CACHE_MODE_DISABLED = 'd' # most borg commands do not use the files cache at all (disable) +FILES_CACHE_MODE_DISABLED = 'd' # Most Borg commands do not use the files cache at all (disable). # return codes returned by borg command EXIT_SUCCESS = 0 # everything done, no problems @@ -100,9 +100,9 @@ EXIT_ERROR_BASE = 3 # specific error codes are 3..99 (enabled by BORG_EXIT_CODE EXIT_WARNING_BASE = 100 # specific warning codes are 100..127 (enabled by BORG_EXIT_CODES=modern) EXIT_SIGNAL_BASE = 128 # terminated due to signal, rc = 128 + sig_no -# never use datetime.isoformat(), it is evil. 
always use one of these: -# datetime.strftime(ISO_FORMAT) # output always includes .microseconds -# datetime.strftime(ISO_FORMAT_NO_USECS) # output never includes microseconds +# Never use datetime.isoformat(); it is problematic. Always use one of these: +# datetime.strftime(ISO_FORMAT) # Output always includes .microseconds +# datetime.strftime(ISO_FORMAT_NO_USECS) # Output never includes microseconds ISO_FORMAT_NO_USECS = '%Y-%m-%dT%H:%M:%S' ISO_FORMAT = ISO_FORMAT_NO_USECS + '.%f' diff --git a/src/borg/crypto/file_integrity.py b/src/borg/crypto/file_integrity.py index 6ad2272ce..2cb81247b 100644 --- a/src/borg/crypto/file_integrity.py +++ b/src/borg/crypto/file_integrity.py @@ -43,14 +43,14 @@ class FileHashingWrapper(FileLikeWrapper): Wrapper for file-like objects that computes a hash on-the-fly while reading/writing. WARNING: Seeks should only be used to query the size of the file, not - to skip data, because skipped data isn't read and not hashed into the digest. + to skip data, because skipped data is not read and therefore not hashed into the digest. - Similarly skipping while writing to create sparse files is also not supported. + Similarly, skipping while writing to create sparse files is also not supported. - Data has to be read/written in a symmetric fashion, otherwise different + Data has to be read/written in a symmetric fashion; otherwise different digests will be generated. - Note: When used as a context manager read/write operations outside the enclosed scope + Note: When used as a context manager, read/write operations outside the enclosed scope are illegal. """ @@ -85,9 +85,9 @@ class FileHashingWrapper(FileLikeWrapper): def hexdigest(self): """ - Return current digest bytes as hex-string. + Return the current digest as a hex string. - Note: this can be called multiple times. + Note: This can be called multiple times. 
""" return self.hash.hexdigest() @@ -96,7 +96,7 @@ class FileHashingWrapper(FileLikeWrapper): def hash_length(self, seek_to_end=False): if seek_to_end: - # Add length of file to the hash to avoid problems if only a prefix is read. + # Add the length of the file to the hash to avoid problems if only a prefix is read. self.seek(0, io.SEEK_END) self.hash.update(str(self.tell()).encode()) @@ -150,10 +150,10 @@ class IntegrityCheckedFile(FileLikeWrapper): return self.parse_integrity_data(path, integrity_data) def hash_filename(self, filename=None): - # Hash the name of the file, but only the basename, ie. not the path. - # In Borg the name itself encodes the context (eg. index.N, cache, files), + # Hash the name of the file, but only the basename, i.e. not the path. + # In Borg the name itself encodes the context (e.g., index.N, cache, files), # while the path doesn't matter, and moving e.g. a repository or cache directory is supported. - # Changing the name however imbues a change of context that is not permissible. + # Changing the name, however, implies a change of context that is not permissible. # While Borg does not use anything except ASCII in these file names, it's important to use # the same encoding everywhere for portability. Using os.fsencode() would be wrong. 
filename = os.path.basename(filename or self.path) diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index 25cff0a8c..7a59a71d6 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -37,22 +37,22 @@ AUTHENTICATED_NO_KEY = 'authenticated_no_key' in helpers.workarounds class NoPassphraseFailure(Error): - """can not acquire a passphrase: {}""" + """Cannot acquire a passphrase: {}""" exit_mcode = 50 class PasscommandFailure(Error): - """passcommand supplied in BORG_PASSCOMMAND failed: {}""" + """Passcommand supplied in BORG_PASSCOMMAND failed: {}""" exit_mcode = 51 class PassphraseWrong(Error): - """passphrase supplied in BORG_PASSPHRASE, by BORG_PASSCOMMAND or via BORG_PASSPHRASE_FD is incorrect.""" + """Passphrase supplied via BORG_PASSPHRASE, by BORG_PASSCOMMAND, or via BORG_PASSPHRASE_FD is incorrect.""" exit_mcode = 52 class PasswordRetriesExceeded(Error): - """exceeded the maximum password retries""" + """Exceeded the maximum password retries.""" exit_mcode = 53 @@ -193,7 +193,7 @@ class KeyBase: chunk_seed = None # Whether this *particular instance* is encrypted from a practical point of view, - # i.e. when it's using encryption with a empty passphrase, then + # i.e. when it's using encryption with an empty passphrase, then # that may be *technically* called encryption, but for all intents and purposes # that's as good as not encrypting in the first place, and this member should be False. 
# @@ -212,8 +212,7 @@ class KeyBase: self.tam_required = True def id_hash(self, data): - """Return HMAC hash using the "id" HMAC key - """ + """Return HMAC using the "id" HMAC key.""" def encrypt(self, chunk): pass @@ -297,7 +296,7 @@ class KeyBase: """Unpack msgpacked *data* and return (object, did_verify, salt).""" tam_required = self.tam_required if force_tam_not_required and tam_required: - # for a long time, borg only checked manifest for "tam_required" and + # For a long time, Borg only checked the manifest for "tam_required" and # people might have archives without TAM, so don't be too annoyingly loud here: logger.debug('Archive authentication DISABLED.') tam_required = False @@ -394,7 +393,7 @@ def random_blake2b_256_key(): # and len(block) >= len(state), hence wide.) # In other words, a key longer than 64 bytes would have simply no advantage, since the function # has no way of propagating more than 64 bytes of entropy internally. - # It's padded to a full block so that the key is never buffered internally by blake2b_update, ie. + # It's padded to a full block so that the key is never buffered internally by blake2b_update, i.e. # it remains in a single memory location that can be tracked and could be erased securely, if we # wanted to. return os.urandom(64) + bytes(64) @@ -430,14 +429,14 @@ class ID_HMAC_SHA_256: class AESKeyBase(KeyBase): """ - Common base class shared by KeyfileKey and PassphraseKey + Common base class shared by KeyfileKey and PassphraseKey. - Chunks are encrypted using 256bit AES in Counter Mode (CTR) + Chunks are encrypted using 256-bit AES in Counter Mode (CTR). Payload layout: TYPE(1) + HMAC(32) + NONCE(8) + CIPHERTEXT - To reduce payload size only 8 bytes of the 16 bytes nonce is saved - in the payload, the first 8 bytes are always zeros. This does not + To reduce payload size, only 8 bytes of the 16-byte nonce are saved + in the payload; the first 8 bytes are always zeros. 
This does not affect security but limits the maximum repository capacity to only 295 exabytes! """ @@ -520,7 +519,7 @@ class Passphrase(str): def env_passcommand(cls, default=None): passcommand = os.environ.get('BORG_PASSCOMMAND', None) if passcommand is not None: - # passcommand is a system command (not inside pyinstaller env) + # The passcommand is a system command (not inside the PyInstaller environment) env = prepare_subprocess_env(system=True) try: passphrase = subprocess.check_output(shlex.split(passcommand), text=True, env=env) diff --git a/src/borg/crypto/keymanager.py b/src/borg/crypto/keymanager.py index 2865f306d..f35801362 100644 --- a/src/borg/crypto/keymanager.py +++ b/src/borg/crypto/keymanager.py @@ -10,7 +10,7 @@ from .key import KeyfileKey, KeyfileNotFoundError, RepoKeyNotFoundError, KeyBlob class NotABorgKeyFile(Error): - """This file is not a borg key backup, aborting.""" + """This file is not a Borg key backup, aborting.""" exit_mcode = 43 diff --git a/src/borg/crypto/nonces.py b/src/borg/crypto/nonces.py index ec14669a4..ea80cd278 100644 --- a/src/borg/crypto/nonces.py +++ b/src/borg/crypto/nonces.py @@ -10,7 +10,7 @@ from ..remote import InvalidRPCMethod from .low_level import bytes_to_long, long_to_bytes MAX_REPRESENTABLE_NONCE = 2**64 - 1 -NONCE_SPACE_RESERVATION = 2**28 # This in units of AES blocksize (16 bytes) +NONCE_SPACE_RESERVATION = 2**28 # This is in units of AES block size (16 bytes) class NonceManager: @@ -54,12 +54,12 @@ class NonceManager: def ensure_reservation(self, nonce, nonce_space_needed): """ - Call this before doing encryption, give current, yet unused, integer IV as <nonce> + Call this before doing encryption; give the current, yet unused, integer IV as <nonce> and the amount of subsequent (counter-like) IVs needed as <nonce_space_needed>. - Return value is the IV (counter) integer you shall use for encryption. + The return value is the IV (counter) integer you should use for encryption. 
- Note: this method may return the <nonce> you gave, if a reservation for it exists or - can be established, so make sure you give a unused nonce. + Note: This method may return the <nonce> you gave if a reservation for it exists or + can be established, so make sure you give an unused nonce. """ # Nonces may never repeat, even if a transaction aborts or the system crashes. # Therefore a part of the nonce space is reserved before any nonce is used for encryption. @@ -67,16 +67,16 @@ class NonceManager: # against nonce reuse in crashes and transaction aborts. In that case the reservation still # persists and the whole reserved space is never reused. # - # Local storage on the client is used to protect against an attacker that is able to rollback the + # Local storage on the client is used to protect against an attacker that is able to roll back the # state of the server or can do arbitrary modifications to the repository. - # Storage on the server is used for the multi client use case where a transaction on client A is + # Storage on the server is used for the multi-client use case where a transaction on client A is # aborted and later client B writes to the repository. # - # This scheme does not protect against attacker who is able to rollback the state of the server - # or can do arbitrary modifications to the repository in the multi client usecase. + # This scheme does not protect against an attacker who is able to roll back the state of the server + # or can do arbitrary modifications to the repository in the multi-client use case. if self.end_of_nonce_reservation: - # we already got a reservation, if nonce_space_needed still fits everything is ok + # We already have a reservation; if nonce_space_needed still fits, everything is okay. 
next_nonce = nonce assert next_nonce <= self.end_of_nonce_reservation if next_nonce + nonce_space_needed <= self.end_of_nonce_reservation: diff --git a/src/borg/fuse.py b/src/borg/fuse.py index d124d2544..d03784f70 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -59,7 +59,7 @@ def fuse_main(): # size of some LRUCaches (1 element per simultaneously open file) # note: _inode_cache might have rather large elements - Item.chunks can be large! # also, simultaneously reading too many files should be avoided anyway. -# thus, do not set FILES to high values. +# thus, do not set FILES to very high values. FILES = 4 @@ -524,7 +524,7 @@ class FuseOperations(llfuse.Operations, FuseBackend): if mount_options: options.extend(mount_options.split(',')) if is_darwin: - # macFUSE supports a volname mount option to give what finder displays on desktop / in directory list. + # macFUSE supports a volname mount option to give what Finder displays on the desktop / in directory list. volname = pop_option(options, 'volname', '', '', str) # if the user did not specify it, we make something up, # because otherwise it would be "macFUSE Volume 0 (Python)", #7690. diff --git a/src/borg/fuse_impl.py b/src/borg/fuse_impl.py index 390ac576f..6c7129b4b 100644 --- a/src/borg/fuse_impl.py +++ b/src/borg/fuse_impl.py @@ -1,5 +1,5 @@ """ -load library for lowlevel FUSE implementation +Load library for low-level FUSE implementation. """ import os diff --git a/src/borg/helpers/__init__.py b/src/borg/helpers/__init__.py index f0dca474e..3a55d598f 100644 --- a/src/borg/helpers/__init__.py +++ b/src/borg/helpers/__init__.py @@ -1,9 +1,8 @@ """ -This package contains all sorts of small helper / utility functionality, -that did not fit better elsewhere. +This package contains various small helper/utility functions that did not fit better elsewhere. -Code used to be in borg/helpers.py but was split into the modules in this -package, which are imported into here for compatibility. 
+Code used to be in borg/helpers.py but was split into modules in this +package, which are imported here for compatibility. """ from contextlib import contextmanager @@ -22,9 +21,9 @@ from .yes import * # NOQA from .msgpack import is_slow_msgpack, is_supported_msgpack, int_to_bigint, bigint_to_int, get_limited_unpacker from . import msgpack -# generic mechanism to enable users to invoke workarounds by setting the +# Generic mechanism to enable users to invoke workarounds by setting the # BORG_WORKAROUNDS environment variable to a list of comma-separated strings. -# see the docs for a list of known workaround strings. +# See the docs for a list of known workaround strings. workarounds = tuple(os.environ.get('BORG_WORKAROUNDS', '').split(',')) @@ -33,7 +32,7 @@ def ignore_invalid_archive_tam(): global workarounds saved = workarounds if 'ignore_invalid_archive_tam' not in workarounds: - # we really need this workaround here or borg will likely raise an exception. + # We really need this workaround here, or Borg will likely raise an exception. workarounds += ('ignore_invalid_archive_tam',) yield workarounds = saved @@ -43,7 +42,7 @@ def ignore_invalid_archive_tam(): warning_info = namedtuple("warning_info", "wc,msg,args,wt") """ -The global warnings_list variable is used to collect warning_info elements while borg is running. +The global warnings_list variable is used to collect warning_info elements while Borg is running. """ _warnings_list = [] @@ -80,8 +79,8 @@ def classify_ec(ec): def max_ec(ec1, ec2): - """return the more severe error code of ec1 and ec2""" - # note: usually, there can be only 1 error-class ec, the other ec is then either success or warning. + """Return the more severe error code of ec1 and ec2.""" + # Note: Usually, there can be only one error-class ec; the other ec is then either success or warning. 
ec1_class = classify_ec(ec1) ec2_class = classify_ec(ec2) if ec1_class == "signal": @@ -102,7 +101,7 @@ def max_ec(ec1, ec2): def set_ec(ec): """ - Sets the exit code of the program to ec IF ec is more severe than the current exit code. + Set the exit code of the program to ec IF ec is more severe than the current exit code. """ global _exit_code _exit_code = max_ec(_exit_code, ec) @@ -110,7 +109,7 @@ def set_ec(ec): def init_ec_warnings(ec=EXIT_SUCCESS, warnings=None): """ - (Re-)Init the globals for the exit code and the warnings list. + (Re-)Initialize the globals for the exit code and the warnings list. """ global _exit_code, _warnings_list _exit_code = ec @@ -121,7 +120,7 @@ def init_ec_warnings(ec=EXIT_SUCCESS, warnings=None): def get_ec(ec=None): """ - compute the final return code of the borg process + Compute the final return code of the Borg process. """ if ec is not None: set_ec(ec) @@ -129,21 +128,21 @@ def get_ec(ec=None): global _exit_code exit_code_class = classify_ec(_exit_code) if exit_code_class in ("signal", "error", "warning"): - # there was a signal/error/warning, return its exit code + # There was a signal/error/warning; return its exit code. return _exit_code assert exit_code_class == "success" global _warnings_list if not _warnings_list: - # we do not have any warnings in warnings list, return success exit code + # We do not have any warnings in the warnings list; return the success exit code. return _exit_code - # looks like we have some warning(s) + # There are some warning(s). rcs = sorted(set(w_info.wc for w_info in _warnings_list)) logger.debug(f"rcs: {rcs!r}") if len(rcs) == 1: - # easy: there was only one kind of warning, so we can be specific + # Easy: there was only one kind of warning, so we can be specific. return rcs[0] - # there were different kinds of warnings - return EXIT_WARNING # generic warning rc, user has to look into the logs + # There were different kinds of warnings. 
+ return EXIT_WARNING # generic warning rc; user has to look into the logs def get_reset_ec(ec=None): diff --git a/src/borg/helpers/datastruct.py b/src/borg/helpers/datastruct.py index 31192a8c7..ea0cfde01 100644 --- a/src/borg/helpers/datastruct.py +++ b/src/borg/helpers/datastruct.py @@ -2,7 +2,7 @@ from .errors import Error class StableDict(dict): - """A dict subclass with stable items() ordering""" + """A dict subclass with stable items() ordering.""" def items(self): return sorted(super().items()) @@ -17,8 +17,8 @@ class Buffer: def __init__(self, allocator, size=4096, limit=None): """ - Initialize the buffer: use allocator(size) call to allocate a buffer. - Optionally, set the upper for the buffer size. + Initialize the buffer: use allocator(size) to allocate a buffer. + Optionally set the upper limit for the buffer size via limit. """ assert callable(allocator), 'must give alloc(size) function as first param' assert limit is None or size <= limit, 'initial size must be <= limit' @@ -31,9 +31,9 @@ class Buffer: def resize(self, size, init=False): """ - resize the buffer - to avoid frequent reallocation, we usually always grow (if needed). - giving init=True it is possible to first-time initialize or shrink the buffer. - if a buffer size beyond the limit is requested, raise Buffer.MemoryLimitExceeded (OSError). + Resize the buffer. To avoid frequent reallocation, we usually grow (if needed). + By giving init=True it is possible to first-time initialize or shrink the buffer. + If a buffer size beyond the limit is requested, raise Buffer.MemoryLimitExceeded (OSError). """ size = int(size) if self.limit is not None and size > self.limit: @@ -43,7 +43,7 @@ class Buffer: def get(self, size=None, init=False): """ - return a buffer of at least the requested size (None: any current size). + Return a buffer of at least the requested size (None: any current size). init=True can be given to trigger shrinking of the buffer to the given size. 
""" if size is not None: @@ -57,13 +57,13 @@ class EfficientCollectionQueue: """ class SizeUnderflow(Error): - """Could not pop_front first {} elements, collection only has {} elements..""" + """Could not pop_front the first {} elements; collection only has {} elements.""" def __init__(self, split_size, member_type): """ - Initializes empty queue. - Requires split_size to define maximum chunk size. - Requires member_type to be type defining what base collection looks like. + Initialize an empty queue. + Requires split_size to define the maximum chunk size. + Requires member_type to be the type defining what the base collection looks like. """ self.buffers = [] self.size = 0 @@ -72,9 +72,9 @@ class EfficientCollectionQueue: def peek_front(self): """ - Returns first chunk from queue without removing it. - Returned collection will have between 1 and split_size length. - Returns empty collection when nothing is queued. + Return the first chunk from the queue without removing it. + The returned collection will have between 1 and split_size elements. + Returns an empty collection when nothing is queued. """ if not self.buffers: return self.member_type() @@ -83,8 +83,8 @@ class EfficientCollectionQueue: def pop_front(self, size): """ - Removes first size elements from queue. - Throws if requested removal size is larger than whole queue. + Remove the first size elements from the queue. + Raises if the requested removal size is larger than the whole queue. """ if size > self.size: raise EfficientCollectionQueue.SizeUnderflow(size, self.size) @@ -101,8 +101,8 @@ class EfficientCollectionQueue: def push_back(self, data): """ - Adds data at end of queue. - Takes care to chunk data into split_size sized elements. + Add data at the end of the queue. + Takes care to chunk data into split_size-sized elements. """ if not self.buffers: self.buffers = [self.member_type()] @@ -126,6 +126,6 @@ class EfficientCollectionQueue: def __bool__(self): """ - Returns true if queue isn't empty. 
+ Return True if the queue is not empty. """ return self.size != 0 diff --git a/src/borg/helpers/errors.py b/src/borg/helpers/errors.py index 168264425..d00159fba 100644 --- a/src/borg/helpers/errors.py +++ b/src/borg/helpers/errors.py @@ -12,8 +12,8 @@ class ErrorBase(Exception): """ErrorBase: {}""" # Error base class - # if we raise such an Error and it is only caught by the uppermost - # exception handler (that exits short after with the given exit_code), + # If we raise such an Error and it is only caught by the uppermost + # exception handler (that exits shortly after with the given exit_code), # it is always a (fatal and abrupt) error, never just a warning. exit_mcode = EXIT_ERROR # modern, more specific exit code (defaults to EXIT_ERROR) @@ -31,7 +31,7 @@ class ErrorBase(Exception): @property def exit_code(self): - # legacy: borg used to always use rc 2 (EXIT_ERROR) for all errors. + # legacy: Borg used to always use rc 2 (EXIT_ERROR) for all errors. # modern: users can opt in to more specific return codes, using BORG_EXIT_CODES: return self.exit_mcode if modern_ec else EXIT_ERROR @@ -88,7 +88,7 @@ class BorgWarning: @property def exit_code(self): - # legacy: borg used to always use rc 1 (EXIT_WARNING) for all warnings. + # legacy: Borg used to always use rc 1 (EXIT_WARNING) for all warnings. # modern: users can opt in to more specific return codes, using BORG_EXIT_CODES: return self.exit_mcode if modern_ec else EXIT_WARNING @@ -105,7 +105,7 @@ class IncludePatternNeverMatchedWarning(BorgWarning): class BackupWarning(BorgWarning): """{}: {}""" - # this is to wrap a caught BackupError exception, so it can be given to print_warning_instance + # This is to wrap a caught BackupError exception so it can be given to print_warning_instance. 
@property def exit_code(self): diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index a880bb0fd..dbd55ed23 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -20,14 +20,15 @@ logger = create_logger() def ensure_dir(path, mode=stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO, pretty_deadly=True): """ - Ensures that the dir exists with the right permissions. - 1) Make sure the directory exists in a race-free operation + Ensure that the directory exists with the right permissions. + 1) Make sure the directory exists in a race-free operation. 2) If mode is not None and the directory has been created, give the right - permissions to the leaf directory. The current umask value is masked out first. - 3) If pretty_deadly is True, catch exceptions, reraise them with a pretty - message. - Returns if the directory has been created and has the right permissions, - An exception otherwise. If a deadly exception happened it is reraised. + permissions to the leaf directory. The current umask value is masked out first. + 3) If pretty_deadly is True, catch exceptions and re-raise them with a clearer + message. + + Returns normally if the directory exists (or was created) and has the right permissions; + raises an exception otherwise. If a fatal exception happened, it is re-raised. """ try: os.makedirs(path, mode=mode, exist_ok=True) @@ -57,7 +58,7 @@ def get_base_dir(): def get_keys_dir(): - """Determine where to repository keys and cache""" + """Determine where to store repository keys.""" keys_dir = os.environ.get('BORG_KEYS_DIR') if keys_dir is None: # note: do not just give this as default to the environment.get(), see issue #5979. 
@@ -79,7 +80,7 @@ def get_security_dir(repository_id=None): def get_cache_dir(): - """Determine where to repository keys and cache""" + """Determine where to store the cache.""" # Get cache home path cache_home = os.path.join(get_base_dir(), '.cache') # Try to use XDG_CACHE_HOME instead if BORG_BASE_DIR isn't explicitly set @@ -103,7 +104,7 @@ def get_cache_dir(): def get_config_dir(): - """Determine where to store whole config""" + """Determine where to store the whole configuration.""" # Get config home path config_home = os.path.join(get_base_dir(), '.config') # Try to use XDG_CONFIG_HOME instead if BORG_BASE_DIR isn't explicitly set @@ -157,7 +158,7 @@ _safe_re = re.compile(r'^((\.\.)?/+)+') def make_path_safe(path): - """Make path safe by making it relative and local + """Make path safe by making it relative and local. """ return _safe_re.sub('', path) or '.' @@ -178,7 +179,7 @@ def get_strip_prefix(path): def hardlinkable(mode): - """return True if we support hardlinked items of this type""" + """Return True if we support hardlinked items of this type.""" return stat.S_ISREG(mode) or stat.S_ISBLK(mode) or stat.S_ISCHR(mode) or stat.S_ISFIFO(mode) @@ -282,7 +283,7 @@ flags_dir = O_('DIRECTORY', 'RDONLY', 'NOFOLLOW') def os_open(*, flags, path=None, parent_fd=None, name=None, noatime=False): """ - Use os.open to open a fs item. + Use os.open to open a filesystem item. If parent_fd and name are given, they are preferred and openat will be used, path is not used in this case. @@ -328,7 +329,7 @@ def os_open(*, flags, path=None, parent_fd=None, name=None, noatime=False): def os_stat(*, path=None, parent_fd=None, name=None, follow_symlinks=False): """ - Use os.stat to open a fs item. + Use os.stat to stat a filesystem item. If parent_fd and name are given, they are preferred and statat will be used, path is not used in this case. 
diff --git a/src/borg/helpers/manifest.py b/src/borg/helpers/manifest.py index ea4494b44..6658d08ea 100644 --- a/src/borg/helpers/manifest.py +++ b/src/borg/helpers/manifest.py @@ -19,7 +19,7 @@ from ..constants import * # NOQA class MandatoryFeatureUnsupported(Error): - """Unsupported repository feature(s) {}. A newer version of borg is required to access this repository.""" + """Unsupported repository feature(s) {}. A newer version of Borg is required to access this repository.""" exit_mcode = 25 @@ -79,16 +79,16 @@ class Archives(abc.MutableMapping): def list(self, *, glob=None, match_end=r'\Z', sort_by=(), consider_checkpoints=True, first=None, last=None, reverse=False): """ - Return list of ArchiveInfo instances according to the parameters. + Return a list of ArchiveInfo instances according to the parameters. First match *glob* (considering *match_end*), then *sort_by*. Apply *first* and *last* filters, and then possibly *reverse* the list. *sort_by* is a list of sort keys applied in reverse order. - Note: for better robustness, all filtering / limiting parameters must default to - "not limit / not filter", so a FULL archive list is produced by a simple .list(). - some callers EXPECT to iterate over all archives in a repo for correct operation. + Note: For better robustness, all filtering/limiting parameters must default to + "not limit / not filter", so a full archive list is produced by a simple .list(). + Some callers expect to iterate over all archives in a repo for correct operation. """ if isinstance(sort_by, (str, bytes)): raise TypeError('sort_by must be a sequence of str') @@ -108,7 +108,7 @@ class Archives(abc.MutableMapping): def list_considering(self, args): """ - get a list of archives, considering --first/last/prefix/glob-archives/sort/consider-checkpoints cmdline args + Get a list of archives, considering --first/last/prefix/glob-archives/sort/consider-checkpoints command-line arguments. 
""" if args.location.archive: raise Error('The options --first, --last, --prefix, and --glob-archives, and --consider-checkpoints can only be used on repository targets.') @@ -117,14 +117,14 @@ class Archives(abc.MutableMapping): return self.list(sort_by=args.sort_by.split(','), consider_checkpoints=args.consider_checkpoints, glob=args.glob_archives, first=args.first, last=args.last) def set_raw_dict(self, d): - """set the dict we get from the msgpack unpacker""" + """Set the dict we get from the msgpack unpacker.""" for k, v in d.items(): assert isinstance(k, bytes) assert isinstance(v, dict) and b'id' in v and b'time' in v self._archives[k] = v def get_raw_dict(self): - """get the dict we can give to the msgpack packer""" + """Get the dict we can give to the msgpack packer.""" return self._archives diff --git a/src/borg/helpers/misc.py b/src/borg/helpers/misc.py index 9dbd4e545..e7bcdd44c 100644 --- a/src/borg/helpers/misc.py +++ b/src/borg/helpers/misc.py @@ -41,21 +41,21 @@ def default_period_func(pattern): def quarterly_13weekly_period_func(a): (year, week, _) = to_localtime(a.ts).isocalendar() if week <= 13: - # Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7) + # Weeks containing Jan 4th to Mar 28th (leap year) or 29th — 91 (13*7) # days later. return (year, 1) elif 14 <= week <= 26: - # Weeks containing Apr 4th (leap year) or 5th to Jun 27th or 28th- 91 + # Weeks containing Apr 4th (leap year) or 5th to Jun 27th or 28th — 91 # days later. return (year, 2) elif 27 <= week <= 39: - # Weeks containing Jul 4th (leap year) or 5th to Sep 26th or 27th- + # Weeks containing Jul 4th (leap year) or 5th to Sep 26th or 27th — # at least 91 days later. return (year, 3) else: - # Everything else, Oct 3rd (leap year) or 4th onward, will always - # include week of Dec 26th (leap year) or Dec 27th, may also include - # up to possibly Jan 3rd of next year. 
+ # Everything else: Oct 3rd (leap year) or 4th onward; will always + # include the week of Dec 26th (leap year) or Dec 27th and may also include + # up to Jan 3rd of next year. return (year, 4) @@ -155,9 +155,9 @@ def sysinfo(): def log_multi(*msgs, level=logging.INFO, logger=logger): """ - log multiple lines of text, each line by a separate logging call for cosmetic reasons + Log multiple lines of text, each line via a separate logging call for cosmetic reasons. - each positional argument may be a single or multiple lines (separated by newlines) of text. + Each positional argument may be a single or multiple lines (separated by newlines) of text. """ lines = [] for msg in msgs: @@ -171,14 +171,14 @@ def normalize_chunker_params(cp): if isinstance(cp, list): cp = tuple(cp) if len(cp) == 4 and isinstance(cp[0], int): - # this is a borg < 1.2 chunker_params tuple, no chunker algo specified, but we only had buzhash: + # This is a Borg < 1.2 chunker_params tuple: no chunker algorithm specified, but we only had buzhash. cp = (CH_BUZHASH, ) + cp assert cp[0] in (CH_BUZHASH, CH_FIXED) return cp class ChunkIteratorFileWrapper: - """File-like wrapper for chunk iterators""" + """File-like wrapper for chunk iterators.""" def __init__(self, chunk_iterator, read_callback=None): """ @@ -245,13 +245,13 @@ def chunkit(it, size): def consume(iterator, n=None): - """Advance the iterator n-steps ahead. If n is none, consume entirely.""" + """Advance the iterator n steps ahead. If n is None, consume entirely.""" # Use functions that consume iterators at C speed. if n is None: - # feed the entire iterator into a zero-length deque + # Feed the entire iterator into a zero-length deque. deque(iterator, maxlen=0) else: - # advance to the empty slice starting at position n + # Advance to the empty slice starting at position n. 
next(islice(iterator, n, n), None) @@ -280,7 +280,7 @@ class ErrorIgnoringTextIOWrapper(io.TextIOWrapper): def iter_separated(fd, sep=None, read_size=4096): - """Iter over chunks of open file ``fd`` delimited by ``sep``. Doesn't trim.""" + """Iterate over chunks of the open file ``fd`` delimited by ``sep``. Does not trim.""" buf = fd.read(read_size) is_str = isinstance(buf, str) part = '' if is_str else b'' diff --git a/src/borg/helpers/msgpack.py b/src/borg/helpers/msgpack.py index 8dd07c5b4..558b47e7e 100644 --- a/src/borg/helpers/msgpack.py +++ b/src/borg/helpers/msgpack.py @@ -33,11 +33,11 @@ version = mp_version class PackException(Exception): - """Exception while msgpack packing""" + """Exception while msgpack packing.""" class UnpackException(Exception): - """Exception while msgpack unpacking""" + """Exception while msgpack unpacking.""" class Packer(mp_Packer): diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index 1a6ea4191..54ad12ecf 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -42,26 +42,26 @@ def hex_to_bin(hex, length=None): def safe_decode(s, coding='utf-8', errors='surrogateescape'): - """decode bytes to str, with round-tripping "invalid" bytes""" + """Decode bytes to str, with round-tripping of "invalid" bytes.""" if s is None: return None return s.decode(coding, errors) def safe_encode(s, coding='utf-8', errors='surrogateescape'): - """encode str to bytes, with round-tripping "invalid" bytes""" + """Encode str to bytes, with round-tripping of "invalid" bytes.""" if s is None: return None return s.encode(coding, errors) def remove_surrogates(s, errors='replace'): - """Replace surrogates generated by fsdecode with '?'""" + """Replace surrogates generated by fsdecode with '?'.""" return s.encode('utf-8', errors).decode('utf-8') def eval_escapes(s): - """Evaluate literal escape sequences in a string (eg `\\n` -> `\n`).""" + """Evaluate literal escape sequences in a string (e.g., 
`\\n` -> `\n`).""" return s.encode('ascii', 'backslashreplace').decode('unicode-escape') @@ -73,7 +73,7 @@ def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'): def positive_int_validator(value): - """argparse type for positive integers""" + """Argparse type for positive integers.""" int_value = int(value) if int_value <= 0: raise argparse.ArgumentTypeError('A positive integer is required: %s' % value) diff --git a/src/borg/helpers/process.py b/src/borg/helpers/process.py index 785f191cb..373e66b27 100644 --- a/src/borg/helpers/process.py +++ b/src/borg/helpers/process.py @@ -53,9 +53,9 @@ def _daemonize(): def daemonize(): - """Detach process from controlling terminal and run in background + """Detach process from controlling terminal and run in background. - Returns: old and new get_process_id tuples + Returns: old and new get_process_id tuples. """ with _daemonize() as (old_id, new_id): return old_id, new_id diff --git a/src/borg/helpers/progress.py b/src/borg/helpers/progress.py index 9784f80e1..c61b3353d 100644 --- a/src/borg/helpers/progress.py +++ b/src/borg/helpers/progress.py @@ -12,7 +12,7 @@ from .parseformat import ellipsis_truncate def justify_to_terminal_size(message): terminal_space = get_terminal_size(fallback=(-1, -1))[0] - # justify only if we are outputting to a terminal + # Justify only if we are outputting to a terminal. if terminal_space != -1: return message.ljust(terminal_space) return message @@ -110,12 +110,12 @@ class ProgressIndicatorPercent(ProgressIndicatorBase): def __init__(self, total=0, step=5, start=0, msg="%3.0f%%", msgid=None): """ - Percentage-based progress indicator + Percentage-based progress indicator. - :param total: total amount of items - :param step: step size in percent - :param start: at which percent value to start - :param msg: output message, must contain one %f placeholder for the percentage + :param total: Total number of items. + :param step: Step size in percent. 
+ :param start: At which percentage value to start. + :param msg: Output message; must contain one %f placeholder for the percentage. """ self.counter = 0 # 0 .. (total-1) self.total = total diff --git a/src/borg/helpers/time.py b/src/borg/helpers/time.py index 776c29033..86f658365 100644 --- a/src/borg/helpers/time.py +++ b/src/borg/helpers/time.py @@ -6,17 +6,17 @@ from ..constants import ISO_FORMAT, ISO_FORMAT_NO_USECS def to_localtime(ts): - """Convert datetime object from UTC to local time zone""" + """Convert a datetime object from UTC to the local time zone.""" return datetime(*time.localtime((ts - datetime(1970, 1, 1, tzinfo=timezone.utc)).total_seconds())[:6]) def utcnow(): - """Returns a naive datetime instance representing the time in the UTC timezone""" + """Return a naive datetime instance representing the time in the UTC time zone.""" return datetime.now(timezone.utc).replace(tzinfo=None) def parse_timestamp(timestamp, tzinfo=timezone.utc): - """Parse a ISO 8601 timestamp string""" + """Parse an ISO 8601 timestamp string.""" fmt = ISO_FORMAT if '.' in timestamp else ISO_FORMAT_NO_USECS dt = datetime.strptime(timestamp, fmt) if tzinfo is not None: @@ -25,13 +25,13 @@ def parse_timestamp(timestamp, tzinfo=timezone.utc): def timestamp(s): - """Convert a --timestamp=s argument to a datetime object""" + """Convert a --timestamp=s argument to a datetime object.""" try: - # is it pointing to a file / directory? + # Is it pointing to a file/directory? ts = safe_s(os.stat(s).st_mtime) return datetime.fromtimestamp(ts, tz=timezone.utc) except OSError: - # didn't work, try parsing as timestamp. UTC, no TZ, no microsecs support. + # Didn't work; try parsing as a timestamp. UTC, no time zone, no microseconds support. 
for format in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00', '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M', @@ -48,24 +48,24 @@ def timestamp(s): # As they are crap anyway (valid filesystem timestamps always refer to the past up to # the present, but never to the future), nothing is lost if we just clamp them to the # maximum value we can support. -# As long as people are using borg on 32bit platforms to access borg archives, we must -# keep this value True. But we can expect that we can stop supporting 32bit platforms +# As long as people are using Borg on 32-bit platforms to access Borg archives, we must +# keep this value True. But we can expect that we can stop supporting 32-bit platforms # well before coming close to the year 2038, so this will never be a practical problem. SUPPORT_32BIT_PLATFORMS = True # set this to False before y2038. if SUPPORT_32BIT_PLATFORMS: # second timestamps will fit into a signed int32 (platform time_t limit). # nanosecond timestamps thus will naturally fit into a signed int64. - # subtract last 48h to avoid any issues that could be caused by tz calculations. - # this is in the year 2038, so it is also less than y9999 (which is a datetime internal limit). + # Subtract the last 48 h to avoid any issues that could be caused by time zone calculations. + # This is in the year 2038, so it is also less than y9999 (which is a datetime internal limit). # msgpack can pack up to uint64. MAX_S = 2**31-1 - 48*3600 MAX_NS = MAX_S * 1000000000 else: # nanosecond timestamps will fit into a signed int64. - # subtract last 48h to avoid any issues that could be caused by tz calculations. - # this is in the year 2262, so it is also less than y9999 (which is a datetime internal limit). - # round down to 1e9 multiple, so MAX_NS corresponds precisely to a integer MAX_S. + # Subtract the last 48 h to avoid any issues that could be caused by time zone calculations. 
+ # This is in the year 2262, so it is also less than y9999 (which is a datetime internal limit). + # Round down to a 1e9 multiple so MAX_NS corresponds precisely to an integer MAX_S. # msgpack can pack up to uint64. MAX_NS = (2**63-1 - 48*3600*1000000000) // 1000000000 * 1000000000 MAX_S = MAX_NS // 1000000000 @@ -95,9 +95,7 @@ def safe_timestamp(item_timestamp_ns): def format_time(ts: datetime, format_spec=''): - """ - Convert *ts* to a human-friendly format with textual weekday. - """ + """Convert *ts* to a human-friendly format with textual weekday.""" return ts.strftime('%a, %Y-%m-%d %H:%M:%S' if format_spec == '' else format_spec) diff --git a/src/borg/helpers/yes.py b/src/borg/helpers/yes.py index 3f3ff8cfa..e542414c8 100644 --- a/src/borg/helpers/yes.py +++ b/src/borg/helpers/yes.py @@ -17,34 +17,34 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None, falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH, default=False, retry=True, env_var_override=None, ofile=None, input=input, prompt=True, msgid=None): - """Output <msg> (usually a question) and let user input an answer. - Qualifies the answer according to falsish, truish and defaultish as True, False or <default>. - If it didn't qualify and retry is False (no retries wanted), return the default [which - defaults to False]. If retry is True let user retry answering until answer is qualified. + """Output <msg> (usually a question) and let the user input an answer. + Qualifies the answer according to falsish, truish, and defaultish as True, False, or <default>. + If it does not qualify and retry is False (no retries wanted), return the default [which + defaults to False]. If retry is True, let the user retry answering until the answer is qualified. - If env_var_override is given and this var is present in the environment, do not ask - the user, but just use the env var contents as answer as if it was typed in. - Otherwise read input from stdin and proceed as normal.
- If EOF is received instead an input or an invalid input without retry possibility, + If env_var_override is given and this variable is present in the environment, do not ask + the user, but use the environment variable's contents as the answer as if it were typed in. + Otherwise, read input from stdin and proceed as normal. + If EOF is received instead of input, or an invalid input occurs without the possibility to retry, return default. - :param msg: introducing message to output on ofile, no \n is added [None] - :param retry_msg: retry message to output on ofile, no \n is added [None] - :param false_msg: message to output before returning False [None] - :param true_msg: message to output before returning True [None] - :param default_msg: message to output before returning a <default> [None] - :param invalid_msg: message to output after a invalid answer was given [None] - :param env_msg: message to output when using input from env_var_override ['{} (from {})'], - needs to have 2 placeholders for answer and env var name - :param falsish: sequence of answers qualifying as False - :param truish: sequence of answers qualifying as True - :param defaultish: sequence of answers qualifying as <default> - :param default: default return value (defaultish answer was given or no-answer condition) [False] - :param retry: if True and input is incorrect, retry. Otherwise return default. [True] - :param env_var_override: environment variable name [None] - :param ofile: output stream [sys.stderr] - :param input: input function [input from builtins] - :return: boolean answer value, True or False + :param msg: introductory message to output on ofile; no \n is added. [None] + :param retry_msg: retry message to output on ofile; no \n is added. [None] + :param false_msg: message to output before returning False. [None] + :param true_msg: message to output before returning True. [None] + :param default_msg: message to output before returning the default value.
[None] + :param invalid_msg: message to output after an invalid answer was given. [None] + :param env_msg: message to output when using input from env_var_override ['{} (from {})']; + needs to have two placeholders for the answer and the environment variable name. + :param falsish: sequence of answers qualifying as False. + :param truish: sequence of answers qualifying as True. + :param defaultish: sequence of answers qualifying as <default>. + :param default: default return value (defaultish answer was given or no-answer condition). [False] + :param retry: If True and input is incorrect, retry; otherwise return default. [True] + :param env_var_override: environment variable name. [None] + :param ofile: output stream. [sys.stderr] + :param input: input function. [builtins.input] + :return: boolean answer value, True or False. """ def output(msg, msg_type, is_prompt=False, **kwargs): json_output = getattr(logging.getLogger('borg'), 'json', False) @@ -62,8 +62,8 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None, print(msg, file=ofile) msgid = msgid or env_var_override - # note: we do not assign sys.stderr as default above, so it is - # really evaluated NOW, not at function definition time. + # Note: We do not assign sys.stderr as the default above, so it is + # evaluated now, not at function definition time. if ofile is None: ofile = sys.stderr if default not in (True, False): @@ -84,7 +84,7 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None, if answer == ERROR: # for testing purposes raise UnicodeDecodeError("?", b"?", 0, 1, "?") # args don't matter except EOFError: - # avoid defaultish[0], defaultish could be empty + # Avoid defaultish[0]; defaultish could be empty.
answer = truish[0] if default else falsish[0] except UnicodeDecodeError: answer = ERROR diff --git a/src/borg/locking.py b/src/borg/locking.py index eda9692bb..27966d4dd 100644 --- a/src/borg/locking.py +++ b/src/borg/locking.py @@ -24,10 +24,10 @@ class TimeoutTimer: """ Initialize a timer. - :param timeout: time out interval [s] or None (never timeout, wait forever) [default] - :param sleep: sleep interval [s] (>= 0: do sleep call, <0: don't call sleep) - or None (autocompute: use 10% of timeout [but not more than 60s], - or 1s for "never timeout" mode) + :param timeout: timeout interval [s] or None (never time out, wait forever). [default] + :param sleep: sleep interval [s] (>= 0: do sleep; < 0: do not call sleep), + or None (auto-compute: use 10% of timeout, but not more than 60 s; + or 1 s for "never timeout" mode). """ if timeout is not None and timeout < 0: raise ValueError("timeout must be >= 0") @@ -98,9 +98,9 @@ class NotMyLock(LockErrorT): class ExclusiveLock: - """An exclusive Lock based on mkdir fs operation being atomic. + """An exclusive lock based on the mkdir filesystem operation being atomic. - If possible, try to use the contextmanager here like:: + If possible, try to use the context manager here like:: with ExclusiveLock(...) as lock: ... 
@@ -212,7 +212,7 @@ class ExclusiveLock: if not self.kill_stale_locks: if not self.stale_warning_printed: - # Log this at warning level to hint the user at the ability + # Log this at warning level to hint to the user about the ability logger.warning("Found stale lock %s, but not deleting because self.kill_stale_locks = False.", name) self.stale_warning_printed = True return False @@ -246,7 +246,7 @@ class ExclusiveLock: os.rmdir(self.path) def migrate_lock(self, old_id, new_id): - """migrate the lock ownership from old_id to new_id""" + """Migrate the lock ownership from old_id to new_id.""" assert self.id == old_id new_unique_name = os.path.join(self.path, "%s.%d-%x" % new_id) if self.is_locked() and self.by_me(): @@ -319,8 +319,8 @@ class LockRoster: if op == ADD: elements.add(self.id) elif op == REMOVE: - # note: we ignore it if the element is already not present anymore. - # this has been frequently seen in teardowns involving Repository.__del__ and Repository.__exit__. + # Note: We ignore it if the element is already not present anymore. + # This has been frequently seen in teardowns involving Repository.__del__ and Repository.__exit__. elements.discard(self.id) elif op == REMOVE2: # needed for callers that do not want to ignore. @@ -331,7 +331,7 @@ class LockRoster: self.save(roster) def migrate_lock(self, key, old_id, new_id): - """migrate the lock ownership from old_id to new_id""" + """Migrate the lock ownership from old_id to new_id.""" assert self.id == old_id # need to temporarily switch off stale lock killing as we want to # rather migrate than kill them (at least the one made by old_id). @@ -352,12 +352,12 @@ class LockRoster: class Lock: """ - A Lock for a resource that can be accessed in a shared or exclusive way. + A lock for a resource that can be accessed in a shared or exclusive way. 
Typically, write access to a resource needs an exclusive lock (1 writer, no one is allowed reading) and read access to a resource needs a shared lock (multiple readers are allowed). - If possible, try to use the contextmanager here like:: + If possible, try to use the context manager here like:: with Lock(...) as lock: ... @@ -371,10 +371,10 @@ class Lock: self.sleep = sleep self.timeout = timeout self.id = id or platform.get_process_id() - # globally keeping track of shared and exclusive lockers: + # Globally keep track of shared and exclusive lockers: self._roster = LockRoster(path + '.roster', id=id) - # an exclusive lock, used for: - # - holding while doing roster queries / updates + # An exclusive lock, used for: + # - holding while doing roster queries/updates # - holding while the Lock itself is exclusive self._lock = ExclusiveLock(path + '.exclusive', id=id, timeout=timeout) @@ -415,7 +415,7 @@ class Lock: if remove is not None: self._roster.modify(remove, ADD) except: - # avoid orphan lock when an exception happens here, e.g. Ctrl-C! + # Avoid an orphan lock when an exception happens here (e.g., Ctrl-C)! self._lock.release() raise else: diff --git a/src/borg/logger.py b/src/borg/logger.py index 8c23d392d..d61ede22c 100644 --- a/src/borg/logger.py +++ b/src/borg/logger.py @@ -1,33 +1,33 @@ -"""logging facilities +"""Logging facilities. -The way to use this is as follows: +How to use: -* each module declares its own logger, using: +- Each module declares its own logger, using: from .logger import create_logger logger = create_logger() -* then each module uses logger.info/warning/debug/etc according to the +- Then each module uses logger.info/warning/debug/etc. 
according to the level it believes is appropriate: logger.debug('debugging info for developers or power users') logger.info('normal, informational output') - logger.warning('warn about a non-fatal error or sth else') + logger.warning('warn about a non-fatal error or something else') logger.error('a fatal error') - ... and so on. see the `logging documentation + See the `logging documentation <https://docs.python.org/3/howto/logging.html>`_ - for more information + for more information. -* console interaction happens on stderr, that includes interactive - reporting functions like `help`, `info` and `list` +- Console interaction happens on stderr; that includes interactive + reporting functions like `help`, `info`, and `list`. -* ...except ``input()`` is special, because we can't control the - stream it is using, unfortunately. we assume that it won't clutter - stdout, because interaction would be broken then anyways +- ...except ``input()`` is special, because we cannot control the + stream it uses. We assume that it will not clutter stdout, because + interaction would be broken otherwise. -* what is output on INFO level is additionally controlled by commandline - flags +- What is output at the INFO level is additionally controlled by command-line + flags. """ import inspect @@ -60,16 +60,16 @@ def _log_warning(message, category, filename, lineno, file=None, line=None): def setup_logging(stream=None, conf_fname=None, env_var='BORG_LOGGING_CONF', level='info', is_serve=False, json=False): - """setup logging module according to the arguments provided + """Set up the logging module according to the provided arguments. - if conf_fname is given (or the config file name can be determined via - the env_var, if given): load this logging configuration. + If conf_fname is given (or the config file name can be determined via + env_var, if given), load that logging configuration.
- otherwise, set up a stream handler logger on stderr (by default, if no + Otherwise, set up a stream handler logger on stderr (by default, if no stream is provided). - if is_serve == True, we configure a special log format as expected by - the borg client log message interceptor. + If is_serve is True, configure a special log format as expected by + the Borg client log message interceptor. """ global configured err_msg = None @@ -123,9 +123,9 @@ def setup_logging(stream=None, conf_fname=None, env_var='BORG_LOGGING_CONF', lev def find_parent_module(): - """find the name of the first module calling this module + """Find the name of the first module calling this module. - if we cannot find it, we return the current module's name + If it cannot be found, return the current module's name (__name__) instead. """ try: @@ -142,18 +142,18 @@ def find_parent_module(): def create_logger(name=None): - """lazily create a Logger object with the proper path, which is returned by - find_parent_module() by default, or is provided via the commandline + """Lazily create a Logger object with the proper path, which is returned by + find_parent_module() by default, or is provided via the command-line. - this is really a shortcut for: + This is really a shortcut for: logger = logging.getLogger(__name__) - we use it to avoid errors and provide a more standard API. + We use it to avoid errors and provide a more standard API. - We must create the logger lazily, because this is usually called from - module level (and thus executed at import time - BEFORE setup_logging() - was called). By doing it lazily we can do the setup first, we just have to + We must create the logger lazily because this is usually called from + module level (and thus executed at import time—before setup_logging() + is called). By doing it lazily we can do the setup first; we just have to be careful not to call any logger methods before the setup_logging() call. If you try, you'll get an exception. 
""" diff --git a/src/borg/lrucache.py b/src/borg/lrucache.py index 4f7f1f829..9fa2addf8 100644 --- a/src/borg/lrucache.py +++ b/src/borg/lrucache.py @@ -40,8 +40,8 @@ class LRUCache: return value def upd(self, key, value): - # special use only: update the value for an existing key without having to dispose it first - # this method complements __setitem__ which should be used for the normal use case. + # Special use only: update the value for an existing key without having to dispose it first. + # This method complements __setitem__, which should be used for the normal use case. assert key in self._cache, "Unexpected attempt to update a non-existing item." self._cache[key] = value diff --git a/src/borg/nanorst.py b/src/borg/nanorst.py index 5700a0b3c..648dad120 100644 --- a/src/borg/nanorst.py +++ b/src/borg/nanorst.py @@ -50,10 +50,10 @@ def process_directive(directive, arguments, out, state_hook): def rst_to_text(text, state_hook=None, references=None): """ - Convert rST to a more human text form. + Convert reStructuredText (rST) to a more human-readable text form. This is a very loose conversion. No advanced rST features are supported. - The generated output directly depends on the input (e.g. indentation of + The generated output directly depends on the input (e.g., indentation of admonitions). 
""" state_hook = state_hook or (lambda old_state, new_state, out: None) @@ -90,7 +90,7 @@ def rst_to_text(text, state_hook=None, references=None): text.read(1) continue if text.peek(-1).isspace() and char == ':' and text.peek(5) == 'ref:`': - # translate reference + # Translate reference text.read(5) ref = '' while True: @@ -108,7 +108,7 @@ def rst_to_text(text, state_hook=None, references=None): raise ValueError("Undefined reference in Archiver help: %r — please add reference " "substitution to 'rst_plain_text_references'" % ref) continue - if char == ':' and text.peek(2) == ':\n': # End of line code block + if char == ':' and text.peek(2) == ':\n': # End-of-line code block text.read(2) state_hook(state, 'code-block', out) state = 'code-block' @@ -204,7 +204,7 @@ def rst_to_terminal(rst, references=None, destination=sys.stdout): Convert *rst* to a lazy string. If *destination* is a file-like object connected to a terminal, - enrich text with suitable ANSI escapes. Otherwise return plain text. + enrich the text with suitable ANSI escapes. Otherwise, return plain text. """ if is_terminal(destination): rst_state_hook = ansi_escapes diff --git a/src/borg/patterns.py b/src/borg/patterns.py index 565d0fd4b..514661467 100644 --- a/src/borg/patterns.py +++ b/src/borg/patterns.py @@ -20,7 +20,7 @@ def parse_patternfile_line(line, roots, ie_commands, fallback): elif ie_command.cmd is IECommand.PatternStyle: fallback = ie_command.val else: - # it is some kind of include/exclude command + # It is some kind of include/exclude command. ie_commands.append(ie_command) return fallback @@ -51,7 +51,7 @@ class ArgparsePatternFileAction(argparse.Action): def __call__(self, parser, args, values, option_string=None): """Load and parse patterns from a file. - Lines empty or starting with '#' after stripping whitespace on both line ends are ignored. + Empty lines or lines starting with '#' (after stripping whitespace at both ends) are ignored. 
""" filename = values[0] try: @@ -81,7 +81,7 @@ class PatternMatcher: # Value to return from match function when none of the patterns match. self.fallback = fallback - # optimizations + # Optimizations self._path_full_patterns = {} # full path -> return value # indicates whether the last match() call ended on a pattern for which @@ -89,13 +89,13 @@ class PatternMatcher: # False when calling match(). self.recurse_dir = None - # whether to recurse into directories when no match is found + # Whether to recurse into directories when no match is found # TODO: allow modification as a config option? self.recurse_dir_default = True self.include_patterns = [] - # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass? + # TODO: Move this info to parse_inclexcl_command and store it in a PatternBase subclass? self.is_include_cmd = { IECommand.Exclude: False, IECommand.ExcludeNoRecurse: False, @@ -151,28 +151,28 @@ class PatternMatcher: """ path = normalize_path(path).lstrip(os.path.sep) - # do a fast lookup for full path matches (note: we do not count such matches): + # Do a fast lookup for full path matches (note: we do not count such matches): non_existent = object() value = self._path_full_patterns.get(path, non_existent) if value is not non_existent: - # we have a full path match! + # We have a full path match! 
self.recurse_dir = command_recurses_dir(value) return self.is_include_cmd[value] - # this is the slow way, if we have many patterns in self._items: + # This is the slow path if we have many patterns in self._items: for (pattern, cmd) in self._items: if pattern.match(path, normalize=False): self.recurse_dir = pattern.recurse_dir return self.is_include_cmd[cmd] - # by default we will recurse if there is no match + # By default we will recurse if there is no match self.recurse_dir = self.recurse_dir_default return self.fallback def normalize_path(path): - """normalize paths for MacOS (but do nothing on other platforms)""" + """Normalize paths for macOS (no-op on other platforms).""" # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match. # Windows and Unix filesystems allow different forms, so users always have to enter an exact match. return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path diff --git a/src/borg/platform/__init__.py b/src/borg/platform/__init__.py index 713aa2457..6dd6e4725 100644 --- a/src/borg/platform/__init__.py +++ b/src/borg/platform/__init__.py @@ -17,7 +17,7 @@ OS_API_VERSION = API_VERSION if not is_win32: from .posix import process_alive, local_pid_alive - # posix swidth implementation works for: linux, freebsd, darwin, openindiana, cygwin + # POSIX swidth implementation works for: Linux, FreeBSD, Darwin, OpenIndiana, Cygwin from .posix import swidth from .posix import get_errno from .posix import uid2user, user2uid, gid2group, group2gid, getosusername @@ -26,17 +26,17 @@ else: from .windows import process_alive, local_pid_alive from .windows import uid2user, user2uid, gid2group, group2gid, getosusername -if is_linux: # pragma: linux only +if is_linux: # pragma: Linux only from .linux import API_VERSION as OS_API_VERSION from .linux import listxattr, getxattr, setxattr from .linux import acl_get, acl_set from .linux import set_flags, get_flags from .linux import SyncFile -elif 
is_freebsd: # pragma: freebsd only +elif is_freebsd: # pragma: FreeBSD only from .freebsd import API_VERSION as OS_API_VERSION from .freebsd import listxattr, getxattr, setxattr from .freebsd import acl_get, acl_set -elif is_darwin: # pragma: darwin only +elif is_darwin: # pragma: Darwin only from .darwin import API_VERSION as OS_API_VERSION from .darwin import listxattr, getxattr, setxattr from .darwin import acl_get, acl_set @@ -45,7 +45,7 @@ elif is_darwin: # pragma: darwin only def get_birthtime_ns(st, path, fd=None): if hasattr(st, "st_birthtime_ns"): - # added in Python 3.12 but not always available. + # Added in Python 3.12 but not always available. return st.st_birthtime_ns elif is_darwin and is_darwin_feature_64_bit_inode: return _get_birthtime_ns(fd or path, follow_symlinks=False) diff --git a/src/borg/platform/base.py b/src/borg/platform/base.py index f1d35bc83..84081a829 100644 --- a/src/borg/platform/base.py +++ b/src/borg/platform/base.py @@ -7,14 +7,14 @@ from borg.helpers import safe_unlink from borg.platformflags import is_win32 """ -platform base module +Platform base module ==================== Contains platform API implementations based on what Python itself provides. More specific APIs are stubs in this module. -When functions in this module use platform APIs themselves they access the public -platform API: that way platform APIs provided by the platform-specific support module +When functions in this module use platform APIs themselves, they access the public +platform API; that way, platform APIs provided by the platform-specific support module are correctly composed into the base functionality. """ @@ -45,7 +45,7 @@ def getxattr(path, name, *, follow_symlinks=False): *follow_symlinks* indicates whether symlinks should be followed and only applies when *path* is not an open file descriptor. 
""" - # as this base dummy implementation returns [] from listxattr, + # As this base dummy implementation returns [] from listxattr, # it must raise here for any given name: raise OSError(ENOATTR, os.strerror(ENOATTR), path) @@ -55,7 +55,7 @@ def setxattr(path, name, value, *, follow_symlinks=False): Write xattr on *path*. *path* can either be a path (bytes) or an open file descriptor (int). - *name* is the name of the xattr to read (bytes). + *name* is the name of the xattr to write (bytes). *value* is the value to write (bytes). *follow_symlinks* indicates whether symlinks should be followed and only applies when *path* is not an open file descriptor. @@ -64,18 +64,18 @@ def setxattr(path, name, value, *, follow_symlinks=False): def acl_get(path, item, st, numeric_ids=False, fd=None): """ - Saves ACL Entries + Save ACL entries. - If `numeric_ids` is True the user/group field is not preserved only uid/gid + If `numeric_ids` is True, the user/group field is not preserved; only uid/gid. """ def acl_set(path, item, numeric_ids=False, fd=None): """ - Restore ACL Entries + Restore ACL entries. - If `numeric_ids` is True the stored uid/gid is used instead - of the user/group names + If `numeric_ids` is True, the stored uid/gid is used instead + of the user/group names. """ @@ -96,7 +96,7 @@ def get_flags(path, st, fd=None): def sync_dir(path): if is_win32: - # Opening directories is not supported on windows. + # Opening directories is not supported on Windows. # TODO: do we need to handle this in some other way? return fd = os.open(path, os.O_RDONLY) @@ -143,7 +143,7 @@ class SyncFile: Calling SyncFile(path) for an existing path will raise FileExistsError, see comment in __init__. - TODO: Use F_FULLSYNC on OSX. + TODO: Use F_FULLSYNC on macOS. TODO: A Windows implementation should use CreateFile with FILE_FLAG_WRITE_THROUGH. """ @@ -208,7 +208,7 @@ class SaveFile: atomically and won't become corrupted, even on power failures or crashes (for caveats see SyncFile). 
- SaveFile can safely by used in parallel (e.g. by multiple processes) to write + SaveFile can safely be used in parallel (e.g. by multiple processes) to write to the same target path. Whatever writer finishes last (executes the os.replace last) "wins" and has successfully written its content to the target path. Internally used temporary files are created in the target directory and are @@ -249,9 +249,9 @@ class SaveFile: def swidth(s): - """terminal output width of string <s> + """Terminal output width of string <s>. - For western scripts, this is just len(s), but for cjk glyphs, 2 cells are used. + For Western scripts, this is just len(s), but for CJK glyphs, 2 cells are used. """ return len(s) @@ -277,17 +277,17 @@ def getfqdn(name=''): return name -# for performance reasons, only determine hostname / fqdn / hostid once. -# XXX this sometimes requires live internet access for issuing a DNS query in the background. +# For performance reasons, only determine hostname / FQDN / host ID once. +# XXX This sometimes requires live internet access for issuing a DNS query in the background. hostname = socket.gethostname() fqdn = getfqdn(hostname) -# some people put the fqdn into /etc/hostname (which is wrong, should be the short hostname) -# fix this (do the same as "hostname --short" cli command does internally): +# Some people put the FQDN into /etc/hostname (which is wrong; it should be the short hostname). +# Fix this (do the same as "hostname --short" CLI command does internally): hostname = hostname.split('.')[0] -# uuid.getnode() is problematic in some environments (e.g. OpenVZ, see #3968) where the virtual MAC address +# uuid.getnode() is problematic in some environments (e.g., OpenVZ, see #3968) where the virtual MAC address # is all-zero. uuid.getnode falls back to returning a random value in that case, which is not what we want. -# thus, we offer BORG_HOST_ID where a user can set an own, unique id for each of his hosts.
+# Thus, we offer BORG_HOST_ID where a user can set an own, unique ID for each of his hosts. hostid = os.environ.get('BORG_HOST_ID') if not hostid: hostid = f'{fqdn}@{uuid.getnode()}' diff --git a/src/borg/platform/xattr.py b/src/borg/platform/xattr.py index 74e3d3ab6..d0492a9f8 100644 --- a/src/borg/platform/xattr.py +++ b/src/borg/platform/xattr.py @@ -7,7 +7,7 @@ from ..helpers import Buffer try: ENOATTR = errno.ENOATTR except AttributeError: - # on some platforms, ENOATTR is missing, use ENODATA there + # On some platforms, ENOATTR is missing; use ENODATA there. ENOATTR = errno.ENODATA @@ -15,14 +15,14 @@ buffer = Buffer(bytearray, limit=2**24) def split_string0(buf): - """split a list of zero-terminated strings into python not-zero-terminated bytes""" + """Split a list of zero-terminated strings into Python bytes (without terminating zeros).""" if isinstance(buf, bytearray): buf = bytes(buf) # use a bytes object, so we return a list of bytes objects return buf.split(b'\0')[:-1] def split_lstring(buf): - """split a list of length-prefixed strings into python not-length-prefixed bytes""" + """Split a list of length-prefixed strings into Python bytes (without length prefixes).""" result = [] mv = memoryview(buf) while mv: @@ -33,7 +33,7 @@ def split_lstring(buf): class BufferTooSmallError(Exception): - """the buffer given to a xattr function was too small for the result.""" + """The buffer given to an xattr function was too small for the result.""" def _check(rv, path=None, detect_buffer_too_small=False): @@ -41,8 +41,8 @@ def _check(rv, path=None, detect_buffer_too_small=False): if rv < 0: e = get_errno() if detect_buffer_too_small and e == errno.ERANGE: - # listxattr and getxattr signal with ERANGE that they need a bigger result buffer. - # setxattr signals this way that e.g. a xattr key name is too long / inacceptable. + # listxattr and getxattr indicate with ERANGE that they need a bigger result buffer. 
+ # setxattr indicates this way that, e.g., an xattr key name is too long or unacceptable. raise BufferTooSmallError else: try: @@ -53,9 +53,9 @@ def _check(rv, path=None, detect_buffer_too_small=False): path = '<FD %d>' % path raise OSError(e, msg, path) if detect_buffer_too_small and rv >= len(buffer): - # freebsd does not error with ERANGE if the buffer is too small, - # it just fills the buffer, truncates and returns. - # so, we play safe and just assume that result is truncated if + # FreeBSD does not error with ERANGE if the buffer is too small; + # it just fills the buffer, truncates, and returns. + # Therefore, we play it safe and assume the result is truncated if # it happens to be a full buffer. raise BufferTooSmallError return rv diff --git a/src/borg/platformflags.py b/src/borg/platformflags.py index 8bfea7732..2c1567f26 100644 --- a/src/borg/platformflags.py +++ b/src/borg/platformflags.py @@ -1,7 +1,7 @@ """ -Flags for Platform-specific APIs. +Flags for platform-specific APIs. -Use these Flags instead of sys.platform.startswith('<OS>') or try/except. +Use these flags instead of sys.platform.startswith('<OS>') or try/except. """ import sys diff --git a/src/borg/remote.py b/src/borg/remote.py index 3064176da..2e20190db 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -47,11 +47,11 @@ RATELIMIT_PERIOD = 0.1 def os_write(fd, data): - """os.write wrapper so we do not lose data for partial writes.""" - # TODO: this issue is fixed in cygwin since at least 2.8.0, remove this - # wrapper / workaround when this version is considered ancient. - # This is happening frequently on cygwin due to its small pipe buffer size of only 64kiB - # and also due to its different blocking pipe behaviour compared to Linux/*BSD. + """Wrapper around os.write to avoid data loss on partial writes.""" + # TODO: This issue is fixed in Cygwin since at least 2.8.0; remove this + # wrapper/workaround when this version is considered ancient.
+ # This happens frequently on Cygwin due to its small pipe buffer size of only 64 KiB + # and also due to its different blocking pipe behavior compared to Linux/*BSD. # Neither Linux nor *BSD ever do partial writes on blocking pipes, unless interrupted by a # signal, in which case serve() would terminate. amount = remaining = len(data) @@ -66,7 +66,7 @@ def os_write(fd, data): class ConnectionClosed(Error): - """Connection closed by remote host""" + """Connection closed by remote host.""" exit_mcode = 80 @@ -81,7 +81,7 @@ class PathNotAllowed(Error): class InvalidRPCMethod(Error): - """RPC method {} is not valid""" + """RPC method {} is not valid.""" exit_mcode = 82 diff --git a/src/borg/repository.py b/src/borg/repository.py index 0daeef89b..b57e7a951 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -51,7 +51,7 @@ FreeSpace = partial(defaultdict, int) class Repository: """ - Filesystem based transactional key value store + Filesystem-based transactional key-value store. Transactionality is achieved by using a log (aka journal) to record changes. The log is a series of numbered files called segments. Each segment is a series of log entries. The segment number together with the offset of each @@ -84,9 +84,9 @@ class Repository: such obsolete entries is called sparse, while a segment containing no such entries is called compact. Sparse segments can be compacted and thereby disk space freed. This destroys the transaction for which the - superseded entries where current. + superseded entries were current. - On disk layout: + On-disk layout: dir/README dir/config @@ -97,16 +97,16 @@ class Repository: File system interaction ----------------------- - LoggedIO generally tries to rely on common behaviours across transactional file systems. + LoggedIO generally tries to rely on common behaviors across transactional file systems. 
Segments that are deleted are truncated first, which avoids problems if the FS needs to allocate space to delete the dirent of the segment. This mostly affects CoW file systems, traditional journaling file systems have a fairly good grip on this problem. Note that deletion, i.e. unlink(2), is atomic on every file system that uses inode reference - counts, which includes pretty much all of them. To remove a dirent the inodes refcount has - to be decreased, but you can't decrease the refcount before removing the dirent nor can you - decrease the refcount after removing the dirent. File systems solve this with a lock, + counts, which includes pretty much all of them. To remove a dirent the inode's reference count has + to be decreased, but you cannot decrease the reference count before removing the dirent nor can you + decrease the reference count after removing the dirent. File systems solve this with a lock, and by ensuring it all stays within the same FS transaction. Truncation is generally not atomic in itself, and combining truncate(2) and unlink(2) is of @@ -115,7 +115,7 @@ class Repository: this is of course way more complex). LoggedIO gracefully handles truncate/unlink splits as long as the truncate resulted in - a zero length file. Zero length segments are considered to not exist, while LoggedIO.cleanup() + a zero-length file. Zero-length segments are considered to not exist, while LoggedIO.cleanup() will still get rid of them. """ diff --git a/src/borg/selftest.py b/src/borg/selftest.py index 8c39ec604..167e76f79 100644 --- a/src/borg/selftest.py +++ b/src/borg/selftest.py @@ -2,16 +2,16 @@ # See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT """ -Self testing module +Self-testing module =================== The selftest() function runs a small test suite of relatively fast tests that are meant to discover issues with the way Borg was compiled or packaged and also bugs in Borg itself. 
-These tests are a subset of the borg/testsuite and are run with Pythons built-in unittest, hence none of +These tests are a subset of the borg/testsuite and are run with Python's built-in unittest, hence none of the tests used for this can or should be ported to py.test currently. -To assert that self test discovery works correctly the number of tests is kept in the SELFTEST_COUNT +To assert that self-test discovery works correctly the number of tests is kept in the SELFTEST_COUNT variable. SELFTEST_COUNT must be updated if new tests are added or removed to or from any of the tests used here. """ @@ -68,7 +68,7 @@ def selftest(logger): for test_case in SELFTEST_CASES: module = sys.modules[test_case.__module__] # a normal borg user does not have pytest installed, we must not require it in the test modules used here. - # note: this only detects the usual toplevel import + # Note: this only detects the usual top-level import assert 'pytest' not in dir(module), "pytest must not be imported in %s" % module.__name__ test_suite.addTest(defaultTestLoader.loadTestsFromTestCase(test_case)) test_suite.run(result) diff --git a/src/borg/shellpattern.py b/src/borg/shellpattern.py index c9447704b..c4ce654b4 100644 --- a/src/borg/shellpattern.py +++ b/src/borg/shellpattern.py @@ -10,14 +10,14 @@ def translate(pat, match_end=r"\Z"): any path separator. Wrap meta-characters in brackets for a literal match (i.e. "[?]" to match the literal character "?"). - Using match_end=regex one can give a regular expression that is used to match after the regex that is generated from + Using match_end=regex, one can provide a regular expression that is used to match after the regex that is generated from the pattern. The default is to match the end of the string. This function is derived from the "fnmatch" module distributed with the Python standard library. Copyright (C) 2001-2016 Python Software Foundation. All rights reserved. 
- TODO: support {alt1,alt2} shell-style alternatives + TODO: support {alt1,alt2} shell-style alternatives. """ sep = os.path.sep diff --git a/src/borg/upgrader.py b/src/borg/upgrader.py index 5be0aac0e..a6bdb79d8 100644 --- a/src/borg/upgrader.py +++ b/src/borg/upgrader.py @@ -23,15 +23,15 @@ class AtticRepositoryUpgrader(Repository): super().__init__(*args, **kw) def upgrade(self, dryrun=True, inplace=False, progress=False): - """convert an attic repository to a borg repository + """Convert an Attic repository to a Borg repository. - those are the files that need to be upgraded here, from most + These are the files that need to be upgraded here, from most important to least important: segments, key files, and various - caches, the latter being optional, as they will be rebuilt if + caches—the latter being optional, as they will be rebuilt if missing. - we nevertheless do the order in reverse, as we prefer to do - the fast stuff first, to improve interactivity. + We nevertheless do the order in reverse, as we prefer to do + the fast stuff first to improve interactivity. """ with self: backup = None @@ -70,13 +70,14 @@ class AtticRepositoryUpgrader(Repository): @staticmethod def convert_segments(segments, dryrun=True, inplace=False, progress=False): - """convert repository segments from attic to borg + """Convert repository segments from Attic to Borg. - replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in + Replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in `$ATTIC_REPO/data/**`. - luckily the magic string length didn't change so we can just - replace the 8 first bytes of all regular files in there.""" + Luckily the magic string length did not change, so we can just + replace the first 8 bytes of all regular files in there. + """ logger.info("converting %d segments..." 
% len(segments)) segment_count = len(segments) pi = ProgressIndicatorPercent(total=segment_count, msg="Converting segments %3.0f%%", msgid='upgrade.convert_segments') @@ -94,55 +95,57 @@ class AtticRepositoryUpgrader(Repository): def header_replace(filename, old_magic, new_magic, inplace=True): with open(filename, 'r+b') as segment: segment.seek(0) - # only write if necessary + # Only write if necessary. if segment.read(len(old_magic)) == old_magic: if inplace: segment.seek(0) segment.write(new_magic) else: - # rename the hardlink and rewrite the file. this works - # because the file is still open. so even though the file + # Rename the hardlink and rewrite the file. This works + # because the file is still open. Even though the file # is renamed, we can still read it until it is closed. os.rename(filename, filename + '.tmp') with open(filename, 'wb') as new_segment: new_segment.write(new_magic) new_segment.write(segment.read()) - # the little dance with the .tmp file is necessary - # because Windows won't allow overwriting an open file. + # The little dance with the .tmp file is necessary + # because Windows will not allow overwriting an open file. os.unlink(filename + '.tmp') def find_attic_keyfile(self): - """find the attic keyfiles + """Find the Attic key files. - the keyfiles are loaded by `KeyfileKey.find_key_file()`. that + The key files are loaded by `KeyfileKey.find_key_file()`. That finds the keys with the right identifier for the repo. - this is expected to look into $HOME/.attic/keys or + This is expected to look into $HOME/.attic/keys or $ATTIC_KEYS_DIR for key files matching the given Borg repository. - it is expected to raise an exception (KeyfileNotFoundError) if - no key is found. whether that exception is from Borg or Attic + It is expected to raise an exception (KeyfileNotFoundError) if + no key is found. Whether that exception is from Borg or Attic is unclear. 
- this is split in a separate function in case we want to use - the attic code here directly, instead of our local - implementation.""" + This is split into a separate function in case we want to use + the Attic code here directly, instead of our local + implementation. + """ return AtticKeyfileKey.find_key_file(self) @staticmethod def convert_keyfiles(keyfile, dryrun): - """convert key files from attic to borg + """Convert key files from Attic to Borg. - replacement pattern is `s/ATTIC KEY/BORG_KEY/` in + Replacement pattern is `s/ATTIC KEY/BORG_KEY/` in `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or `$HOME/.config/borg/keys`. - no need to decrypt to convert. we need to rewrite the whole - key file because magic string length changed, but that's not a - problem because the keyfiles are small (compared to, say, - all the segments).""" + No need to decrypt to convert. We need to rewrite the whole + key file because the magic string length changed, but that is not a + problem because the key files are small (compared to, say, + all the segments). + """ logger.info("converting keyfile %s" % keyfile) with open(keyfile) as f: data = f.read() @@ -154,16 +157,16 @@ class AtticRepositoryUpgrader(Repository): f.write(data) def convert_repo_index(self, dryrun, inplace): - """convert some repo files + """Convert some repo files. - those are all hash indexes, so we need to + These are all hash indexes, so we need to `s/ATTICIDX/BORG_IDX/` in a few locations: * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` is the `Repository.get_index_transaction_id()`), which we - should probably update, with a lock, see - `Repository.open()`, which i'm not sure we should use - because it may write data on `Repository.close()`... + should probably update with a lock (see + `Repository.open()`), although we might avoid it because it may + write data on `Repository.close()`. 
""" transaction_id = self.get_index_transaction_id() if transaction_id is None: @@ -175,16 +178,16 @@ class AtticRepositoryUpgrader(Repository): AtticRepositoryUpgrader.header_replace(index, b'ATTICIDX', b'BORG_IDX', inplace=inplace) def convert_cache(self, dryrun): - """convert caches from attic to borg + """Convert caches from Attic to Borg. - those are all hash indexes, so we need to + These are all hash indexes, so we need to `s/ATTICIDX/BORG_IDX/` in a few locations: * the `files` and `chunks` cache (in `$ATTIC_CACHE_DIR` or `$HOME/.cache/attic//`), which we could just drop, - but if we'd want to convert, we could open it with the - `Cache.open()`, edit in place and then `Cache.close()` to - make sure we have locking right + but if we wanted to convert it, we could open it with + `Cache.open()`, edit in place, and then `Cache.close()` to + make sure we have locking right. """ # copy of attic's get_cache_dir() attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', @@ -194,19 +197,17 @@ class AtticRepositoryUpgrader(Repository): borg_cache_dir = os.path.join(get_cache_dir(), self.id_str) def copy_cache_file(path): - """copy the given attic cache path into the borg directory + """Copy the given Attic cache path into the Borg directory. - does nothing if dryrun is True. also expects + Does nothing if dryrun is True. Also expects attic_cache_dir and borg_cache_dir to be set in the parent - scope, to the directories path including the repository + scope, to the directories' paths including the repository identifier. - :params path: the basename of the cache file to copy - (example: "files" or "chunks") as a string - - :returns: the borg file that was created or None if no - Attic cache file was found. - + :param path: the basename of the cache file to copy + (example: "files" or "chunks") as a string + :returns: the Borg file that was created, or None if no + Attic cache file was found. 
""" attic_file = os.path.join(attic_cache_dir, path) if os.path.exists(attic_file): diff --git a/src/borg/version.py b/src/borg/version.py index a7a997f7b..0f67eb60f 100644 --- a/src/borg/version.py +++ b/src/borg/version.py @@ -13,7 +13,7 @@ def parse_version(version): For final versions the last element is a -1. For prerelease versions the last two elements are a smaller negative number and the number of e.g. the beta. - This version format is part of the remote protocol, don‘t change in breaking ways. + This version format is part of the remote protocol; don't change it in breaking ways. """ version_re = r""" (?P\d+)\.(?P\d+)\.(?P\d+) # version, e.g. 1.2.33 @@ -34,7 +34,7 @@ def parse_version(version): def format_version(version): - """a reverse for parse_version (obviously without the dropped information)""" + """A reverse for parse_version (obviously without the dropped information).""" f = [] it = iter(version) while True: diff --git a/src/borg/xattr.py b/src/borg/xattr.py index 76e3b8d41..cc1d91ccd 100644 --- a/src/borg/xattr.py +++ b/src/borg/xattr.py @@ -39,8 +39,7 @@ if sys.platform.startswith('linux'): def is_enabled(path=None): - """Determine if xattr is enabled on the filesystem - """ + """Determine if xattr is enabled on the filesystem.""" with tempfile.NamedTemporaryFile(dir=path, prefix='borg-tmp') as f: fd = f.fileno() name, value = b'user.name', b'value' @@ -66,7 +65,7 @@ def get_all(path, follow_symlinks=False): and only applies when *path* is not an open file descriptor. The returned mapping maps xattr names (bytes) to values (bytes or None). - None indicates, as a xattr value, an empty value, i.e. a value of length zero. + None indicates, as an xattr value, an empty value, i.e. a value of length zero. 
""" if isinstance(path, str): path = os.fsencode(path) @@ -75,18 +74,18 @@ def get_all(path, follow_symlinks=False): names = listxattr(path, follow_symlinks=follow_symlinks) for name in names: try: - # xattr name is a bytes object, we directly use it. - # if we get an empty xattr value (b''), we store None into the result dict - - # borg always did it like that... + # xattr name is a bytes object; we directly use it. + # If we get an empty xattr value (b''), we store None into the result dict— + # Borg has always done it like that. result[name] = getxattr(path, name, follow_symlinks=follow_symlinks) or None except OSError as e: - # note: platform.xattr._check has already made a nice exception e with errno, msg, path/fd - if e.errno in (ENOATTR, ): # errors we just ignore silently - # ENOATTR: a race has happened: xattr names were deleted after list. + # Note: platform.xattr._check has already made a nice exception e with errno, msg, path/fd + if e.errno in (ENOATTR, ): # errors we ignore silently + # ENOATTR: a race has happened: xattr names were deleted after listing. pass else: # all others: warn, skip this single xattr name, continue processing other xattrs # EPERM: we were not permitted to read this attribute - # EINVAL: maybe xattr name is invalid or other issue, #6988 + # EINVAL: maybe the xattr name is invalid or other issue, #6988 logger.warning('when getting extended attribute %s: %s', name.decode(errors='replace'), str(e)) except OSError as e: if e.errno in (errno.ENOTSUP, errno.EPERM): @@ -105,8 +104,8 @@ def set_all(path, xattrs, follow_symlinks=False): *path* can either be a path (str or bytes) or an open file descriptor (int). *follow_symlinks* indicates whether symlinks should be followed and only applies when *path* is not an open file descriptor. - *xattrs* is mapping maps xattr names (bytes) to values (bytes or None). - None indicates, as a xattr value, an empty value, i.e. a value of length zero. 
+ *xattrs* is a mapping that maps xattr names (bytes) to values (bytes or None). + None indicates, as an xattr value, an empty value, i.e. a value of length zero. Return warning status (True means a non-fatal exception has happened and was dealt with). """ @@ -124,8 +123,8 @@ def set_all(path, xattrs, follow_symlinks=False): if e.errno == errno.E2BIG: err_str = 'too big for this filesystem (%s)' % str(e) elif e.errno == errno.ENOSPC: - # ext4 reports ENOSPC when trying to set an xattr with >4kiB while ext4 can only support 4kiB xattrs - # (in this case, this is NOT a "disk full" error, just a ext4 limitation). + # ext4 reports ENOSPC when trying to set an xattr with >4 KiB while ext4 can only support 4 KiB xattrs + # (in this case, this is NOT a "disk full" error, just an ext4 limitation). err_str = 'fs full or xattr too big? [xattr len = %d] (%s)' % (len(v), str(e)) else: # generic handler From 477366f4a5b320f21ac68dace868341896705730 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 10 Aug 2025 17:40:03 +0200 Subject: [PATCH 02/16] tests: fix typos and grammar --- src/borg/testsuite/__init__.py | 16 +++++----- src/borg/testsuite/archiver.py | 4 +-- src/borg/testsuite/benchmark.py | 2 +- src/borg/testsuite/chunker.py | 2 +- src/borg/testsuite/chunker_pytest.py | 4 +-- src/borg/testsuite/chunker_slow.py | 2 +- src/borg/testsuite/compress.py | 16 +++++----- src/borg/testsuite/hashindex.py | 36 ++++++++++----------- src/borg/testsuite/hashindex_stress.py | 10 +++--- src/borg/testsuite/item.py | 2 +- src/borg/testsuite/key.py | 2 +- src/borg/testsuite/nonces.py | 2 +- src/borg/testsuite/repository.py | 16 ++++------ src/borg/testsuite/shellpattern.py | 4 +-- src/borg/testsuite/upgrader.py | 44 +++++++++++++------------- src/borg/testsuite/xattr.py | 8 ++--- 16 files changed, 83 insertions(+), 87 deletions(-) diff --git a/src/borg/testsuite/__init__.py b/src/borg/testsuite/__init__.py index 1955351f8..17d3ea3f9 100644 --- a/src/borg/testsuite/__init__.py +++ 
b/src/borg/testsuite/__init__.py @@ -22,7 +22,7 @@ from ..helpers import umount from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR from .. import platform -# Note: this is used by borg.selftest, do not use or import py.test functionality here. +# Note: this is used by borg.selftest; do not use or import pytest functionality here. from ..fuse_impl import llfuse, has_pyfuse3, has_llfuse @@ -54,7 +54,7 @@ if sys.platform.startswith('netbsd'): def same_ts_ns(ts_ns1, ts_ns2): - """compare 2 timestamps (both in nanoseconds) whether they are (roughly) equal""" + """Compare two timestamps (both in nanoseconds) to determine whether they are (roughly) equal.""" diff_ts = int(abs(ts_ns1 - ts_ns2)) diff_max = 10 ** (-st_mtime_ns_round) return diff_ts <= diff_max @@ -81,7 +81,7 @@ def are_symlinks_supported(): @functools.lru_cache def are_hardlinks_supported(): if not hasattr(os, 'link'): - # some pythons do not have os.link + # Some Python builds do not have os.link return False with unopened_tempfile() as file1path, unopened_tempfile() as file2path: @@ -157,7 +157,7 @@ def is_birthtime_fully_supported(): def no_selinux(x): - # selinux fails our FUSE tests, thus ignore selinux xattrs + # SELinux fails our FUSE tests; thus, ignore SELinux xattrs SELINUX_KEY = b'security.selinux' if isinstance(x, dict): return {k: v for k, v in x.items() if k != SELINUX_KEY} @@ -222,7 +222,7 @@ class BaseTestCase(unittest.TestCase): d1[4] = None if not stat.S_ISCHR(s2.st_mode) and not stat.S_ISBLK(s2.st_mode): d2[4] = None - # If utime isn't fully supported, borg can't set mtime. + # If utime isn't fully supported, Borg can't set mtime. # Therefore, we shouldn't test it in that case. 
if is_utime_fully_supported(): # Older versions of llfuse do not support ns precision properly @@ -301,7 +301,7 @@ class BaseTestCase(unittest.TestCase): time.sleep(0.2) def wait_for_mountstate(self, mountpoint, *, mounted, timeout=5): - """Wait until a path meets specified mount point status""" + """Wait until a path meets the specified mount point status.""" timeout += time.time() while timeout > time.time(): if os.path.ismount(mountpoint) == mounted: @@ -312,7 +312,7 @@ class BaseTestCase(unittest.TestCase): @contextmanager def read_only(self, path): - """Some paths need to be made read-only for testing + """Some paths need to be made read-only for testing. If the tests are executed inside a fakeroot environment, the changes from chmod won't affect the real permissions of that @@ -379,7 +379,7 @@ class environment_variable: class FakeInputs: - """Simulate multiple user inputs, can be used as input() replacement""" + """Simulate multiple user inputs; can be used as an input() replacement.""" def __init__(self, inputs): self.inputs = inputs diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index c5a75578f..7dcd867d7 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -103,7 +103,7 @@ def exec_cmd(*args, archiver=None, fork=False, exe=None, input=b'', binary_outpu try: args = archiver.parse_args(list(args)) # argparse parsing may raise SystemExit when the command line is bad or - # actions that abort early (eg. --help) where given. Catch this and return + # Actions that abort early (e.g., --help) were given. Catch this and return # the error code as-if we invoked a Borg binary. except SystemExit as e: output_text.flush() @@ -846,7 +846,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): shutil.rmtree(self.cache_path) self.cmd('info', self.repository_location) - # Needs confirmation: cache and security dir both gone (eg. 
another host or rm -rf ~) + # Needs confirmation: cache and security dir both gone (e.g., another host or rm -rf ~) shutil.rmtree(self.cache_path) shutil.rmtree(self.get_security_dir()) if self.FORK_DEFAULT: diff --git a/src/borg/testsuite/benchmark.py b/src/borg/testsuite/benchmark.py index f3ec06f2a..f46d0d578 100644 --- a/src/borg/testsuite/benchmark.py +++ b/src/borg/testsuite/benchmark.py @@ -1,5 +1,5 @@ """ -Do benchmarks using pytest-benchmark. +Run benchmarks using pytest-benchmark. Usage: diff --git a/src/borg/testsuite/chunker.py b/src/borg/testsuite/chunker.py index ce45b7c96..25fb7dc8e 100644 --- a/src/borg/testsuite/chunker.py +++ b/src/borg/testsuite/chunker.py @@ -9,7 +9,7 @@ from . import BaseTestCase def cf(chunks): - """chunk filter""" + """Chunk filter.""" # this is to simplify testing: either return the data piece (bytes) or the hole length (int). def _cf(chunk): if chunk.meta['allocation'] == CH_DATA: diff --git a/src/borg/testsuite/chunker_pytest.py b/src/borg/testsuite/chunker_pytest.py index aa2d9cddc..1b1cc45b8 100644 --- a/src/borg/testsuite/chunker_pytest.py +++ b/src/borg/testsuite/chunker_pytest.py @@ -140,7 +140,7 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse): def test_buzhash_chunksize_distribution(): data = os.urandom(1048576) - min_exp, max_exp, mask = 10, 16, 14 # chunk size target 16kiB, clip at 1kiB and 64kiB + min_exp, max_exp, mask = 10, 16, 14 # chunk size target 16 KiB, clip at 1 KiB and 64 KiB chunker = Chunker(0, min_exp, max_exp, mask, 4095) f = BytesIO(data) chunks = cf(chunker.chunkify(f)) @@ -153,7 +153,7 @@ def test_buzhash_chunksize_distribution(): max_count = sum(int(size == 2 ** max_exp) for size in chunk_sizes) print(f"count: {chunks_count} min: {min_chunksize_observed} max: {max_chunksize_observed} " f"min count: {min_count} max count: {max_count}") - # usually there will about 64 chunks + # usually there will be about 64 chunks assert 32 < chunks_count < 128 # chunks always must be 
between min and max (clipping must work): assert min_chunksize_observed >= 2 ** min_exp diff --git a/src/borg/testsuite/chunker_slow.py b/src/borg/testsuite/chunker_slow.py index da069403f..83a22835e 100644 --- a/src/borg/testsuite/chunker_slow.py +++ b/src/borg/testsuite/chunker_slow.py @@ -38,6 +38,6 @@ class ChunkerRegressionTestCase(BaseTestCase): runs.append(H(b''.join(chunks))) # The "correct" hash below matches the existing chunker behavior. - # Future chunker optimisations must not change this, or existing repos will bloat. + # Future chunker optimizations must not change this, or existing repos will bloat. overall_hash = H(b''.join(runs)) self.assert_equal(overall_hash, hex_to_bin("a43d0ecb3ae24f38852fcc433a83dacd28fe0748d09cc73fc11b69cf3f1a7299")) diff --git a/src/borg/testsuite/compress.py b/src/borg/testsuite/compress.py index 233bf7275..160f250fc 100644 --- a/src/borg/testsuite/compress.py +++ b/src/borg/testsuite/compress.py @@ -48,7 +48,7 @@ def test_lz4_buffer_allocation(monkeypatch): # disable fallback to no compression on incompressible data monkeypatch.setattr(LZ4, 'decide', lambda always_compress: LZ4) # test with a rather huge data object to see if buffer allocation / resizing works - data = os.urandom(5 * 2**20) * 10 # 50MiB badly compressible data + data = os.urandom(5 * 2**20) * 10 # 50 MiB badly compressible data assert len(data) == 50 * 2**20 c = Compressor('lz4') cdata = c.compress(data) @@ -90,8 +90,8 @@ def test_autodetect_invalid(): def test_zlib_compat(): - # for compatibility reasons, we do not add an extra header for zlib, - # nor do we expect one when decompressing / autodetecting + # For compatibility reasons, we do not add an extra header for zlib, + # nor do we expect one when decompressing or autodetecting for level in range(10): c = get_compressor(name='zlib', level=level) cdata1 = c.compress(data) @@ -109,7 +109,7 @@ def test_compressor(): dict(name='lz4'), dict(name='zstd', level=1), dict(name='zstd', level=3), - # avoiding 
high zstd levels, memory needs unclear + # Avoiding high zstd levels; memory needs unclear dict(name='zlib', level=0), dict(name='zlib', level=6), dict(name='zlib', level=9), @@ -118,7 +118,7 @@ def test_compressor(): params_list += [ dict(name='lzma', level=0), dict(name='lzma', level=6), - # we do not test lzma on level 9 because of the huge memory needs + # We do not test lzma on level 9 because of the huge memory needs ] for params in params_list: c = Compressor(**params) @@ -216,12 +216,12 @@ def test_obfuscate(): ) def test_padme_obfuscation(data_length, expected_padding): compressor = Compressor(name="obfuscate", level=250, compressor=Compressor("none")) - # the innner compressor will add an inner header of 2 bytes, so we reduce the data length by 2 bytes - # to be able to use (almost) the same test cases as in master branch. + # The inner compressor will add an inner header of 2 bytes, so we reduce the data length by 2 bytes + # to be able to use (almost) the same test cases as in the master branch. data = b"x" * (data_length - 2) compressed = compressor.compress(data) - # the outer "obfuscate" pseudo-compressor adds an outer header of 6 bytes. + # The outer "obfuscate" pseudo-compressor adds an outer header of 6 bytes. expected_padded_size = 6 + data_length + expected_padding assert ( diff --git a/src/borg/testsuite/hashindex.py b/src/borg/testsuite/hashindex.py index eb94e17a4..b75dfff4f 100644 --- a/src/borg/testsuite/hashindex.py +++ b/src/borg/testsuite/hashindex.py @@ -15,12 +15,12 @@ from . 
import BaseTestCase, unopened_tempfile def H(x): - # make some 32byte long thing that depends on x + # Make some 32-byte long thing that depends on x return bytes('%-0.32d' % x, 'ascii') def H2(x): - # like H(x), but with pseudo-random distribution of the output value + # Like H(x), but with pseudo-random distribution of the output value return hashlib.sha256(H(x)).digest() @@ -160,12 +160,12 @@ class HashIndexExtraTestCase(BaseTestCase): """These tests are separate because they should not become part of the selftest. """ def test_chunk_indexer(self): - # see _hashindex.c hash_sizes, we want to be close to the max. load + # See _hashindex.c hash_sizes; we want to be close to the maximum load # because interesting errors happen there. key_count = int(65537 * ChunkIndex.MAX_LOAD_FACTOR) - 10 index = ChunkIndex(key_count) all_keys = [hashlib.sha256(H(k)).digest() for k in range(key_count)] - # we're gonna delete 1/3 of all_keys, so let's split them 2/3 and 1/3: + # We are going to delete 1/3 of all_keys, so let's split them 2/3 and 1/3: keys, to_delete_keys = all_keys[0:(2*key_count//3)], all_keys[(2*key_count//3):] for i, key in enumerate(keys): @@ -180,10 +180,10 @@ class HashIndexExtraTestCase(BaseTestCase): for key in to_delete_keys: assert index.get(key) is None - # now delete every key still in the index + # Now delete every key still in the index for key in keys: del index[key] - # the index should now be empty + # The index should now be empty assert list(index.iteritems()) == [] @@ -533,33 +533,33 @@ class IndexCorruptionTestCase(BaseTestCase): from struct import pack def HH(x, y): - # make some 32byte long thing that depends on x and y. - # same x will mean a collision in the hashtable as bucket index is computed from - # first 4 bytes. giving a specific x targets bucket index x. - # y is to create different keys and does not go into the bucket index calculation. 
- # so, same x + different y --> collision + # Make some 32-byte long thing that depends on x and y. + # The same x will mean a collision in the hash table as the bucket index is computed from + # the first 4 bytes. Giving a specific x targets bucket index x. + # y is used to create different keys and does not go into the bucket index calculation. + # Therefore, same x + different y -> collision return pack(' tar archive to , all stuff starting with . + Extract the tar archive to , including all entries starting with . - return path to . + Return the path to . """ def files(members): @@ -35,10 +35,10 @@ def untar(tarfname, path, what): def repo_valid(path): """ - utility function to check if borg can open a repository + Utility function to check if Borg can open a repository. :param path: the path to the repository - :returns: if borg can check the repository + :returns: whether Borg can check the repository """ with Repository(str(path), exclusive=True, create=False) as repository: # can't check raises() because check() handles the error @@ -47,10 +47,10 @@ def repo_valid(path): def key_valid(path): """ - check that the new keyfile is alright + Check that the new key file is valid. :param path: the path to the key file - :returns: if the file starts with the borg magic string + :returns: whether the file starts with the Borg magic string """ keyfile = os.path.join(get_keys_dir(), os.path.basename(path)) @@ -60,10 +60,10 @@ def key_valid(path): def make_attic_repo(dir): """ - create an attic repo with some stuff in it + Create an Attic repo with some content in it. :param dir: path to the repository to be created - :returns: path to attic repository + :returns: path to the Attic repository """ # there is some stuff in that repo, copied from `RepositoryTestCase.test1` return untar(ATTIC_TAR, str(dir), 'repo') @@ -80,13 +80,13 @@ def inplace(request): def test_convert_segments(attic_repo, inplace): - """test segment conversion + """Test segment conversion. 
- this will load the given attic repository, list all the segments - then convert them one at a time. we need to close the repo before - conversion otherwise we have errors from borg + This will load the given Attic repository, list all the segments, + then convert them one at a time. We need to close the repo before + conversion; otherwise we have errors from Borg. - :param attic_repo: a populated attic repository (fixture) + :param attic_repo: a populated Attic repository (fixture) """ repo_path = attic_repo with pytest.raises(Repository.AtticRepository): @@ -102,21 +102,21 @@ def test_convert_segments(attic_repo, inplace): @pytest.fixture() def attic_key_file(tmpdir, monkeypatch): """ - create an attic key file from the given repo, in the keys - subdirectory of the given tmpdir + Create an Attic key file from the given repo, in the keys + subdirectory of the given tmpdir. - :param tmpdir: a temporary directory (a builtin fixture) - :returns: path to key file + :param tmpdir: a temporary directory (a built-in fixture) + :returns: path to the key file """ keys_dir = untar(ATTIC_TAR, str(tmpdir), 'keys') - # we use the repo dir for the created keyfile, because we do - # not want to clutter existing keyfiles + # We use the repo dir for the created key file, because we do + # not want to clutter existing key files. monkeypatch.setenv('ATTIC_KEYS_DIR', keys_dir) - # we use the same directory for the converted files, which - # will clutter the previously created one, which we don't care - # about anyways. in real runs, the original key will be retained. + # We use the same directory for the converted files, which + # will clutter the previously created one—which we don't care + # about anyway. In real runs, the original key will be retained. 
monkeypatch.setenv('BORG_KEYS_DIR', keys_dir) monkeypatch.setenv('ATTIC_PASSPHRASE', 'test') diff --git a/src/borg/testsuite/xattr.py b/src/borg/testsuite/xattr.py index 1c4a0fbba..a51aac66c 100644 --- a/src/borg/testsuite/xattr.py +++ b/src/borg/testsuite/xattr.py @@ -22,7 +22,7 @@ class XattrTestCase(BaseTestCase): os.unlink(self.symlink) def assert_equal_se(self, is_x, want_x): - # check 2 xattr lists for equality, but ignore security.selinux attr + # Check two xattr lists for equality, but ignore the security.selinux attribute. is_x = set(is_x) - {b'security.selinux', b'com.apple.provenance'} want_x = set(want_x) self.assert_equal(is_x, want_x) @@ -38,7 +38,7 @@ class XattrTestCase(BaseTestCase): setxattr(tmp_fd, b'user.bar', b'foo') setxattr(tmp_fn, b'user.empty', b'') if not is_linux: - # linux does not allow setting user.* xattrs on symlinks + # Linux does not allow setting user.* xattrs on symlinks. setxattr(tmp_lfn, b'user.linkxattr', b'baz') self.assert_equal_se(listxattr(tmp_fn), [b'user.foo', b'user.bar', b'user.empty']) self.assert_equal_se(listxattr(tmp_fd), [b'user.foo', b'user.bar', b'user.empty']) @@ -54,9 +54,9 @@ class XattrTestCase(BaseTestCase): def test_listxattr_buffer_growth(self): tmp_fn = os.fsencode(self.tmpfile.name) - # make it work even with ext4, which imposes rather low limits + # Make it work even with ext4, which imposes rather low limits. 
buffer.resize(size=64, init=True) - # xattr raw key list will be > 64 + # xattr raw key list will be greater than 64 keys = [b'user.attr%d' % i for i in range(20)] for key in keys: setxattr(tmp_fn, key, b'x') From a7f130f146401e844a269c777d6f016635e304b6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 10 Aug 2025 20:40:03 +0200 Subject: [PATCH 03/16] toplevel files: fix typos and grammar --- Brewfile | 2 +- MANIFEST.in | 6 +++--- Vagrantfile | 8 ++++---- pyproject.toml | 14 +++++++------- setup.py | 6 +++--- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Brewfile b/Brewfile index cefb1d3cd..7922c222c 100644 --- a/Brewfile +++ b/Brewfile @@ -5,7 +5,7 @@ brew 'xxhash' brew 'openssl@3.0' # osxfuse (aka macFUSE) is only required for "borg mount", -# but won't work on github actions' workers. +# but won't work on GitHub Actions' workers. # it requires installing a kernel extension, so some users # may want it and some won't. diff --git a/MANIFEST.in b/MANIFEST.in index caf027ad9..0bb27cb3e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,6 @@ -# stuff we need to include into the sdist is handled automatically by -# setuptools_scm - it includes all git-committed files. -# but we want to exclude some committed files/dirs not needed in the sdist: +# Files to include into the sdist are handled automatically by +# setuptools_scm — it includes all Git-committed files. 
+# But we want to exclude some committed files/dirs not needed in the sdist: exclude .editorconfig .gitattributes .gitignore .mailmap Vagrantfile prune .github include src/borg/platform/darwin.c src/borg/platform/freebsd.c src/borg/platform/linux.c src/borg/platform/posix.c diff --git a/Vagrantfile b/Vagrantfile index c70726db7..cd4b2b68f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -10,16 +10,16 @@ $wmem = $xdistn * 256 # give the VM additional memory for workers [MB] def packages_debianoid(user) return <<-EOF export DEBIAN_FRONTEND=noninteractive - # this is to avoid grub asking about which device it should install to: + # This is to avoid GRUB asking which device it should install to: echo "set grub-pc/install_devices /dev/sda" | debconf-communicate apt-get -y -qq update apt-get -y -qq dist-upgrade - # for building borgbackup and dependencies: + # For building BorgBackup and dependencies: apt install -y libssl-dev libacl1-dev liblz4-dev libzstd-dev libxxhash-dev pkg-config apt install -y libfuse-dev fuse || true apt install -y libfuse3-dev fuse3 || true apt install -y locales || true - # we need to give the prefix to support debian buster (no libxxhash.pc for pkg-config there): + # We need to give the prefix to support Debian Buster (no libxxhash.pc for pkg-config there): echo 'export BORG_LIBXXHASH_PREFIX=/usr' >> ~vagrant/.bash_profile sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen usermod -a -G fuse #{user} @@ -38,7 +38,7 @@ def packages_freebsd hostname freebsd # install all the (security and other) updates, base system freebsd-update --not-running-from-cron fetch install - # for building borgbackup and dependencies: + # For building BorgBackup and dependencies: pkg install -y liblz4 zstd pkgconf pkg install -y fusefs-libs || true pkg install -y fusefs-libs3 || true diff --git a/pyproject.toml b/pyproject.toml index ebe6442d8..76ce84e2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,19 +31,19 @@ classifiers = [ license = 
"BSD-3-Clause" license-files = ["LICENSE", "AUTHORS"] dependencies = [ - # we are rather picky about msgpack versions, because a good working msgpack is - # very important for borg, see: https://github.com/borgbackup/borg/issues/3753 + # We are rather picky about msgpack versions, because a good working msgpack is + # very important for Borg, see: https://github.com/borgbackup/borg/issues/3753 # Please note: - # using any other msgpack version is not supported by borg development and + # Using any other msgpack version is not supported by Borg development and # any feedback related to issues caused by this will be ignored. "msgpack >=1.0.3, <=1.1.1", "packaging", ] -# note for package maintainers: if you package borgbackup for distribution, -# please (if available) add pyfuse3 (preferably) or llfuse as a *requirement*. +# Note for package maintainers: If you package BorgBackup for distribution, +# please (if available) add pyfuse3 (preferably) or llfuse as a requirement. # "borg mount" needs one of them to work. -# if neither is available, do not require it, most of borgbackup will work. +# If neither is available, do not require it; most of BorgBackup will work. [project.optional-dependencies] llfuse = ["llfuse >= 1.3.8"] pyfuse3 = ["pyfuse3 >= 3.1.1"] @@ -97,7 +97,7 @@ select = ["E", "F"] # F405 undefined or defined from star imports # F811 redef of unused var -# borg code style guidelines: +# Borg code style guidelines: # Ignoring E203 due to https://github.com/PyCQA/pycodestyle/issues/373 ignore = ["E203", "F405", "E402"] diff --git a/setup.py b/setup.py index 5cebde899..75713d62b 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# borgbackup - main setup code (extension building here, rest see pyproject.toml) +# BorgBackup - main setup code (extensions built here; the rest is in pyproject.toml). 
import os import re @@ -28,7 +28,7 @@ sys.path += [os.path.dirname(__file__)] is_win32 = sys.platform.startswith("win32") -# Number of threads to use for cythonize, not used on windows +# Number of threads to use for cythonize; not used on Windows cpu_threads = multiprocessing.cpu_count() if multiprocessing and multiprocessing.get_start_method() != "spawn" else None # How the build process finds the system libs: @@ -106,7 +106,7 @@ if not on_rtd: try: import pkgconfig as pc except ImportError: - print("Warning: can not import pkgconfig python package.") + print("Warning: cannot import pkgconfig Python package.") pc = None def lib_ext_kwargs(pc, prefix_env_var, lib_name, lib_pkg_name, pc_version, lib_subdir="lib"): From 404bb1ca20fb0ee6e7c11349b54f3b1041ce583c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 10 Aug 2025 22:06:55 +0200 Subject: [PATCH 04/16] scripts: fix typos and grammar --- scripts/borg.exe.spec | 14 +++++++------- scripts/errorlist.py | 16 ++++++++-------- scripts/hash_sizes.py | 12 ++++++------ scripts/make.py | 10 +++++----- scripts/shell_completions/bash/borg | 10 +++++----- scripts/shell_completions/fish/borg.fish | 10 +++++----- scripts/shell_completions/zsh/_borg | 2 +- 7 files changed, 37 insertions(+), 37 deletions(-) diff --git a/scripts/borg.exe.spec b/scripts/borg.exe.spec index be7e1e134..2fd74eb4f 100644 --- a/scripts/borg.exe.spec +++ b/scripts/borg.exe.spec @@ -1,5 +1,5 @@ # -*- mode: python -*- -# this pyinstaller spec file is used to build borg binaries on posix platforms +# This PyInstaller spec file is used to build Borg binaries on POSIX platforms. import os, sys @@ -33,8 +33,8 @@ a = Analysis([os.path.join(basepath, 'src', 'borg', '__main__.py'), ], cipher=block_cipher) if sys.platform == 'darwin': - # do not bundle the osxfuse libraries, so we do not get a version - # mismatch to the installed kernel driver of osxfuse. 
+ # Do not bundle the macFUSE libraries to avoid a version + # mismatch with the installed macFUSE kernel driver. a.binaries = [b for b in a.binaries if 'libosxfuse' not in b[0]] pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) @@ -51,10 +51,10 @@ exe = EXE(pyz, console=True) # Build a directory-based binary in addition to a packed -# single file. This allows one to easily look at all included -# files (e.g. without having to strace or halt the built binary -# and introspect /tmp). Also avoids unpacking all libs when -# running the app, which is better for app signing on various OS. +# single file. This allows one to easily look at all included +# files (e.g., without having to strace or halt the built binary +# and introspect /tmp). Also avoids unpacking all libraries when +# running the app, which is better for app signing on various operating systems. slim_exe = EXE(pyz, a.scripts, exclude_binaries=True, diff --git a/scripts/errorlist.py b/scripts/errorlist.py index ed81fc981..dd480d5b3 100755 --- a/scripts/errorlist.py +++ b/scripts/errorlist.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# this script automatically generates the error list for the docs by +# This script automatically generates the error list for the docs by # looking at the "Error" class and its subclasses. from textwrap import indent @@ -14,14 +14,14 @@ def subclasses(cls): return set(direct_subclasses) | set(s for c in direct_subclasses for s in subclasses(c)) -# 0, 1, 2 are used for success, generic warning, generic error -# 3..99 are available for specific errors -# 100..127 are available for specific warnings -# 128+ are reserved for signals +# 0, 1, 2 are used for success, generic warning, generic error. +# 3..99 are available for specific errors. +# 100..127 are available for specific warnings. +# 128+ are reserved for signals. free_error_rcs = set(range(EXIT_ERROR_BASE, EXIT_WARNING_BASE)) # 3 .. 99 free_warning_rcs = set(range(EXIT_WARNING_BASE, EXIT_SIGNAL_BASE)) # 100 ..
127 -# these classes map to rc 2 +# These classes map to rc 2 generic_error_rc_classes = set() generic_warning_rc_classes = set() @@ -37,7 +37,7 @@ for cls in sorted(error_classes, key=lambda cls: (cls.__module__, cls.__qualname elif rc == 2: generic_error_rc_classes.add(cls.__qualname__) else: # rc != 2 - # if we did not intentionally map this to the generic error rc, this might be an issue: + # If we did not intentionally map this to the generic error rc, this might be an issue: print(f'ERROR: {rc} is not a free/available RC, but either duplicate or invalid') print() @@ -55,7 +55,7 @@ for cls in sorted(warning_classes, key=lambda cls: (cls.__module__, cls.__qualna elif rc == 1: generic_warning_rc_classes.add(cls.__qualname__) else: # rc != 1 - # if we did not intentionally map this to the generic warning rc, this might be an issue: + # If we did not intentionally map this to the generic warning rc, this might be an issue: print(f'ERROR: {rc} is not a free/available RC, but either duplicate or invalid') print("\n") diff --git a/scripts/hash_sizes.py b/scripts/hash_sizes.py index 68e6e160a..951eb1f88 100644 --- a/scripts/hash_sizes.py +++ b/scripts/hash_sizes.py @@ -1,13 +1,13 @@ """ -Compute hashtable sizes with nices properties +Compute hash table sizes with nice properties: - prime sizes (for small to medium sizes) -- 2 prime-factor sizes (for big sizes) +- two prime-factor sizes (for big sizes) - fast growth for small sizes - slow growth for big sizes Note: - this is just a tool for developers. - within borgbackup, it is just used to generate hash_sizes definition for _hashindex.c. + This is just a tool for developers. + Within BorgBackup, it is only used to generate the hash_sizes definition for _hashindex.c. 
""" from collections import namedtuple @@ -20,7 +20,7 @@ start, end_p1, end_p2 = 1 * K, 127 * M, 2 * G - 10 * M # stay well below 2^31 - Policy = namedtuple("Policy", "upto grow") policies = [ - # which growth factor to use when growing a hashtable of size < upto + # which growth factor to use when growing a hash table of size < upto # grow fast (*2.0) at the start so we do not have to resize too often (expensive). # grow slow (*1.1) for huge hash tables (do not jump too much in memory usage) Policy(256*K, 2.0), @@ -85,7 +85,7 @@ def main(): sizes.append(p) i = int(i * grow_factor) - gen = two_prime_factors() # for lower ram consumption + gen = two_prime_factors() # for lower RAM consumption while i < end_p2: grow_factor = get_grow_factor(i) p = find_bigger_prime(gen, i) diff --git a/scripts/make.py b/scripts/make.py index c90e1bec0..982cd67c3 100644 --- a/scripts/make.py +++ b/scripts/make.py @@ -23,7 +23,7 @@ def format_metavar(option): class BuildUsage: - """generate usage docs for each command""" + """Generate usage docs for each command.""" def run(self): print('generating usage docs') @@ -31,11 +31,11 @@ class BuildUsage: borg.doc_mode = 'build_man' if not os.path.exists('docs/usage'): os.mkdir('docs/usage') - # allows us to build docs without the C modules fully loaded during help generation + # Allows us to build docs without the C modules fully loaded during help generation from borg.archiver import Archiver parser = Archiver(prog='borg').build_parser() - # borgfs has a separate man page to satisfy debian's "every program from a package - # must have a man page" requirement, but it doesn't need a separate HTML docs page + # borgfs has a separate man page to satisfy Debian's "every program from a package + # must have a man page" requirement, but it does not need a separate HTML docs page. 
#borgfs_parser = Archiver(prog='borgfs').build_parser() self.generate_level("", parser, Archiver) @@ -290,7 +290,7 @@ class BuildMan: import borg borg.doc_mode = 'build_man' os.makedirs('docs/man', exist_ok=True) - # allows us to build docs without the C modules fully loaded during help generation + # Allows us to build docs without the C modules fully loaded during help generation from borg.archiver import Archiver parser = Archiver(prog='borg').build_parser() borgfs_parser = Archiver(prog='borgfs').build_parser() diff --git a/scripts/shell_completions/bash/borg b/scripts/shell_completions/bash/borg index 350266a8f..f5d6ccba3 100644 --- a/scripts/shell_completions/bash/borg +++ b/scripts/shell_completions/bash/borg @@ -1,7 +1,7 @@ -# Completions for borg +# Completions for Borg # https://www.borgbackup.org/ # Note: -# Listing archives works on password protected repositories only if $BORG_PASSPHRASE is set. +# Listing archives works on password-protected repositories only if $BORG_PASSPHRASE is set. 
# Install: # Copy this file to /usr/share/bash-completion/completions/ or /etc/bash_completion.d/ @@ -57,7 +57,7 @@ _borg() return 0 ;; '-o') - # FIXME This list is probably not full, but I tried to pick only those that are relevant to borg mount -o: + # FIXME: This list is probably not complete, but it includes options relevant to 'borg mount -o': local fuse_options="ac_attr_timeout= allow_damaged_files allow_other allow_root attr_timeout= auto auto_cache auto_unmount default_permissions entry_timeout= gid= group_id= kernel_cache max_read= negative_timeout= noauto noforget remember= remount rootmode= uid= umask= user user_id= versions" COMPREPLY=( $(compgen -W "${fuse_options}" -- ${cur}) ) return 0 @@ -162,11 +162,11 @@ _borg() if [[ ${prev} == "::" ]] ; then list_archives=1 fi - # Second archive listing for borg diff + # Second archive listing for 'borg diff' if [[ ${COMP_LINE} =~ ^.*\ diff\ .*::[^\ ]+\ ${cur}$ ]] ; then list_archives=1 fi - # Additional archive listing for borg delete + # Additional archive listing for 'borg delete' if [[ ${COMP_LINE} =~ ^.*\ delete\ .*::[^\ ]+.*${cur}$ ]] ; then list_archives=1 fi diff --git a/scripts/shell_completions/fish/borg.fish b/scripts/shell_completions/fish/borg.fish index 75af73a51..c1465871a 100644 --- a/scripts/shell_completions/fish/borg.fish +++ b/scripts/shell_completions/fish/borg.fish @@ -1,7 +1,7 @@ -# Completions for borg +# Completions for Borg # https://www.borgbackup.org/ # Note: -# Listing archives works on password protected repositories only if $BORG_PASSPHRASE is set. +# Listing archives works on password-protected repositories only if $BORG_PASSPHRASE is set. 
# Install: # Copy this file to /usr/share/fish/vendor_completions.d/ @@ -19,7 +19,7 @@ complete -c borg -f -n __fish_is_first_token -a 'prune' -d 'Prune repository arc complete -c borg -f -n __fish_is_first_token -a 'compact' -d 'Free repository space' complete -c borg -f -n __fish_is_first_token -a 'info' -d 'Show archive details' complete -c borg -f -n __fish_is_first_token -a 'mount' -d 'Mount archive or a repository' -complete -c borg -f -n __fish_is_first_token -a 'umount' -d 'Un-mount the mounted archive' +complete -c borg -f -n __fish_is_first_token -a 'umount' -d 'Unmount the mounted archive' function __fish_borg_seen_key if __fish_seen_subcommand_from key @@ -48,8 +48,8 @@ function __fish_borg_seen_benchmark end return 1 end -complete -c borg -f -n __fish_is_first_token -a 'benchmark' -d 'Benchmark borg operations' -complete -c borg -f -n __fish_borg_seen_benchmark -a 'crud' -d 'Benchmark borg CRUD operations' +complete -c borg -f -n __fish_is_first_token -a 'benchmark' -d 'Benchmark Borg operations' +complete -c borg -f -n __fish_borg_seen_benchmark -a 'crud' -d 'Benchmark Borg CRUD operations' function __fish_borg_seen_help if __fish_seen_subcommand_from help diff --git a/scripts/shell_completions/zsh/_borg b/scripts/shell_completions/zsh/_borg index 548d9a28e..1599843ab 100644 --- a/scripts/shell_completions/zsh/_borg +++ b/scripts/shell_completions/zsh/_borg @@ -51,7 +51,7 @@ _borg_commands() { 'recreate:re-create archives' 'rename:rename an existing archive' 'serve:start in server mode' - 'umount:un-mount the FUSE filesystem' + 'umount:unmount the FUSE filesystem' 'upgrade:upgrade a repository from a previous version' 'with-lock:run a user specified command with the repository lock held' ) From 6602cf1679950426bfb9e439096ae59535d76d44 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 11 Aug 2025 15:26:24 +0200 Subject: [PATCH 05/16] docs: fix typos and grammar --- docs/book.rst | 2 +- docs/changes.rst | 6 ++-- 
docs/deployment/automated-local.rst | 18 ++++++------ docs/deployment/central-backup-server.rst | 18 ++++++------ docs/deployment/hosting-repositories.rst | 4 +-- docs/deployment/image-backup.rst | 8 +++--- docs/deployment/non-root-user.rst | 24 ++++++++-------- docs/deployment/pull-backup.rst | 14 +++++----- docs/development.rst | 3 +- docs/index.rst | 2 +- docs/installation.rst | 2 +- docs/internals.rst | 6 ++-- docs/internals/data-structures.rst | 14 +++++----- docs/internals/frontends.rst | 14 +++++----- docs/internals/security.rst | 6 ++-- docs/introduction.rst | 6 ++-- docs/man_intro.rst | 2 +- docs/quickstart.rst | 32 ++++++++++----------- docs/support.rst | 6 ++-- docs/usage.rst | 2 +- docs/usage/compact.rst | 2 +- docs/usage/debug.rst | 4 +-- docs/usage/delete.rst | 2 +- docs/usage/general.rst | 10 +++---- docs/usage/mount.rst | 4 +-- docs/usage/notes.rst | 34 +++++++++++------------ docs/usage/prune.rst | 26 ++++++++--------- docs/usage/recreate.rst | 10 +++---- docs/usage/serve.rst | 32 ++++++++++----------- docs/usage/tar.rst | 10 +++---- docs/usage/upgrade.rst | 8 +++--- 31 files changed, 166 insertions(+), 165 deletions(-) diff --git a/docs/book.rst b/docs/book.rst index 56b724b26..969055b9d 100644 --- a/docs/book.rst +++ b/docs/book.rst @@ -5,7 +5,7 @@ Borg documentation ================== -.. when you add an element here, do not forget to add it to index.rst +.. When you add an element here, do not forget to add it to index.rst. .. Note: Some things are in appendices (see latex_appendices in conf.py) .. toctree:: diff --git a/docs/changes.rst b/docs/changes.rst index fc1fea349..3bc2fe85c 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -87,11 +87,11 @@ no matter what encryption mode they use, including "none"): 5. Done. Manifest and archives are TAM authenticated now. 
-Vulnerability time line: +Vulnerability timeline: * 2023-06-13: Vulnerability discovered during code review by Thomas Waldmann * 2023-06-13...: Work on fixing the issue, upgrade procedure, docs. -* 2023-06-30: CVE was assigned via Github CNA +* 2023-06-30: CVE was assigned via GitHub CNA * 2023-06-30 .. 2023-08-29: Fixed issue, code review, docs, testing. * 2023-08-30: Released fixed version 1.2.5 (broken upgrade procedure for some repos) * 2023-08-31: Released fixed version 1.2.6 (fixes upgrade procedure) @@ -230,7 +230,7 @@ This attack is mitigated by: We are not aware of others having discovered, disclosed or exploited this vulnerability. -Vulnerability time line: +Vulnerability timeline: * 2016-11-14: Vulnerability and fix discovered during review of cryptography by Marian Beermann (@enkore) * 2016-11-20: First patch diff --git a/docs/deployment/automated-local.rst b/docs/deployment/automated-local.rst index 9774b67b0..beae5eb62 100644 --- a/docs/deployment/automated-local.rst +++ b/docs/deployment/automated-local.rst @@ -14,8 +14,8 @@ systemd and udev. Overview -------- -An udev rule is created to trigger on the addition of block devices. The rule contains a tag -that triggers systemd to start a oneshot service. The oneshot service executes a script in +A udev rule is created to trigger on the addition of block devices. The rule contains a tag +that causes systemd to start a oneshot service. The oneshot service executes a script in the standard systemd service environment, which automatically captures stdout/stderr and logs it to the journal. 
@@ -36,9 +36,9 @@ Then, create ``/etc/backups/80-backup.rules`` with the following content (all on ACTION=="add", SUBSYSTEM=="block", ENV{ID_PART_TABLE_UUID}=="", TAG+="systemd", ENV{SYSTEMD_WANTS}+="automatic-backup.service" -The "systemd" tag in conjunction with the SYSTEMD_WANTS environment variable has systemd -launch the "automatic-backup" service, which we will create next, as the -``/etc/backups/automatic-backup.service`` file: +The "systemd" tag in conjunction with the SYSTEMD_WANTS environment variable causes systemd to +launch the "automatic-backup" service, which we will create next, as the file +``/etc/backups/automatic-backup.service``: .. code-block:: ini @@ -65,13 +65,13 @@ modify it to suit your needs (e.g. more backup sets, dumping databases etc.). # Script configuration # - # The backup partition is mounted there + # The backup partition is mounted here MOUNTPOINT=/mnt/backup # This is the location of the Borg repository TARGET=$MOUNTPOINT/borg-backups/backup.borg - # Archive name schema + # Archive name scheme DATE=$(date --iso-8601)-$(hostname) # This is the file that will later contain UUIDs of registered backup drives @@ -93,7 +93,7 @@ modify it to suit your needs (e.g. more backup sets, dumping databases etc.). echo "Disk $uuid is a backup disk" partition_path=/dev/disk/by-uuid/$uuid - # Mount file system if not already done. This assumes that if something is already + # Mount the file system if not already done. This assumes that if something is already # mounted at $MOUNTPOINT, it is the backup drive. It won't find the drive if # it was mounted somewhere else. findmnt $MOUNTPOINT >/dev/null || mount $partition_path $MOUNTPOINT @@ -104,7 +104,7 @@ modify it to suit your needs (e.g. more backup sets, dumping databases etc.). 
# Create backups # - # Options for borg create + # Options for borg create BORG_OPTS="--stats --one-file-system --compression lz4 --checkpoint-interval 86400" # Set BORG_PASSPHRASE or BORG_PASSCOMMAND somewhere around here, using export, diff --git a/docs/deployment/central-backup-server.rst b/docs/deployment/central-backup-server.rst index 740c6a6f3..53a9c4f5c 100644 --- a/docs/deployment/central-backup-server.rst +++ b/docs/deployment/central-backup-server.rst @@ -4,20 +4,20 @@ Central repository server with Ansible or Salt ============================================== -This section will give an example how to setup a borg repository server for multiple +This section gives an example of how to set up a Borg repository server for multiple clients. Machines -------- There are multiple machines used in this section and will further be named by their -respective fully qualified domain name (fqdn). +respective fully qualified domain name (FQDN). * The backup server: `backup01.srv.local` * The clients: - John Doe's desktop: `johndoe.clnt.local` - - Webserver 01: `web01.srv.local` + - Web server 01: `web01.srv.local` - Application server 01: `app01.srv.local` User and group @@ -28,7 +28,7 @@ Recommended user and group with additional settings: * User: `backup` * Group: `backup` -* Shell: `/bin/bash` (or other capable to run the `borg serve` command) +* Shell: `/bin/bash` (or another capable of running the `borg serve` command) * Home: `/home/backup` Most clients shall initiate a backup from the root user to catch all @@ -79,11 +79,11 @@ The options which are added to the key will perform the following: 3. Restrict ssh and do not allow stuff which imposes a security risk Due to the ``cd`` command we use, the server automatically changes the current -working directory. Then client doesn't need to have knowledge of the absolute +working directory.
The client doesn't need to have knowledge of the absolute or relative remote repository path and can directly access the repositories at ``ssh://@/./``. -.. note:: The setup above ignores all client given commandline parameters +.. note:: The setup above ignores all client-given command-line parameters which are normally appended to the `borg serve` command. Client @@ -95,14 +95,14 @@ The client needs to initialize the `pictures` repository like this: borg init ssh://backup@backup01.srv.local/./pictures -Or with the full path (should actually never be used, as only for demonstrational purposes). -The server should automatically change the current working directory to the `` folder. +Or with the full path (this should never actually be used; it is shown for demonstration purposes only). +The server should automatically change the current working directory to the `` folder. :: borg init ssh://backup@backup01.srv.local/home/backup/repos/johndoe.clnt.local/pictures -When `johndoe.clnt.local` tries to access a not restricted path the following error is raised. +When `johndoe.clnt.local` tries to access a path outside its restricted path, the following error is raised. John Doe tries to backup into the Web 01 path: :: diff --git a/docs/deployment/hosting-repositories.rst b/docs/deployment/hosting-repositories.rst index fd534e734..87ca1070a 100644 --- a/docs/deployment/hosting-repositories.rst +++ b/docs/deployment/hosting-repositories.rst @@ -5,11 +5,11 @@ Hosting repositories ==================== -This sections shows how to securely provide repository storage for users. +This section shows how to securely provide repository storage for users. Optionally, each user can have a storage quota. Repositories are accessed through SSH. Each user of the service should -have her own login which is only able to access the user's files. +have their own login which is only able to access the user's files.
Technically it would be possible to have multiple users share one login, however, separating them is better. Separate logins increase isolation and are thus an additional layer of security and safety for both the diff --git a/docs/deployment/image-backup.rst b/docs/deployment/image-backup.rst index 19b8faebf..60a209907 100644 --- a/docs/deployment/image-backup.rst +++ b/docs/deployment/image-backup.rst @@ -8,9 +8,9 @@ Backing up disk images can still be efficient with Borg because its `deduplicati technique makes sure only the modified parts of the file are stored. Borg also has optional simple sparse file support for extract. -It is of utmost importancy to pin down the disk you want to backup. -You need to use the SERIAL for that. -Use: +It is of utmost importance to pin down the disk you want to back up. +You need to use the SERIAL for that. +Use: .. code-block:: bash @@ -65,7 +65,7 @@ deduplicating. For backup, save the disk header and the contents of each partiti PARTNUM=$(echo $x | grep -Eo "[0-9]+$") ntfsclone -so - $x | borg create repo::hostname-part$PARTNUM - done - # to backup non-NTFS partitions as well: + # to back up non-NTFS partitions as well: echo "$PARTITIONS" | grep -v NTFS | cut -d' ' -f1 | while read x; do PARTNUM=$(echo $x | grep -Eo "[0-9]+$") borg create --read-special repo::hostname-part$PARTNUM $x diff --git a/docs/deployment/non-root-user.rst b/docs/deployment/non-root-user.rst index a3d6dad44..f15b8e77e 100644 --- a/docs/deployment/non-root-user.rst +++ b/docs/deployment/non-root-user.rst @@ -6,29 +6,29 @@ Backing up using a non-root user ================================ -This section describes how to run borg as a non-root user and still be able to -backup every file on the system. +This section describes how to run Borg as a non-root user and still be able to +back up every file on the system. -Normally borg is run as the root user to bypass all filesystem permissions and -be able to read all files. 
But in theory this also allows borg to modify or -delete files on your system, in case of a bug for example. +Normally Borg is run as the root user to bypass all filesystem permissions and +be able to read all files. But in theory this also allows Borg to modify or +delete files on your system, in case of a bug, for example. -To eliminate this possibility, we can run borg as a non-root user and give it read-only +To eliminate this possibility, we can run Borg as a non-root user and give it read-only permissions to all files on the system. Using Linux capabilities inside a systemd service ================================================= -One way to do so, is to use linux `capabilities +One way to do so is to use Linux `capabilities `_ within a systemd service. -Linux capabilities allow us to give parts of the privileges the root user has to -a non-root user. This works on a per-thread level and does not give the permission +Linux capabilities allow us to give some of the privileges that the root user has to +a non-root user. This works on a per-thread level and does not grant these permissions to the non-root user as a whole. -For this we need to run our backup script from a systemd service and use the `AmbientCapabilities +For this, we need to run our backup script from a systemd service and use the `AmbientCapabilities `_ option added in systemd 229. @@ -46,7 +46,7 @@ A very basic unit file would look like this: AmbientCapabilities=CAP_DAC_READ_SEARCH -The ``CAP_DAC_READ_SEARCH`` capability gives borg read-only access to all files and directories on the system. +The ``CAP_DAC_READ_SEARCH`` capability gives Borg read-only access to all files and directories on the system. This service can then be started manually using ``systemctl start``, a systemd timer or other methods. @@ -57,7 +57,7 @@ When restoring files, the root user should be used. When using the non-root user change all files to be owned by the non-root user. 
Using borg mount will not allow the non-root user to access files that it would not have access to on the system itself. -Other than that, the same restore process, that would be used when running the backup as root, can be used. +Other than that, the same restore process that would be used when running the backup as root can be used. .. warning:: diff --git a/docs/deployment/pull-backup.rst b/docs/deployment/pull-backup.rst index 258a7fb77..20b2c6dc2 100644 --- a/docs/deployment/pull-backup.rst +++ b/docs/deployment/pull-backup.rst @@ -6,20 +6,20 @@ Backing up in pull mode ======================= -Typically the borg client connects to a backup server using SSH as a transport +Typically the Borg client connects to a backup server using SSH as a transport when initiating a backup. This is referred to as push mode. -If you however require the backup server to initiate the connection or prefer +If, however, you require the backup server to initiate the connection or prefer it to initiate the backup run, one of the following workarounds is required to allow such a pull mode setup. -A common use case for pull mode is to backup a remote server to a local personal +A common use case for pull mode is to back up a remote server to a local personal computer. SSHFS ===== -Assuming you have a pull backup system set up with borg, where a backup server +Assuming you have a pull backup system set up with Borg, where a backup server pulls the data from the target via SSHFS. In this mode, the backup client's file system is mounted remotely on the backup server. Pull mode is even possible if the SSH connection must be established by the client via a remote tunnel. Other @@ -54,7 +54,7 @@ completely in every aspect from such a backup. .. warning:: - The chroot method was chosen to get the right user and group name-id + The chroot method was chosen to get the right user and group name-ID mappings, assuming they only come from files (/etc/passwd and group). 
This assumption might be wrong, e.g. if users/groups also come from ldap or other providers. @@ -64,7 +64,7 @@ completely in every aspect from such a backup. Creating a backup ----------------- -Generally, in a pull backup situation there is no direct way for borg to know +Generally, in a pull backup situation there is no direct way for Borg to know the client's original UID:GID name mapping of files, because Borg would use ``/etc/passwd`` and ``/etc/group`` of the backup server to map the names. To derive the right names, Borg needs to have access to the client's passwd and @@ -72,7 +72,7 @@ group files and use them in the backup process. The solution to this problem is chrooting into an sshfs mounted directory. In this example the whole client root file system is mounted. We use the -stand-alone BorgBackup executable and copy it into the mounted file system to +standalone BorgBackup executable and copy it into the mounted file system to make Borg available after entering chroot; this can be skipped if Borg is already installed on the client. diff --git a/docs/development.rst b/docs/development.rst index e0167bfcf..43900e2fa 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -52,7 +52,7 @@ Borg development happens on the ``master`` branch and uses GitHub pull requests (if you don't have GitHub or don't want to use it you can send smaller patches via the borgbackup mailing list to the maintainers). -Stable releases are maintained on maintenance branches named ``x.y-maint``, eg. +Stable releases are maintained on maintenance branches named ``x.y-maint``, e.g. the maintenance branch of the 1.4.x series is ``1.4-maint``. Most PRs should be filed against the ``master`` branch. Only if an @@ -372,6 +372,7 @@ Checklist: :: scripts/sign-binaries 201912312359 + - Close the release milestone on GitHub. 
- `Update borgbackup.org `_ with the diff --git a/docs/index.rst b/docs/index.rst index 9675ed989..7b420e142 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,7 +6,7 @@ Borg Documentation .. include:: ../README.rst -.. when you add an element here, do not forget to add it to book.rst +.. When you add an element here, do not forget to add it to book.rst. .. toctree:: :maxdepth: 2 diff --git a/docs/installation.rst b/docs/installation.rst index 381a05a66..016ba445a 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -105,7 +105,7 @@ Standalone Binary .. note:: Releases are signed with an OpenPGP key, see :ref:`security-contact` for more instructions. -Borg x86-64 amd/intel compatible binaries (generated with `pyinstaller`_) +Borg x86-64 AMD/Intel-compatible binaries (generated with `pyinstaller`_) are available on the releases_ page for the following platforms (for more details see the ``00_README.txt`` file there): diff --git a/docs/internals.rst b/docs/internals.rst index d11f0bfed..cf521a263 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -4,7 +4,7 @@ Internals ========= -The internals chapter describes and analyses most of the inner workings +The internals chapter describes and analyzes most of the inner workings of Borg. Borg uses a low-level, key-value store, the :ref:`repository`, and @@ -19,12 +19,12 @@ specified when the backup was performed. Deduplication is performed globally across all data in the repository (multiple backups and even multiple hosts), both on data and file metadata, using :ref:`chunks` created by the chunker using the -Buzhash_ algorithm ("buzhash" chunker) or a simpler fixed blocksize +Buzhash_ algorithm ("buzhash" chunker) or a simpler fixed block size algorithm ("fixed" chunker). To actually perform the repository-wide deduplication, a hash of each chunk is checked against the :ref:`chunks cache `, which is a -hash-table of all chunks that already exist. +hash table of all chunks that already exist. .. 
figure:: internals/structure.png :figwidth: 100% diff --git a/docs/internals/data-structures.rst b/docs/internals/data-structures.rst index f5cf8150b..6e2cdfed9 100644 --- a/docs/internals/data-structures.rst +++ b/docs/internals/data-structures.rst @@ -21,29 +21,29 @@ Repository .. Some parts of this description were taken from the Repository docstring -Borg stores its data in a `Repository`, which is a file system based +Borg stores its data in a `Repository`, which is a filesystem-based transactional key-value store. Thus the repository does not know about the concept of archives or items. Each repository has the following file structure: README - simple text file telling that this is a Borg repository + Simple text file telling that this is a Borg repository config - repository configuration + Repository configuration data/ - directory where the actual data is stored + Directory where the actual data is stored hints.%d - hints for repository compaction + Hints for repository compaction index.%d - repository index + Repository index lock.roster and lock.exclusive/* - used by the locking system to manage shared and exclusive locks + Used by the locking system to manage shared and exclusive locks Transactionality is achieved by using a log (aka journal) to record changes. The log is a series of numbered files called segments_. Each segment is a series of log entries. The segment number together with the offset of each diff --git a/docs/internals/frontends.rst b/docs/internals/frontends.rst index 45003929c..dd54c1f15 100644 --- a/docs/internals/frontends.rst +++ b/docs/internals/frontends.rst @@ -10,18 +10,18 @@ Borg does not have a public API on the Python level. That does not keep you from but does mean that there are no release-to-release guarantees on what you might find in that package, not even for point releases (1.1.x), and there is no documentation beyond the code and the internals documents. 
-Borg does on the other hand provide an API on a command-line level. In other words, a frontend should -(for example) create a backup archive just invoke :ref:`borg_create`, give commandline parameters/options -as needed and parse JSON output from borg. +Borg does, on the other hand, provide an API on a command-line level. In other words, a frontend should +(for example) create a backup archive by invoking :ref:`borg_create`, pass command-line parameters/options +as needed, and parse JSON output from Borg. -Important: JSON output is expected to be UTF-8, but currently borg depends on the locale being configured -for that (must be a UTF-8 locale and *not* "C" or "ascii"), so that Python will choose to encode to UTF-8. -The same applies to any inputs read by borg, they are expected to be UTF-8 encoded also. +Important: JSON output is expected to be UTF-8, but currently Borg depends on the locale being configured +for that (must be a UTF-8 locale and not "C" or "ASCII"), so that Python will choose to encode to UTF-8. +The same applies to any inputs read by Borg; they are expected to be UTF-8 encoded also. We consider this a bug (see :issue:`2273`) and might fix it later, so borg will use UTF-8 independent of the locale. -On POSIX systems, you can usually set environment vars to choose a UTF-8 locale: +On POSIX systems, you can usually set environment variables to choose a UTF-8 locale: :: diff --git a/docs/internals/security.rst b/docs/internals/security.rst index e58778f6b..c342f2267 100644 --- a/docs/internals/security.rst +++ b/docs/internals/security.rst @@ -30,7 +30,7 @@ Under these circumstances Borg guarantees that the attacker cannot 1. modify the data of any archive without the client detecting the change 2. rename, remove or add an archive without the client detecting the change -3. recover plain-text data +3. recover plaintext data 4. 
recover definite (heuristics based on access patterns are possible) structural information such as the object graph (which archives refer to what chunks) @@ -144,10 +144,10 @@ Depending on the chosen mode (see :ref:`borg_init`) different primitives are use - The authentication primitive is either HMAC-SHA-256 or BLAKE2b-256 in a keyed mode. - Both HMAC-SHA-256 and BLAKE2b have undergone extensive cryptanalysis + Both HMAC-SHA-256 and BLAKE2b have undergone extensive cryptanalysis and have proven secure against known attacks. The known vulnerability of SHA-256 against length extension attacks does not apply to HMAC-SHA-256. - + The authentication primitive should be chosen based upon SHA hardware support: all AMD Ryzen, Intel 10th+ generation mobile and Intel 11th+ generation desktop processors, Apple M1+ and most current ARM64 architectures support diff --git a/docs/introduction.rst b/docs/introduction.rst index ab8bd32c4..7857b1502 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -1,8 +1,8 @@ Introduction ============ -.. this shim is here to fix the structure in the PDF - rendering. without this stub, the elements in the toctree of - index.rst show up a level below the README file included +.. This shim is here to fix the structure in the PDF + rendering. Without this stub, the elements in the toctree of + index.rst show up a level below the README file included there. .. 
include:: ../README.rst diff --git a/docs/man_intro.rst b/docs/man_intro.rst index e9ed1fd20..4cfa29731 100644 --- a/docs/man_intro.rst +++ b/docs/man_intro.rst @@ -40,7 +40,7 @@ NOTES SEE ALSO -------- -`borg-common(1)` for common command line options +`borg-common(1)` for common command-line options `borg-init(1)`, `borg-create(1)`, `borg-mount(1)`, `borg-extract(1)`, diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 6b41f35ac..c62317f3e 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -8,7 +8,7 @@ Quick Start This chapter will get you started with Borg and covers various use cases. -A step by step example +A step-by-step example ---------------------- .. include:: quickstart_example.rst.inc @@ -20,8 +20,8 @@ A *Borg archive* is the result of a single backup (``borg create``). An archive stores a snapshot of the data of the files "inside" it. One can later extract or mount an archive to restore from a backup. -*Repositories* are filesystem directories acting as self-contained stores of archives. -Repositories can be accessed locally via path or remotely via ssh. Under the hood, +*Repositories* are file system directories acting as self-contained stores of archives. +Repositories can be accessed locally via path or remotely via SSH. Under the hood, repositories contain data blocks and a manifest tracking which blocks are in each archive. If some data hasn't changed from one backup to another, Borg can simply reference an already uploaded data chunk (deduplication). @@ -37,7 +37,7 @@ a good amount of free space on the filesystem that has your backup repository repositories. See also :ref:`cache-memory-usage`. Borg doesn't use space reserved for root on repository disks (even when run as root), -on file systems which do not support this mechanism (e.g. 
XFS) we recommend to reserve +on file systems which do not support this mechanism (e.g., XFS) we recommend reserving some space in Borg itself just to be safe by adjusting the ``additional_free_space`` setting (a good starting point is ``2G``):: @@ -49,7 +49,7 @@ by deleting/pruning archives. This mechanism is not bullet-proof in some circumstances [1]_. If you *really* run out of disk space, it can be hard or impossible to free space, -because Borg needs free space to operate - even to delete backup +because Borg needs free space to operate—even to delete backup archives. You can use some monitoring process or just include the free space information @@ -58,38 +58,38 @@ in your backup log files (you check them regularly anyway, right?). Also helpful: - create a big file as a "space reserve", that you can delete to free space -- if you use LVM: use a LV + a filesystem that you can resize later and have +- if you use LVM: use an LV + a file system that you can resize later and have some unallocated PEs you can add to the LV. - consider using quotas - use `prune` and `compact` regularly -.. [1] This failsafe can fail in these circumstances: +.. [1] This fail-safe can fail in these circumstances: - - The underlying file system doesn't support statvfs(2), or returns incorrect - data, or the repository doesn't reside on a single file system + - The underlying file system does not support statvfs(2), or returns incorrect + data, or the repository does not reside on a single file system - Other tasks fill the disk simultaneously - Hard quotas (which may not be reflected in statvfs(2)) Important note about permissions -------------------------------- -To avoid permissions issues (in your borg repository or borg cache), **always +To avoid permissions issues (in your Borg repository or Borg cache), **always access the repository using the same user account**. 
-If you want to backup files of other users or the operating system, running -borg as root likely will be required (otherwise you'ld get `Permission denied` +If you want to back up files of other users or the operating system, running +Borg as root likely will be required (otherwise you'd get `Permission denied` errors). -If you only back up your own files, you neither need nor want to run borg as +If you only back up your own files, you neither need nor want to run Borg as root, just run it as your normal user. -For a local repository just always use the same user to invoke borg. +For a local repository just always use the same user to invoke Borg. For a remote repository: always use e.g. borg@remote_host. You can use this -from different local users, the remote user running borg and accessing the +from different local users; the remote user running Borg and accessing the repo will always be `borg`. If you need to access a local repository from different users, you can use the -same method by using ssh to borg@localhost. +same method by using SSH to borg@localhost. Important note about files changing during the backup process ------------------------------------------------------------- diff --git a/docs/support.rst b/docs/support.rst index 5fcac0120..0b11065ca 100644 --- a/docs/support.rst +++ b/docs/support.rst @@ -15,7 +15,7 @@ Security -------- In case you discover a security issue, please use this contact for reporting it -privately and please, if possible, use encrypted E-Mail: +privately and please, if possible, use encrypted email: Thomas Waldmann @@ -28,7 +28,7 @@ Verifying signed releases ------------------------- `Releases `_ are signed with the -same GPG key and a .asc file is provided for each binary. +same GPG key and an .asc file is provided for each binary. To verify a signature, the public key needs to be known to GPG. 
It can be imported into the local keystore from a keyserver with the fingerprint:: @@ -41,7 +41,7 @@ If GPG successfully imported the key, the output should include (among other thi gpg: Total number processed: 1 ... -To verify for example the signature of the borg-linux64 binary:: +To verify, for example, the signature of the borg-linux64 binary:: gpg --verify borg-linux64.asc diff --git a/docs/usage.rst b/docs/usage.rst index ad63cb7e4..826132996 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -22,7 +22,7 @@ Usage window.location.pathname = replaced; } } - // Fixup anchored links from when usage.html contained all the commands + // Fix up anchored links from when usage.html contained all the commands else if(hash.startsWith("borg-key") || hash == "borg-change-passphrase") { window.location.hash = ""; window.location.pathname = window.location.pathname.replace("usage.html", "usage/key.html"); diff --git a/docs/usage/compact.rst b/docs/usage/compact.rst index 2633d29eb..626b60f64 100644 --- a/docs/usage/compact.rst +++ b/docs/usage/compact.rst @@ -7,7 +7,7 @@ Examples # compact segments and free repo disk space $ borg compact /path/to/repo - # same as above plus clean up 17byte commit-only segments + # same as above plus clean up 17-byte commit-only segments $ borg compact --cleanup-commits /path/to/repo diff --git a/docs/usage/debug.rst b/docs/usage/debug.rst index 0dc54ae39..549223010 100644 --- a/docs/usage/debug.rst +++ b/docs/usage/debug.rst @@ -5,7 +5,7 @@ There is a ``borg debug`` command that has some subcommands which are all **not intended for normal use** and **potentially very dangerous** if used incorrectly. For example, ``borg debug put-obj`` and ``borg debug delete-obj`` will only do -what their name suggests: put objects into repo / delete objects from repo. +what their name suggests: put objects into the repository or delete objects from the repository. 
Please note: @@ -31,4 +31,4 @@ The ``borg debug profile-convert`` command can be used to take a Borg profile an it to a profile file that is compatible with the Python tools. Additionally, if the filename specified for ``--debug-profile`` ends with ".pyprof" a -Python compatible profile is generated. This is only intended for local use by developers. +Python-compatible profile is generated. This is only intended for local use by developers. diff --git a/docs/usage/delete.rst b/docs/usage/delete.rst index 72270cca2..6f32eb91b 100644 --- a/docs/usage/delete.rst +++ b/docs/usage/delete.rst @@ -15,7 +15,7 @@ Examples # delete all archives whose names contain "-2012-" $ borg delete --glob-archives '*-2012-*' /path/to/repo - # see what would be deleted if delete was run without --dry-run + # see what would be deleted if delete were run without --dry-run $ borg delete --list --dry-run -a '*-May-*' /path/to/repo # delete the whole repository and the related local cache: diff --git a/docs/usage/general.rst b/docs/usage/general.rst index 55146498f..537b917a4 100644 --- a/docs/usage/general.rst +++ b/docs/usage/general.rst @@ -32,16 +32,16 @@ All Borg commands share these options: .. include:: common-options.rst.inc Option ``--bypass-lock`` allows you to access the repository while bypassing -borg's locking mechanism. This is necessary if your repository is on a read-only +Borg's locking mechanism. This is necessary if your repository is on a read-only storage where you don't have write permissions or capabilities and therefore -cannot create a lock. Examples are repositories stored on a Bluray disc or a -read-only network storage. Avoid this option if you are able to use locks as +cannot create a lock. Examples are repositories stored on a Blu-ray disc or a +read-only network storage. Avoid this option if you are able to use locks, as that is the safer way; see the warning below. .. 
warning:: - If you do use ``--bypass-lock``, you are responsible to ensure that no other - borg instances have write access to the repository. Otherwise, you might + If you do use ``--bypass-lock``, you are responsible for ensuring that no other + Borg instances have write access to the repository. Otherwise, you might experience errors and read broken data if changes to that repository are being made at the same time. diff --git a/docs/usage/mount.rst b/docs/usage/mount.rst index aed7a05a4..0e30215e5 100644 --- a/docs/usage/mount.rst +++ b/docs/usage/mount.rst @@ -59,11 +59,11 @@ borgfs $ mount /tmp/myrepo $ mount /tmp/myarchive $ ls /tmp/myrepo - root-2016-02-01 root-2016-02-2015 + root-2016-02-01 root-2016-02-15 $ ls /tmp/myarchive bin boot etc home lib lib64 lost+found media mnt opt root sbin srv tmp usr var -.. Note:: +.. note:: ``borgfs`` will be automatically provided if you used a distribution package or ``pip`` to install Borg. Users of the standalone binary will have diff --git a/docs/usage/notes.rst b/docs/usage/notes.rst index e2d8a4953..4948908b2 100644 --- a/docs/usage/notes.rst +++ b/docs/usage/notes.rst @@ -1,7 +1,7 @@ Additional Notes ---------------- -Here are misc. notes about topics that are maybe not covered in enough detail in the usage section. +Here are miscellaneous notes about topics that might not be covered in enough detail in the usage section. .. _chunker-params: @@ -24,14 +24,14 @@ deduplication and creates a much smaller amount of chunks and thus uses less resources. This is good for relatively big data volumes and if the machine has a relatively low amount of free RAM and disk space. -``--chunker-params=fixed,4194304`` results in fixed 4MiB sized block +``--chunker-params=fixed,4194304`` results in fixed 4 MiB sized block deduplication and is more efficient than the previous example when used for -for block devices (like disks, partitions, LVM LVs) or raw disk image files. 
+block devices (like disks, partitions, LVM LVs) or raw disk image files. -``--chunker-params=fixed,4096,512`` results in fixed 4kiB sized blocks, -but the first header block will only be 512B long. This might be useful to -dedup files with 1 header + N fixed size data blocks. Be careful to not -produce a too big amount of chunks (like using small block size for huge +``--chunker-params=fixed,4096,512`` results in fixed 4 KiB sized blocks, +but the first header block will only be 512 B long. This might be useful to +deduplicate files with 1 header + N fixed-size data blocks. Be careful not to +produce too many chunks (such as using a small block size for huge files). If you already have made some archives in a repository and you then change @@ -46,7 +46,7 @@ Usually, it is not that bad though: - usually most files are not touched, so it will just re-use the old chunks it already has in the repo - files smaller than the (both old and new) minimum chunksize result in only - one chunk anyway, so the resulting chunks are same and deduplication will apply + one chunk anyway, so the resulting chunks are the same and deduplication will apply If you switch chunker params to save resources for an existing repo that already has some backup archives, you will see an increasing effect over time, @@ -85,17 +85,17 @@ use this option also for speeding up operations. ~~~~~~~~~~~ borg uses a safe default umask of 077 (that means the files borg creates have -only permissions for owner, but no permissions for group and others) - so there +only permissions for the owner, but no permissions for group and others) - so there should rarely be a need to change the default behaviour. This option only affects the process to which it is given. 
Thus, when you run borg in client/server mode and you want to change the behaviour on the server -side, you need to use ``borg serve --umask=XXX ...`` as a ssh forced command +side, you need to use ``borg serve --umask=XXX ...`` as an SSH forced command in ``authorized_keys``. The ``--umask`` value given on the client side is **not** transferred to the server side. Also, if you choose to use the ``--umask`` option, always be consistent and use -the same umask value so you do not create a mixup of permissions in a borg +the same umask value so you do not create a mix-up of permissions in a borg repository or with other files borg creates. ``--read-special`` @@ -120,13 +120,13 @@ You need to be careful about what you include when using ``--read-special``, e.g. if you include ``/dev/zero``, your backup will never terminate. Restoring such files' content is currently only supported one at a time via -``--stdout`` option (and you have to redirect stdout to where ever it shall go, +``--stdout`` option (and you have to redirect stdout to wherever it shall go, maybe directly into an existing device file of your choice or indirectly via ``dd``). To some extent, mounting a backup archive with the backups of special files via ``borg mount`` and then loop-mounting the image files from inside the mount -point will work. If you plan to access a lot of data in there, it likely will +point will work. If you plan to access a lot of data in there, it will likely scale and perform better if you do not work via the FUSE mount. Example @@ -211,7 +211,7 @@ but borg will also reject to delete the repository completely). If ``borg compact`` command is used on a repo in append-only mode, there will be no warning or error, but no compaction will happen. 
-append-only is useful for scenarios where a backup client machine backups +Append-only is useful for scenarios where a backup client machine backs up remotely to a backup server using ``borg serve``, since a hacked client machine cannot delete backups on the server permanently. @@ -233,7 +233,7 @@ in ``.ssh/authorized_keys``: :: - command="borg serve --append-only ..." ssh-rsa + command="borg serve --append-only ..." ssh-rsa command="borg serve ..." ssh-rsa Running ``borg init`` via a ``borg serve --append-only`` server will *not* create @@ -255,7 +255,7 @@ mode. A transaction log in this situation might look like this: transaction 13, UTC time 2016-03-31T15:55:55.472564 From your security logs you conclude the attacker gained access at 15:54:00 and all -the backups where deleted or replaced by compromised backups. From the log you know +the backups were deleted or replaced by compromised backups. From the log you know that transactions 11 and later are compromised. Note that the transaction ID is the name of the *last* file in the transaction. For example, transaction 11 spans files 6 to 11. @@ -320,7 +320,7 @@ repository. Make sure that backup client machines only get to access the reposit ``borg serve``. Ensure that no remote access is possible if the repository is temporarily set to normal mode -for e.g. regular pruning. +for example, regular pruning. Further protections can be implemented, but are outside of Borg's scope. For example, file system snapshots or wrapping ``borg serve`` to set special permissions or ACLs on diff --git a/docs/usage/prune.rst b/docs/usage/prune.rst index 626426165..e65b34162 100644 --- a/docs/usage/prune.rst +++ b/docs/usage/prune.rst @@ -3,38 +3,38 @@ Examples ~~~~~~~~ -Be careful, prune is a potentially dangerous command, it will remove backup +Be careful: prune is a potentially dangerous command; it will remove backup archives. 
-The default of prune is to apply to **all archives in the repository** unless -you restrict its operation to a subset of the archives using ``--glob-archives``. -When using ``--glob-archives``, be careful to choose a good matching pattern - -e.g. do not use "foo*" if you do not also want to match "foobar". +By default, prune applies to **all archives in the repository** unless you +restrict its operation to a subset of the archives using ``--glob-archives``. +When using ``--glob-archives``, be careful to choose a good matching pattern — +for example, do not use "foo*" if you do not also want to match "foobar". It is strongly recommended to always run ``prune -v --list --dry-run ...`` -first so you will see what it would do without it actually doing anything. +first, so you can see what it would do without actually doing anything. :: - # Keep 7 end of day and 4 additional end of week archives. + # Keep 7 end-of-day and 4 additional end-of-week archives. # Do a dry-run without actually deleting anything. 
$ borg prune -v --list --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo # Same as above but only apply to archive names starting with the hostname # of the machine followed by a "-" character: $ borg prune -v --list --keep-daily=7 --keep-weekly=4 --glob-archives='{hostname}-*' /path/to/repo - # actually free disk space: + # Actually free disk space: $ borg compact /path/to/repo - # Keep 7 end of day, 4 additional end of week archives, - # and an end of month archive for every month: + # Keep 7 end-of-day, 4 additional end-of-week archives, + # and an end-of-month archive for every month: $ borg prune -v --list --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 /path/to/repo - # Keep all backups in the last 10 days, 4 additional end of week archives, - # and an end of month archive for every month: + # Keep all backups in the last 10 days, 4 additional end-of-week archives, + # and an end-of-month archive for every month: $ borg prune -v --list --keep-within=10d --keep-weekly=4 --keep-monthly=-1 /path/to/repo -There is also a visualized prune example in ``docs/misc/prune-example.txt``: +There is also a visual example of pruning in ``docs/misc/prune-example.txt``: .. highlight:: none .. include:: ../misc/prune-example.txt diff --git a/docs/usage/recreate.rst b/docs/usage/recreate.rst index 5ff2917fd..1f607807c 100644 --- a/docs/usage/recreate.rst +++ b/docs/usage/recreate.rst @@ -4,22 +4,22 @@ Examples ~~~~~~~~ :: - # Make old (Attic / Borg 0.xx) archives deduplicate with Borg 1.x archives. + # Make old (Attic/Borg 0.xx) archives deduplicate with Borg 1.x archives. # Archives created with Borg 1.1+ and the default chunker params are skipped # (archive ID stays the same). $ borg recreate /mnt/backup --chunker-params default --progress - # Create a backup with little but fast compression + # Create a backup with low but fast compression. 
$ borg create /mnt/backup::archive /some/files --compression lz4 - # Then compress it - this might take longer, but the backup has already completed, + # Then compress it — this might take longer, but the backup has already completed, # so no inconsistencies from a long-running backup job. $ borg recreate /mnt/backup::archive --recompress --compression zlib,9 # Remove unwanted files from all archives in a repository. - # Note the relative path for the --exclude option - archives only contain relative paths. + # Note the relative path for the --exclude option — archives only contain relative paths. $ borg recreate /mnt/backup --exclude home/icke/Pictures/drunk_photos - # Change archive comment + # Change the archive comment. $ borg create --comment "This is a comment" /mnt/backup::archivename ~ $ borg info /mnt/backup::archivename Name: archivename diff --git a/docs/usage/serve.rst b/docs/usage/serve.rst index 95166b92c..1d2452083 100644 --- a/docs/usage/serve.rst +++ b/docs/usage/serve.rst @@ -3,7 +3,7 @@ Examples ~~~~~~~~ -``borg serve`` has special support for ssh forced commands (see ``authorized_keys`` +``borg serve`` has special support for SSH forced commands (see ``authorized_keys`` example below): if the environment variable SSH_ORIGINAL_COMMAND is set it will ignore some options given on the command line and use the values from the variable instead. This only applies to a carefully controlled allowlist of safe @@ -15,31 +15,31 @@ options. This list currently contains: giving up and aborting the operation when another process is holding a lock. Environment variables (such as BORG_XXX) contained in the original -command sent by the client are *not* interpreted, but ignored. If BORG_XXX environment -variables should be set on the ``borg serve`` side, then these must be set in system-specific +command sent by the client are *not* interpreted; they are ignored. 
If BORG_XXX environment +variables need to be set on the ``borg serve`` side, then these must be set in system-specific locations like ``/etc/environment`` or in the forced command itself (example below). :: - # Allow an SSH keypair to only run borg, and only have access to /path/to/repo. + # Allow an SSH key pair to only run borg, and only have access to /path/to/repo. # Use key options to disable unneeded and potentially dangerous SSH functionality. - # This will help to secure an automated remote backup system. + # This helps secure an automated remote backup system. $ cat ~/.ssh/authorized_keys command="borg serve --restrict-to-path /path/to/repo",restrict ssh-rsa AAAAB3[...] - # Set a BORG_XXX environment variable on the "borg serve" side + # Set a BORG_XXX environment variable on the ``borg serve`` side. $ cat ~/.ssh/authorized_keys command="BORG_XXX=value borg serve [...]",restrict ssh-rsa [...] .. note:: - The examples above use the ``restrict`` directive and assume a POSIX - compliant shell set as the user's login shell. - This does automatically block potential dangerous ssh features, even when + The examples above use the ``restrict`` directive and assume a POSIX-compliant + shell set as the user's login shell. + This automatically blocks potentially dangerous SSH features, even when they are added in a future update. Thus, this option should be preferred. - If you're using openssh-server < 7.2, however, you have to explicitly specify - the ssh features to restrict and cannot simply use the restrict option as it - has been introduced in v7.2. We recommend to use + If you're using OpenSSH server < 7.2, however, you have to explicitly specify + the SSH features to restrict and cannot simply use the ``restrict`` option as it + was introduced in v7.2. We recommend using ``no-port-forwarding,no-X11-forwarding,no-pty,no-agent-forwarding,no-user-rc`` in this case. 
@@ -60,9 +60,9 @@ Either in the client side's ``~/.ssh/config`` file, or in the client's ``/etc/ss ServerAliveInterval 10 ServerAliveCountMax 30 -Replacing ``backupserver`` with the hostname, FQDN or IP address of the borg server. +Replace ``backupserver`` with the hostname, FQDN, or IP address of the Borg server. -This will cause the client to send a keepalive to the server every 10 seconds. If 30 consecutive keepalives are sent without a response (a time of 300 seconds), the ssh client process will be terminated, causing the borg process to terminate gracefully. +This will cause the client to send a keepalive to the server every 10 seconds. If 30 consecutive keepalives are sent without a response (a time of 300 seconds), the SSH client process will be terminated, causing the Borg process to terminate gracefully. On the server side's ``sshd`` configuration file (typically ``/etc/ssh/sshd_config``): :: @@ -70,8 +70,8 @@ On the server side's ``sshd`` configuration file (typically ``/etc/ssh/sshd_conf ClientAliveInterval 10 ClientAliveCountMax 30 -This will cause the server to send a keep alive to the client every 10 seconds. If 30 consecutive keepalives are sent without a response (a time of 300 seconds), the server's sshd process will be terminated, causing the ``borg serve`` process to terminate gracefully and release the lock on the repository. +This will cause the server to send a keepalive to the client every 10 seconds. If 30 consecutive keepalives are sent without a response (a time of 300 seconds), the server's sshd process will be terminated, causing the ``borg serve`` process to terminate gracefully and release the lock on the repository. -If you then run borg commands with ``--lock-wait 600``, this gives sufficient time for the borg serve processes to terminate after the SSH connection is torn down after the 300 second wait for the keepalives to fail. 
+If you then run Borg commands with ``--lock-wait 600``, this gives sufficient time for the ``borg serve`` processes to terminate after the SSH connection is torn down following the 300-second wait for the keepalives to fail. You may, of course, modify the timeout values demonstrated above to values that suit your environment and use case. diff --git a/docs/usage/tar.rst b/docs/usage/tar.rst index 34f307cdd..b1771adff 100644 --- a/docs/usage/tar.rst +++ b/docs/usage/tar.rst @@ -6,18 +6,18 @@ Examples ~~~~~~~~ :: - # export as uncompressed tar + # Export as an uncompressed tar. $ borg export-tar /path/to/repo::Monday Monday.tar - # exclude some types, compress using gzip + # Exclude some types; compress using gzip. $ borg export-tar /path/to/repo::Monday Monday.tar.gz --exclude '*.so' - # use higher compression level with gzip + # Use a higher compression level with gzip. $ borg export-tar --tar-filter="gzip -9" testrepo::linux Monday.tar.gz - # export a tar, but instead of storing it on disk, + # Export a tar, but instead of storing it on disk, # upload it to a remote site using curl. $ borg export-tar /path/to/repo::Monday - | curl --data-binary @- https://somewhere/to/POST - # remote extraction via "tarpipe" + # Remote extraction via "tarpipe". $ borg export-tar /path/to/repo::Monday - | ssh somewhere "cd extracted; tar x" diff --git a/docs/usage/upgrade.rst b/docs/usage/upgrade.rst index 044c81cf6..118be500e 100644 --- a/docs/usage/upgrade.rst +++ b/docs/usage/upgrade.rst @@ -16,14 +16,14 @@ Examples .. _borg_key_migrate-to-repokey: -Upgrading a passphrase encrypted attic repo +Upgrading a passphrase-encrypted Attic repo ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -attic offered a "passphrase" encryption mode, but this was removed in borg 1.0 +Attic offered a "passphrase" encryption mode, but this was removed in Borg 1.0 and replaced by the "repokey" mode (which stores the passphrase-protected -encryption key into the repository config). 
+encryption key in the repository config). -Thus, to upgrade a "passphrase" attic repo to a "repokey" borg repo, 2 steps +Thus, to upgrade a "passphrase" Attic repo to a "repokey" Borg repo, two steps are needed, in this order: - borg upgrade repo From a4b2bb3d7647085a01fdf9995095ed5d121695bb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 11 Aug 2025 16:56:11 +0200 Subject: [PATCH 06/16] .pyx files: fix typos and grammar --- src/borg/chunker.pyx | 28 ++++++++++---------- src/borg/compress.pyx | 49 +++++++++++++++++------------------ src/borg/crypto/low_level.pyx | 36 ++++++++++++------------- src/borg/hashindex.pyx | 20 +++++++------- src/borg/item.pyx | 8 +++--- src/borg/platform/darwin.pyx | 6 ++--- src/borg/platform/freebsd.pyx | 30 ++++++++++----------- src/borg/platform/linux.pyx | 35 ++++++++++++------------- src/borg/platform/posix.pyx | 5 ++-- 9 files changed, 104 insertions(+), 113 deletions(-) diff --git a/src/borg/chunker.pyx b/src/borg/chunker.pyx index a60745b68..361f5e567 100644 --- a/src/borg/chunker.pyx +++ b/src/borg/chunker.pyx @@ -36,15 +36,15 @@ _Chunk.__doc__ = """\ meta is always a dictionary, data depends on allocation. 
data chunk read from a DATA range of a file (not from a sparse hole): - meta = {'allocation' = CH_DATA, 'size' = size_of_chunk } + meta = {'allocation': CH_DATA, 'size': size_of_chunk} data = read_data [bytes or memoryview] all-zero chunk read from a DATA range of a file (not from a sparse hole, but detected to be all-zero): - meta = {'allocation' = CH_ALLOC, 'size' = size_of_chunk } + meta = {'allocation': CH_ALLOC, 'size': size_of_chunk} data = None all-zero chunk from a HOLE range of a file (from a sparse hole): - meta = {'allocation' = CH_HOLE, 'size' = size_of_chunk } + meta = {'allocation': CH_HOLE, 'size': size_of_chunk} data = None """ @@ -57,8 +57,8 @@ def dread(offset, size, fd=None, fh=-1): if use_fh: data = os.read(fh, size) if hasattr(os, 'posix_fadvise'): - # UNIX only and, in case of block sizes that are not a multiple of the - # system's page size, better be used with a bug fixed linux kernel > 4.6.0, + # UNIX-only and, in case of block sizes that are not a multiple of the + # system's page size, it is better used with a bug-fixed Linux kernel >= 4.6.0, # see comment/workaround in _chunker.c and borgbackup issue #907. os.posix_fadvise(fh, offset, len(data), os.POSIX_FADV_DONTNEED) return data @@ -86,14 +86,14 @@ def dpos_curr_end(fd=None, fh=-1): def sparsemap(fd=None, fh=-1): """ - generator yielding a (start, length, is_data) tuple for each range. - is_data is indicating data ranges (True) or hole ranges (False). + Generator yielding a (start, length, is_data) tuple for each range. + is_data indicates data ranges (True) or hole ranges (False). - note: - the map is generated starting from the current seek position (it + Note: + The map is generated starting from the current seek position (it is not required to be 0 / to be at the start of the file) and - work from there up to the end of the file. - when the generator is finished, the file pointer position will be + works from there up to the end of the file. 
+ When the generator is finished, the file pointer position will be reset to where it was before calling this function. """ curr, file_len = dpos_curr_end(fd, fh) @@ -107,7 +107,7 @@ def sparsemap(fd=None, fh=-1): except OSError as e: if e.errno == errno.ENXIO: if not is_data and start < file_len: - # if there is a hole at the end of a file, we can not find the file end by SEEK_DATA + # If there is a hole at the end of a file, we cannot find the file end by SEEK_DATA # (because we run into ENXIO), thus we must manually deal with this case: end = file_len yield (start, end - start, is_data) @@ -120,7 +120,7 @@ def sparsemap(fd=None, fh=-1): start = end whence = os.SEEK_DATA if is_data else os.SEEK_HOLE finally: - # seek to same position as before calling this function + # Seek to the same position as before calling this function dseek(curr, os.SEEK_SET, fd, fh) @@ -271,7 +271,7 @@ cdef class Chunker: got = len(data) # we do not have SEEK_DATA/SEEK_HOLE support in chunker_process C code, # but we can just check if data was all-zero (and either came from a hole - # or from stored zeros - we can not detect that here). + # or from stored zeros - we cannot detect that here). if zeros.startswith(data): data = None allocation = CH_ALLOC diff --git a/src/borg/compress.pyx b/src/borg/compress.pyx index ba83ab661..aad1e8d55 100644 --- a/src/borg/compress.pyx +++ b/src/borg/compress.pyx @@ -55,10 +55,10 @@ cdef class CompressorBase: """ base class for all (de)compression classes, also handles compression format auto detection and - adding/stripping the ID header (which enable auto detection). + adding/stripping the ID header (which enables auto detection). 
""" ID = b'\xFF\xFF' # reserved and not used - # overwrite with a unique 2-bytes bytestring in child classes + # overwrite with a unique 2-byte byte string in child classes name = 'baseclass' @classmethod @@ -147,7 +147,7 @@ cdef class DecidingCompressor(CompressorBase): class CNONE(CompressorBase): """ - none - no compression, just pass through data + None - no compression; just pass through data. """ ID = b'\x00\x00' name = 'none' @@ -167,9 +167,9 @@ class LZ4(DecidingCompressor): raw LZ4 compression / decompression (liblz4). Features: - - lz4 is super fast - - wrapper releases CPython's GIL to support multithreaded code - - uses safe lz4 methods that never go beyond the end of the output buffer + - LZ4 is super fast + - The wrapper releases CPython's GIL to support multithreaded code + - Uses safe LZ4 methods that never go beyond the end of the output buffer """ ID = b'\x01\x00' name = 'lz4' @@ -196,7 +196,7 @@ class LZ4(DecidingCompressor): osize = LZ4_compress_default(source, dest, isize, osize) if not osize: raise Exception('lz4 compress failed') - # only compress if the result actually is smaller + # only compress if the result is actually smaller if osize < isize: return self, dest[:osize] else: @@ -234,7 +234,7 @@ class LZ4(DecidingCompressor): class LZMA(DecidingCompressor): """ - lzma compression / decompression + LZMA compression/decompression. """ ID = b'\x02\x00' name = 'lzma' @@ -251,7 +251,7 @@ class LZMA(DecidingCompressor): *lzma_data* is the LZMA result if *compressor* is LZMA as well, otherwise it is None. """ - # we do not need integrity checks in lzma, we do that already + # We do not need integrity checks in LZMA; we do that already. 
lzma_data = lzma.compress(data, preset=self.level, check=lzma.CHECK_NONE) if len(lzma_data) < len(data): return self, lzma_data @@ -267,10 +267,10 @@ class LZMA(DecidingCompressor): class ZSTD(DecidingCompressor): - """zstd compression / decompression (pypi: zstandard, gh: python-zstandard)""" - # This is a NOT THREAD SAFE implementation. - # Only ONE python context must be created at a time. - # It should work flawlessly as long as borg will call ONLY ONE compression job at time. + """Zstd compression/decompression (PyPI: zstandard, GH: python-zstandard).""" + # This is NOT THREAD-SAFE. + # Only ONE Python context must be created at a time. + # It should work flawlessly as long as borg calls ONLY ONE compression job at a time. ID = b'\x03\x00' name = 'zstd' @@ -298,7 +298,7 @@ class ZSTD(DecidingCompressor): osize = ZSTD_compress(dest, osize, source, isize, level) if ZSTD_isError(osize): raise Exception('zstd compress failed: %s' % ZSTD_getErrorName(osize)) - # only compress if the result actually is smaller + # only compress if the result is actually smaller if osize < isize: return self, dest[:osize] else: @@ -334,7 +334,7 @@ class ZSTD(DecidingCompressor): class ZLIB(CompressorBase): """ - zlib compression / decompression (python stdlib) + Zlib compression/decompression (Python stdlib). """ ID = b'\x08\x00' # not used here, see detect() # avoid all 0x.8.. IDs elsewhere! @@ -353,11 +353,11 @@ class ZLIB(CompressorBase): self.level = level def compress(self, data): - # note: for compatibility no super call, do not add ID bytes + # Note: for compatibility, no super call; do not add ID bytes. return zlib.compress(data, self.level) def decompress(self, data): - # note: for compatibility no super call, do not strip ID bytes + # Note: for compatibility, no super call; do not strip ID bytes. 
try: return zlib.decompress(data) except zlib.error as e: @@ -417,17 +417,17 @@ class Auto(CompressorBase): compressor, cheap_compressed_data = self._decide(data) if compressor in (LZ4_COMPRESSOR, NONE_COMPRESSOR): # we know that trying to compress with expensive compressor is likely pointless, - # so we fallback to return the cheap compressed data. + # so we fall back to return the cheap compressed data. return cheap_compressed_data # if we get here, the decider decided to try the expensive compressor. - # we also know that the compressed data returned by the decider is lz4 compressed. + # we also know that the compressed data returned by the decider is LZ4-compressed. expensive_compressed_data = compressor.compress(data) ratio = len(expensive_compressed_data) / len(cheap_compressed_data) if ratio < 0.99: - # the expensive compressor managed to squeeze the data significantly better than lz4. + # the expensive compressor managed to squeeze the data significantly better than LZ4. return expensive_compressed_data else: - # otherwise let's just store the lz4 data, which decompresses extremely fast. + # otherwise let's just store the LZ4 data, which decompresses extremely fast. return cheap_compressed_data def decompress(self, data): @@ -463,8 +463,7 @@ class ObfuscateSize(CompressorBase): self._obfuscate = self._padme_obfuscate def _obfuscate(self, compr_size): - # implementations need to return the size of obfuscation data, - # that the caller shall add. + # Implementations need to return the size of the obfuscation data that the caller shall add. raise NotImplementedError def _relative_random_reciprocal_obfuscate(self, compr_size): @@ -538,8 +537,8 @@ LZ4_COMPRESSOR = get_compressor('lz4') class Compressor: """ - compresses using a compressor with given name and parameters - decompresses everything we can handle (autodetect) + Compresses using a compressor with a given name and parameters. + Decompresses everything we can handle (autodetect). 
""" def __init__(self, name='null', **kwargs): self.params = kwargs diff --git a/src/borg/crypto/low_level.pyx b/src/borg/crypto/low_level.pyx index dc7350732..bddded61b 100644 --- a/src/borg/crypto/low_level.pyx +++ b/src/borg/crypto/low_level.pyx @@ -1,4 +1,4 @@ -"""An AEAD style OpenSSL wrapper +"""An AEAD-style OpenSSL wrapper. API: @@ -15,10 +15,10 @@ Envelope layout: |------------- #header_len ------>| S means a cryptographic signature function (like HMAC or GMAC). -E means a encryption function (like AES). +E means an encryption function (like AES). iv is the initialization vector / nonce, if needed. -The split of header into not authenticated data and aad (additional authenticated +The split of header into unauthenticated data and AAD (additional authenticated data) is done to support the legacy envelope layout as used in attic and early borg (where the TYPE byte was not authenticated) and avoid unneeded memcpy and string garbage. @@ -136,7 +136,7 @@ class UNENCRYPTED: def encrypt(self, data, header=b'', iv=None): """ - IMPORTANT: it is called encrypt to satisfy the crypto api naming convention, + IMPORTANT: It is called encrypt to satisfy the crypto API naming convention, but this does NOT encrypt and it does NOT compute and store a MAC either. """ if iv is not None: @@ -146,7 +146,7 @@ class UNENCRYPTED: def decrypt(self, envelope): """ - IMPORTANT: it is called decrypt to satisfy the crypto api naming convention, + IMPORTANT: It is called decrypt to satisfy the crypto API naming convention, but this does NOT decrypt and it does NOT verify a MAC either, because data is not encrypted and there is no MAC. """ @@ -220,8 +220,8 @@ cdef class AES256_CTR_BASE: def encrypt(self, data, header=b'', iv=None): """ - encrypt data, compute mac over aad + iv + cdata, prepend header. - aad_offset is the offset into the header where aad starts. + Encrypt data, compute MAC over AAD + IV + cdata, prepend header. + aad_offset is the offset into the header where AAD starts. 
""" if iv is not None: self.set_iv(iv) @@ -270,7 +270,7 @@ cdef class AES256_CTR_BASE: def decrypt(self, envelope): """ - authenticate aad + iv + cdata, decrypt cdata, ignore header bytes up to aad_offset. + Authenticate AAD + IV + cdata, decrypt cdata, ignore header bytes up to aad_offset. """ cdef int ilen = len(envelope) cdef int hlen = self.header_len @@ -314,7 +314,7 @@ cdef class AES256_CTR_BASE: return num_cipher_blocks(length, self.cipher_blk_len) def set_iv(self, iv): - # set_iv needs to be called before each encrypt() call + # Call set_iv before each encrypt() call. if isinstance(iv, int): iv = iv.to_bytes(self.iv_len, byteorder='big') assert isinstance(iv, bytes) and len(iv) == self.iv_len @@ -322,16 +322,16 @@ cdef class AES256_CTR_BASE: self.blocks = 0 # how many AES blocks got encrypted with this IV? def next_iv(self): - # call this after encrypt() to get the next iv (int) for the next encrypt() call + # Call this after encrypt() to get the next IV (int) for the next encrypt() call iv = int.from_bytes(self.iv[:self.iv_len], byteorder='big') return iv + self.blocks cdef fetch_iv(self, unsigned char * iv_in): - # fetch lower self.iv_len_short bytes of iv and add upper zero bytes + # Fetch lower self.iv_len_short bytes of IV and add upper zero bytes. return b'\0' * (self.iv_len - self.iv_len_short) + iv_in[0:self.iv_len_short] cdef store_iv(self, unsigned char * iv_out, unsigned char * iv): - # store only lower self.iv_len_short bytes, upper bytes are assumed to be 0 + # Store only lower self.iv_len_short bytes, upper bytes are assumed to be 0. 
cdef int i for i in range(self.iv_len_short): iv_out[i] = iv[(self.iv_len-self.iv_len_short)+i] @@ -405,7 +405,7 @@ ctypedef const EVP_CIPHER * (* CIPHER)() cdef class AES: - """A thin wrapper around the OpenSSL EVP cipher API - for legacy code, like key file encryption""" + """A thin wrapper around the OpenSSL EVP cipher API - for legacy code, like key file encryption.""" cdef CIPHER cipher cdef EVP_CIPHER_CTX *ctx cdef unsigned char enc_key[32] @@ -476,8 +476,8 @@ cdef class AES: raise Exception('EVP_DecryptUpdate failed') offset += olen if EVP_DecryptFinal_ex(self.ctx, odata+offset, &olen) <= 0: - # this error check is very important for modes with padding or - # authentication. for them, a failure here means corrupted data. + # This error check is very important for modes with padding or + # authentication. For them, a failure here means corrupted data. # CTR mode does not use padding nor authentication. raise Exception('EVP_DecryptFinal failed') offset += olen @@ -491,8 +491,8 @@ cdef class AES: return num_cipher_blocks(length, self.cipher_blk_len) def set_iv(self, iv): - # set_iv needs to be called before each encrypt() call, - # because encrypt does a full initialisation of the cipher context. + # Call set_iv before each encrypt() call, + # because encrypt() does a full initialization of the cipher context. 
if isinstance(iv, int): iv = iv.to_bytes(self.iv_len, byteorder='big') assert isinstance(iv, bytes) and len(iv) == self.iv_len @@ -500,7 +500,7 @@ cdef class AES: self.blocks = 0 # number of cipher blocks encrypted with this IV def next_iv(self): - # call this after encrypt() to get the next iv (int) for the next encrypt() call + # Call this after encrypt() to get the next IV (int) for the next encrypt() call iv = int.from_bytes(self.iv[:self.iv_len], byteorder='big') return iv + self.blocks diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx index b5e26eb45..562e4fca0 100644 --- a/src/borg/hashindex.pyx +++ b/src/borg/hashindex.pyx @@ -60,17 +60,17 @@ cdef extern from "cache_sync/cache_sync.c": cdef _NoDefault = object() """ -The HashIndex is *not* a general purpose data structure. The value size must be at least 4 bytes, and these -first bytes are used for in-band signalling in the data structure itself. +The HashIndex is *not* a general-purpose data structure. The value size must be at least 4 bytes, and these +first bytes are used for in-band signaling in the data structure itself. -The constant MAX_VALUE defines the valid range for these 4 bytes when interpreted as an uint32_t from 0 +The constant MAX_VALUE defines the valid range for these 4 bytes when interpreted as a uint32_t from 0 to MAX_VALUE (inclusive). The following reserved values beyond MAX_VALUE are currently in use (byte order is LE):: - 0xffffffff marks empty entries in the hashtable - 0xfffffffe marks deleted entries in the hashtable + 0xffffffff marks empty entries in the hash table + 0xfffffffe marks deleted entries in the hash table -None of the publicly available classes in this module will accept nor return a reserved value; +None of the publicly available classes in this module will accept or return a reserved value; AssertionError is raised instead.
""" @@ -169,7 +169,7 @@ cdef class IndexBase: cdef class FuseVersionsIndex(IndexBase): - # 4 byte version + 16 byte file contents hash + # 4-byte version + 16-byte file contents hash value_size = 20 _key_size = 16 @@ -276,7 +276,7 @@ ChunkIndexEntry = namedtuple('ChunkIndexEntry', 'refcount size csize') cdef class ChunkIndex(IndexBase): """ - Mapping of 32 byte keys to (refcount, size, csize), which are all 32-bit unsigned. + Mapping of 32-byte keys to (refcount, size, csize), which are all 32-bit unsigned. The reference count cannot overflow. If an overflow would occur, the refcount is fixed to MAX_VALUE and will neither increase nor decrease by incref(), decref() @@ -321,7 +321,7 @@ cdef class ChunkIndex(IndexBase): return data != NULL def incref(self, key): - """Increase refcount for 'key', return (refcount, size, csize)""" + """Increase refcount for 'key', return (refcount, size, csize).""" assert len(key) == self.key_size data = hashindex_get(self.index, key) if not data: @@ -334,7 +334,7 @@ cdef class ChunkIndex(IndexBase): return refcount, _le32toh(data[1]), _le32toh(data[2]) def decref(self, key): - """Decrease refcount for 'key', return (refcount, size, csize)""" + """Decrease refcount for 'key', return (refcount, size, csize).""" assert len(key) == self.key_size data = hashindex_get(self.index, key) if not data: diff --git a/src/borg/item.pyx b/src/borg/item.pyx index e7493ac27..df9fa018d 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -230,7 +230,7 @@ class Item(PropDict): try: master = getattr(self, 'source') except AttributeError: - # not a hardlink slave, likely a directory or special file w/o chunks + # not a hardlink slave, likely a directory or special file without chunks chunks = None else: # hardlink slave, try to fetch hardlink master's chunks list @@ -296,12 +296,12 @@ class EncryptedKey(PropDict): """ EncryptedKey abstraction that deals with validation and the low-level details internally: - A EncryptedKey is created either from 
msgpack unpacker output, from another dict, from kwargs or + An EncryptedKey is created either from msgpack unpacker output, from another dict, from kwargs or built step-by-step by setting attributes. msgpack gives us a dict with bytes-typed keys, just give it to EncryptedKey(d) and use enc_key.xxx later. - If a EncryptedKey shall be serialized, give as_dict() method output to msgpack packer. + If an EncryptedKey shall be serialized, give as_dict() method output to msgpack packer. """ VALID_KEYS = {'version', 'algorithm', 'iterations', 'salt', 'hash', 'data'} # str-typed keys @@ -362,7 +362,7 @@ class ArchiveItem(PropDict): msgpack gives us a dict with bytes-typed keys, just give it to ArchiveItem(d) and use arch.xxx later. - If a ArchiveItem shall be serialized, give as_dict() method output to msgpack packer. + If an ArchiveItem shall be serialized, give as_dict() method output to msgpack packer. """ VALID_KEYS = ARCHIVE_KEYS # str-typed keys diff --git a/src/borg/platform/darwin.pyx b/src/borg/platform/darwin.pyx index 64cd87d9e..32a69e566 100644 --- a/src/borg/platform/darwin.pyx +++ b/src/borg/platform/darwin.pyx @@ -101,8 +101,7 @@ def setxattr(path, name, value, *, follow_symlinks=False): def _remove_numeric_id_if_possible(acl): - """Replace the user/group field with the local uid/gid if possible - """ + """Replace the user/group field with the local uid/gid if possible.""" entries = [] for entry in safe_decode(acl).split('\n'): if entry: @@ -118,8 +117,7 @@ def _remove_numeric_id_if_possible(acl): def _remove_non_numeric_identifier(acl): - """Remove user and group names from the acl - """ + """Remove user and group names from the ACL.""" entries = [] for entry in safe_decode(acl).split('\n'): if entry: diff --git a/src/borg/platform/freebsd.pyx b/src/borg/platform/freebsd.pyx index f663eb2f2..6c0e55f51 100644 --- a/src/borg/platform/freebsd.pyx +++ b/src/borg/platform/freebsd.pyx @@ -57,18 +57,18 @@ NS_ID_MAP = {b"user": EXTATTR_NAMESPACE_USER, } def 
split_ns(ns_name, default_ns): - # split ns_name (which is in the form of b"namespace.name") into namespace and name. - # if there is no namespace given in ns_name, default to default_ns. - # note: + # Split ns_name (which is in the form of b"namespace.name") into namespace and name. + # If there is no namespace given in ns_name, default to default_ns. + # Note: # borg < 1.1.10 on FreeBSD did not prefix the namespace to the names, see #3952. - # we also need to deal with "unexpected" namespaces here, they could come + # We also need to deal with "unexpected" namespaces here; they could come # from borg archives made on other operating systems. ns_name_tuple = ns_name.split(b".", 1) if len(ns_name_tuple) == 2: - # we have a namespace prefix in the given name + # We have a namespace prefix in the given name. ns, name = ns_name_tuple else: - # no namespace given in ns_name (== no dot found), maybe data coming from an old borg archive. + # No namespace given in ns_name (no dot found), maybe data coming from an old borg archive. ns, name = default_ns, ns_name return ns, name @@ -100,7 +100,7 @@ def getxattr(path, name, *, follow_symlinks=False): return c_extattr_get_link(path, ns_id, name, buf, size) ns, name = split_ns(name, b"user") - ns_id = NS_ID_MAP[ns] # this will raise a KeyError it the namespace is unsupported + ns_id = NS_ID_MAP[ns] # this will raise a KeyError if the namespace is unsupported n, buf = _getxattr_inner(func, path, name) return bytes(buf[:n]) @@ -117,7 +117,7 @@ def setxattr(path, name, value, *, follow_symlinks=False): ns, name = split_ns(name, b"user") try: - ns_id = NS_ID_MAP[ns] # this will raise a KeyError it the namespace is unsupported + ns_id = NS_ID_MAP[ns] # this will raise a KeyError if the namespace is unsupported except KeyError: pass else: @@ -142,9 +142,9 @@ cdef _get_acl(p, type, item, attribute, flags, fd=None): acl_free(acl) def acl_get(path, item, st, numeric_ids=False, fd=None): - """Saves ACL Entries + """Save ACL entries. 
- If `numeric_ids` is True the user/group field is not preserved only uid/gid + If numeric_ids is True, the user/group field is not preserved; only uid/gid. """ cdef int flags = ACL_TEXT_APPEND_ID flags |= ACL_TEXT_NUMERIC_IDS if numeric_ids else 0 @@ -154,7 +154,7 @@ def acl_get(path, item, st, numeric_ids=False, fd=None): if ret < 0: raise OSError(errno.errno, os.strerror(errno.errno), os.fsdecode(path)) if ret == 0: - # there is no ACL defining permissions other than those defined by the traditional file permission bits. + # There is no ACL defining permissions other than those defined by the traditional file permission bits. return ret = lpathconf(path, _PC_ACL_NFS4) if ret < 0: @@ -192,8 +192,7 @@ cdef _set_acl(p, type, item, attribute, numeric_ids=False, fd=None): cdef _nfs4_use_stored_uid_gid(acl): - """Replace the user/group field with the stored uid/gid - """ + """Replace the user/group field with the stored uid/gid.""" entries = [] for entry in safe_decode(acl).split('\n'): if entry: @@ -206,10 +205,9 @@ cdef _nfs4_use_stored_uid_gid(acl): def acl_set(path, item, numeric_ids=False, fd=None): - """Restore ACL Entries + """Restore ACL entries. - If `numeric_ids` is True the stored uid/gid is used instead - of the user/group names + If numeric_ids is True, the stored uid/gid is used instead of the user/group names. """ if isinstance(path, str): path = os.fsencode(path) diff --git a/src/borg/platform/linux.pyx b/src/borg/platform/linux.pyx index 35ad1bde5..38d67b875 100644 --- a/src/borg/platform/linux.pyx +++ b/src/borg/platform/linux.pyx @@ -134,7 +134,7 @@ def set_flags(path, bsd_flags, fd=None): if fd is None: st = os.stat(path, follow_symlinks=False) if stat.S_ISBLK(st.st_mode) or stat.S_ISCHR(st.st_mode) or stat.S_ISLNK(st.st_mode): - # see comment in get_flags() + # See comment in get_flags(). 
return cdef int flags = 0 for bsd_flag, linux_flag in BSD_TO_LINUX_FLAGS.items(): @@ -155,8 +155,8 @@ def set_flags(path, bsd_flags, fd=None): def get_flags(path, st, fd=None): if stat.S_ISBLK(st.st_mode) or stat.S_ISCHR(st.st_mode) or stat.S_ISLNK(st.st_mode): - # avoid opening devices files - trying to open non-present devices can be rather slow. - # avoid opening symlinks, O_NOFOLLOW would make the open() fail anyway. + # Avoid opening device files - trying to open non-present devices can be rather slow. + # Avoid opening symlinks; O_NOFOLLOW would make the open() fail anyway. return 0 cdef int linux_flags open_fd = fd is None @@ -179,8 +179,7 @@ def get_flags(path, st, fd=None): def acl_use_local_uid_gid(acl): - """Replace the user/group field with the local uid/gid if possible - """ + """Replace the user/group field with the local uid/gid if possible.""" entries = [] for entry in safe_decode(acl).split('\n'): if entry: @@ -194,8 +193,7 @@ def acl_use_local_uid_gid(acl): cdef acl_append_numeric_ids(acl): - """Extend the "POSIX 1003.1e draft standard 17" format with an additional uid/gid field - """ + """Extend the "POSIX 1003.1e draft standard 17" format with an additional uid/gid field.""" entries = [] for entry in _comment_re.sub('', safe_decode(acl)).split('\n'): if entry: @@ -210,8 +208,7 @@ cdef acl_append_numeric_ids(acl): cdef acl_numeric_ids(acl): - """Replace the "POSIX 1003.1e draft standard 17" user/group field with uid/gid - """ + """Replace the "POSIX 1003.1e draft standard 17" user/group field with uid/gid.""" entries = [] for entry in _comment_re.sub('', safe_decode(acl)).split('\n'): if entry: @@ -243,8 +240,8 @@ def acl_get(path, item, st, numeric_ids=False, fd=None): if ret < 0: raise OSError(errno.errno, os.strerror(errno.errno), os.fsdecode(path)) if ret == 0: - # there is no ACL defining permissions other than those defined by the traditional file permission bits. 
- # note: this should also be the case for symlink fs objects, as they can not have ACLs. + # There is no ACL defining permissions other than those defined by the traditional file permission bits. + # Note: this should also be the case for symlink file system objects, as they cannot have ACLs. return if numeric_ids: converter = acl_numeric_ids @@ -265,7 +262,7 @@ def acl_get(path, item, st, numeric_ids=False, fd=None): acl_free(access_text) acl_free(access_acl) if stat.S_ISDIR(st.st_mode): - # only directories can have a default ACL. there is no fd-based api to get it. + # Only directories can have a default ACL. There is no fd-based API to get it. try: default_acl = acl_get_file(path, ACL_TYPE_DEFAULT) if default_acl == NULL: @@ -284,7 +281,7 @@ def acl_set(path, item, numeric_ids=False, fd=None): cdef acl_t default_acl = NULL if stat.S_ISLNK(item.get('mode', 0)): - # Linux does not support setting ACLs on symlinks + # Linux does not support setting ACLs on symlinks. return if isinstance(path, str): @@ -313,7 +310,7 @@ def acl_set(path, item, numeric_ids=False, fd=None): default_acl = acl_from_text(converter(default_text)) if default_acl == NULL: raise OSError(errno.errno, os.strerror(errno.errno), os.fsdecode(path)) - # only directories can get a default ACL. there is no fd-based api to set it. + # Only directories can get a default ACL. There is no fd-based API to set it. if acl_set_file(path, ACL_TYPE_DEFAULT, default_acl) == -1: raise OSError(errno.errno, os.strerror(errno.errno), os.fsdecode(path)) finally: @@ -333,9 +330,9 @@ cdef unsigned PAGE_MASK = sysconf(_SC_PAGESIZE) - 1 if 'basesyncfile' in workarounds or not SYNC_FILE_RANGE_LOADED: class SyncFile(BaseSyncFile): - # if we are on platforms with a broken or not implemented sync_file_range, + # If we are on platforms with a broken or not implemented sync_file_range, # use the more generic BaseSyncFile to avoid issues. - # see basesyncfile description in our docs for details. 
+ # See BaseSyncFile description in our docs for details. pass else: # a real Linux, so we can do better. :) @@ -343,7 +340,7 @@ else: """ Implemented using sync_file_range for asynchronous write-out and fdatasync for actual durability. - "write-out" means that dirty pages (= data that was written) are submitted to an I/O queue and will be send to + "write-out" means that dirty pages (= data that was written) are submitted to an I/O queue and will be sent to disk in the immediate future. """ @@ -369,6 +366,6 @@ else: def sync(self): self.f.flush() os.fdatasync(self.fd) - # tell the OS that it does not need to cache what we just wrote, - # avoids spoiling the cache for the OS and other processes. + # Tell the OS that it does not need to cache what we just wrote. + # This avoids spoiling the cache for the OS and other processes. safe_fadvise(self.fd, 0, 0, 'DONTNEED') diff --git a/src/borg/platform/posix.pyx b/src/borg/platform/posix.pyx index 87640dac6..7191f5b59 100644 --- a/src/borg/platform/posix.pyx +++ b/src/borg/platform/posix.pyx @@ -39,7 +39,7 @@ def process_alive(host, pid, thread): Check if the (host, pid, thread_id) combination corresponds to a potentially alive process. If the process is local, then this will be accurate. If the process is not local, then this - returns always True, since there is no real way to check. + always returns True, since there is no real way to check. """ from . import local_pid_alive from .
import hostid @@ -110,8 +110,7 @@ def group2gid(group, default=None): def posix_acl_use_stored_uid_gid(acl): - """Replace the user/group field with the stored uid/gid - """ + """Replace the user/group field with the stored uid/gid.""" from ..helpers import safe_decode, safe_encode entries = [] for entry in safe_decode(acl).split('\n'): From 71888265f923c928844c0093ecbe17a9acda9062 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 11 Aug 2025 17:51:26 +0200 Subject: [PATCH 07/16] epilogs: fix typos and grammar --- src/borg/archiver.py | 256 +++++++++++++++++++++---------------------- 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 0720af034..9839cb060 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -3125,19 +3125,19 @@ class Archiver: This can be useful for browsing or restoring individual files. When restoring, take into account that the current FUSE implementation does - not support special fs flags and ACLs. + not support special filesystem flags and ACLs. When mounting a repository, the top directories will be named like the archives and the directory structure below these will be loaded on-demand from the repository when entering these directories, so expect some delay. Unless the ``--foreground`` option is given the command will run in the - background until the filesystem is ``umounted``. + background until the filesystem is ``unmounted``. Performance tips: - when doing a "whole repository" mount: - do not enter archive dirs if not needed, this avoids on-demand loading. + do not enter archive directories if not needed; this avoids on-demand loading. - only mount a specific archive, not the whole repository. - only mount specific paths in a specific archive, not the complete archive. @@ -3160,7 +3160,7 @@ class Archiver: uid=1000,gid=1000``). 
The man page references ``user_id`` and ``group_id`` mount options - (implemented by fuse) which specify the user and group id of the mount owner + (implemented by FUSE) which specify the user and group id of the mount owner (aka, the user who does the mounting). It is set automatically by libfuse (or the filesystem if libfuse is not used). However, you should not specify these manually. Unlike the ``uid`` and ``gid`` mount options which affect all files, @@ -3170,7 +3170,7 @@ class Archiver: Additional mount options supported by borg: - ``versions``: when used with a repository mount, this gives a merged, versioned - view of the files in the archives. EXPERIMENTAL, layout may change in future. + view of the files in the archives. EXPERIMENTAL; the layout may change in the future. - ``allow_damaged_files``: by default damaged files (where missing chunks were replaced with runs of zeros by ``borg check --repair``) are not readable and return EIO (I/O error). Set this option to read such files. @@ -3202,7 +3202,7 @@ class Archiver: subparsers = parser.add_subparsers(title='required arguments', metavar='') # borg benchmark - benchmark_epilog = process_epilog("These commands do various benchmarks.") + benchmark_epilog = process_epilog("These commands perform various benchmarks.") subparser = subparsers.add_parser('benchmark', parents=[mid_common_parser], add_help=False, description='benchmark command', @@ -3216,7 +3216,7 @@ class Archiver: bench_crud_epilog = process_epilog(""" This command benchmarks borg CRUD (create, read, update, delete) operations. - It creates input data below the given PATH and backups this data into the given REPO. + It creates input data below the given PATH and backs up this data into the given REPO. The REPO must already exist (it could be a fresh empty repo or an existing repo, the command will create / read / update / delete some archives named borg-benchmark-crud\\* there. 
@@ -3232,17 +3232,17 @@ class Archiver: Also, due to the kind of content used, no compression is used in these benchmarks. C- == borg create (1st archive creation, no compression, do not use files cache) - C-Z- == all-zero files. full dedup, this is primarily measuring reader/chunker/hasher. - C-R- == random files. no dedup, measuring throughput through all processing stages. + C-Z- == all-zero files. full deduplication; this primarily measures reader/chunker/hasher. + C-R- == random files. no deduplication, measuring throughput through all processing stages. R- == borg extract (extract archive, dry-run, do everything, but do not write files to disk) - R-Z- == all zero files. Measuring heavily duplicated files. + R-Z- == all-zero files. Measuring heavily duplicated files. R-R- == random files. No duplication here, measuring throughput through all processing stages, except writing to disk. U- == borg create (2nd archive creation of unchanged input files, measure files cache speed) The throughput value is kind of virtual here, it does not actually read the file. - U-Z- == needs to check the 2 all-zero chunks' existence in the repo. + U-Z- == needs to check the two all-zero chunks' existence in the repo. U-R- == needs to check existence of a lot of different chunks in the repo. D- == borg delete archive (delete last remaining archive, measure deletion + compaction) @@ -3250,7 +3250,7 @@ class Archiver: D-R- == many chunks to delete / many segments to compact/remove. Please note that there might be quite some variance in these measurements. - Try multiple measurements and having a otherwise idle machine (and network, if you use it). + Try multiple measurements and have an otherwise idle machine (and network, if you use it). 
""") subparser = benchmark_parsers.add_parser('crud', parents=[common_parser], add_help=False, description=self.do_benchmark_crud.__doc__, @@ -3263,13 +3263,13 @@ class Archiver: type=location_validator(archive=False), help='repository to use for benchmark (must exist)') - subparser.add_argument('path', metavar='PATH', help='path were to create benchmark input data') + subparser.add_argument('path', metavar='PATH', help='path where to create benchmark input data') # borg break-lock break_lock_epilog = process_epilog(""" This command breaks the repository and cache locks. - Please use carefully and only while no borg process (on any machine) is - trying to access the Cache or the Repository. + Please use with care and only when no borg process (on any machine) is + trying to access the cache or the repository. """) subparser = subparsers.add_parser('break-lock', parents=[common_parser], add_help=False, description=self.do_break_lock.__doc__, @@ -3324,7 +3324,7 @@ class Archiver: **Warning:** Please note that partial repository checks (i.e. running it with ``--max-duration``) can only perform non-cryptographic checksum checks on the segment files. A full repository check (i.e. without ``--max-duration``) can - also do a repository index check. Enabling partial repository checks excepts + also do a repository index check. Enabling partial repository checks excludes archive checks for the same reason. Therefore partial checks may be useful with very large repositories only where a full check would take too long. @@ -3333,13 +3333,13 @@ class Archiver: data from the repository, decrypting and decompressing it. It is a complete cryptographic verification and hence very time consuming, but will detect any accidental and malicious corruption. Tamper-resistance is only guaranteed for - encrypted repositories against attackers without access to the keys. You can - not use ``--verify-data`` with ``--repository-only``. 
+ encrypted repositories against attackers without access to the keys. You cannot + use ``--verify-data`` with ``--repository-only``. About repair mode +++++++++++++++++ - The check command is a readonly task by default. If any corruption is found, + The check command is a read-only task by default. If any corruption is found, Borg will report the issue and proceed with checking. To actually repair the issues found, pass ``--repair``. @@ -3440,7 +3440,7 @@ class Archiver: When using ``--verbose``, borg will output an estimate of the freed space. After upgrading borg (server) to 1.2+, you can use ``borg compact --cleanup-commits`` - to clean up the numerous 17byte commit-only segments that borg 1.1 did not clean up + to clean up the numerous 17-byte commit-only segments that borg 1.1 did not clean up due to a bug. It is enough to do that once per repository. After cleaning up the commits, borg will also do a normal compaction. @@ -3468,7 +3468,7 @@ class Archiver: For security reasons, this command only works on local repositories. To delete a config value entirely, use ``--delete``. To list the values - of the configuration file or the default values, use ``--list``. To get an existing + of the configuration file or the default values, use ``--list``. To get an existing key, pass only the key name. To set a key, pass both the key name and the new value. Keys can be specified in the format "section.name" or simply "name"; the section will default to "repository" and "cache" for @@ -3505,18 +3505,18 @@ class Archiver: create_epilog = process_epilog(""" This command creates a backup archive containing all files found while recursively traversing all paths specified. Paths are added to the archive as they are given, - that means if relative paths are desired, the command has to be run from the correct + which means that if relative paths are desired, the command has to be run from the correct directory. 
The slashdot hack in paths (recursion roots) is triggered by using ``/./``: - ``/this/gets/stripped/./this/gets/archived`` means to process that fs object, but + ``/this/gets/stripped/./this/gets/archived`` means to process that filesystem object, but strip the prefix on the left side of ``./`` from the archived items (in this case, ``this/gets/archived`` will be the path in the archived item). - When giving '-' as path, borg will read data from standard input and create a - file 'stdin' in the created archive from that data. In some cases it's more - appropriate to use --content-from-command, however. See section *Reading from - stdin* below for details. + When giving '-' as a path, Borg will read data from standard input and create a + file 'stdin' in the created archive from that data. In some cases, it is more + appropriate to use ``--content-from-command``. See section "Reading from stdin" + below for details. The archive will consume almost no disk space for files or parts of files that have already been stored in other archives. @@ -3536,33 +3536,33 @@ class Archiver: This comparison can operate in different modes as given by ``--files-cache``: - ctime,size,inode (default) - - mtime,size,inode (default behaviour of borg versions older than 1.1.0rc4) + - mtime,size,inode (default behavior of borg versions older than 1.1.0rc4) - ctime,size (ignore the inode number) - mtime,size (ignore the inode number) - rechunk,ctime (all files are considered modified - rechunk, cache ctime) - rechunk,mtime (all files are considered modified - rechunk, cache mtime) - disabled (disable the files cache, all files considered modified - rechunk) - inode number: better safety, but often unstable on network filesystems + inode number: better safety, but often unstable on network file systems Normally, detecting file modifications will take inode information into consideration to improve the reliability of file change detection. 
- This is problematic for files located on sshfs and similar network file - systems which do not provide stable inode numbers, such files will always + This is problematic for files located on SSHFS and similar network file + systems which do not provide stable inode numbers; such files will always be considered modified. You can use modes without `inode` in this case to - improve performance, but reliability of change detection might be reduced. + improve performance, but the reliability of change detection might be reduced. ctime vs. mtime: safety vs. speed - ctime is a rather safe way to detect changes to a file (metadata and contents) - as it can not be set from userspace. But, a metadata-only change will already + as it cannot be set from user space. However, a metadata-only change will already update the ctime, so there might be some unnecessary chunking/hashing even - without content changes. Some filesystems do not support ctime (change time). - E.g. doing a chown or chmod to a file will change its ctime. + without content changes. Some file systems do not support ctime (change time). + For example, doing a chown or chmod to a file will change its ctime. - mtime usually works and only updates if file contents were changed. But mtime - can be arbitrarily set from userspace, e.g. to set mtime back to the same value + can be arbitrarily set from user space, e.g., to set mtime back to the same value it had before a content change happened. This can be used maliciously as well as - well-meant, but in both cases mtime based cache modes can be problematic. + well-meant, but in both cases mtime-based cache modes can be problematic. The ``--files-changed`` option controls how Borg detects if a file has changed during backup: @@ -3572,13 +3572,13 @@ class Archiver: This is not recommended unless you know what you're doing, as it could lead to inconsistent backups if files change during the backup process. 
- The mount points of filesystems or filesystem snapshots should be the same for every + The mount points of file systems or file system snapshots should be the same for every creation of a new archive to ensure fast operation. This is because the file cache that - is used to determine changed files quickly uses absolute filenames. + is used to determine changed files quickly uses absolute file names. If this is not possible, consider creating a bind mount to a stable location. The ``--progress`` option shows (from left to right) Original, Compressed and Deduplicated - (O, C and D, respectively), then the Number of files (N) processed so far, followed by + (O, C and D, respectively), then the number of files (N) processed so far, followed by the currently processed path. When using ``--stats``, you will get some statistics about how much data was @@ -3606,15 +3606,15 @@ class Archiver: and not include any other contents of the containing folder, this can be enabled through using the ``--keep-exclude-tags`` option. - The ``-x`` or ``--one-file-system`` option excludes directories, that are mountpoints (and everything in them). - It detects mountpoints by comparing the device number from the output of ``stat()`` of the directory and its + The ``-x`` or ``--one-file-system`` option excludes directories that are mount points (and everything in them). + It detects mount points by comparing the device number from the output of ``stat()`` of the directory and its parent directory. Specifically, it excludes directories for which ``stat()`` reports a device number different from the device number of their parent. - In general: be aware that there are directories with device number different from their parent, which the kernel - does not consider a mountpoint and also the other way around. 
- Linux examples for this are bind mounts (possibly same device number, but always a mountpoint) and ALL - subvolumes of a btrfs (different device number from parent but not necessarily a mountpoint). - macOS examples are the apfs mounts of a typical macOS installation. + In general: be aware that there are directories with device numbers different from their parent, which the kernel + does not consider mount points, and vice versa. + Linux examples for this are bind mounts (possibly same device number, but always a mount point) and all + subvolumes of a Btrfs file system (different device numbers from the parent but not necessarily mount points). + macOS examples are the APFS mounts of a typical macOS installation. Therefore, when using ``--one-file-system``, you should double-check that the backup works as intended. @@ -3819,10 +3819,10 @@ class Archiver: # borg debug debug_epilog = process_epilog(""" - These commands are not intended for normal use and potentially very + These commands are not intended for normal use and are potentially very dangerous if used incorrectly. - They exist to improve debugging capabilities without direct system access, e.g. + They exist to improve debugging capabilities without direct system access, e.g., in case you ever run into some severe malfunction. Use them only if you know what you are doing or if a trusted developer tells you what to do.""") @@ -3891,7 +3891,7 @@ class Archiver: help='file to dump data into') debug_dump_repo_objs_epilog = process_epilog(""" - This command dumps raw (but decrypted and decompressed) repo objects to files. + This command dumps raw (but decrypted and decompressed) repository objects to files. 
""") subparser = debug_parsers.add_parser('dump-repo-objs', parents=[common_parser], add_help=False, description=self.do_debug_dump_repo_objs.__doc__, @@ -3910,7 +3910,7 @@ class Archiver: help='used together with --ghost: limit processing to given offset.') debug_search_repo_objs_epilog = process_epilog(""" - This command searches raw (but decrypted and decompressed) repo objects for a specific bytes sequence. + This command searches raw (but decrypted and decompressed) repository objects for a specific byte sequence. """) subparser = debug_parsers.add_parser('search-repo-objs', parents=[common_parser], add_help=False, description=self.do_debug_search_repo_objs.__doc__, @@ -3925,7 +3925,7 @@ class Archiver: help='term to search the repo for, either 0x1234abcd hex term or a string') debug_id_hash_epilog = process_epilog(""" - This command computes the id-hash for some file content. + This command computes the ID hash for some file content. """) subparser = debug_parsers.add_parser('id-hash', parents=[common_parser], add_help=False, description=self.do_debug_id_hash.__doc__, @@ -3987,7 +3987,7 @@ class Archiver: help='hex object ID(s) to delete from the repo') debug_refcount_obj_epilog = process_epilog(""" - This command displays the reference count for objects from the repository. + This command displays the reference count for objects in the repository. """) subparser = debug_parsers.add_parser('refcount-obj', parents=[common_parser], add_help=False, description=self.do_debug_refcount_obj.__doc__, @@ -4017,7 +4017,7 @@ class Archiver: help='file to dump data into') debug_convert_profile_epilog = process_epilog(""" - Convert a Borg profile to a Python cProfile compatible profile. + Convert a Borg profile to a Python cProfile-compatible profile. 
""") subparser = debug_parsers.add_parser('convert-profile', parents=[common_parser], add_help=False, description=self.do_debug_convert_profile.__doc__, @@ -4093,17 +4093,17 @@ class Archiver: This command finds differences (file contents, user/group/mode) between archives. A repository location and an archive name must be specified for REPO::ARCHIVE1. - ARCHIVE2 is just another archive name in same repository (no repository location + ARCHIVE2 is just another archive name in the same repository (no repository location allowed). - For archives created with Borg 1.1 or newer diff automatically detects whether - the archives are created with the same chunker params. If so, only chunk IDs + For archives created with Borg 1.1 or newer, diff automatically detects whether + the archives were created with the same chunker parameters. If so, only chunk IDs are compared, which is very fast. - For archives prior to Borg 1.1 chunk contents are compared by default. - If you did not create the archives with different chunker params, + For archives prior to Borg 1.1, chunk contents are compared by default. + If you did not create the archives with different chunker parameters, pass ``--same-chunker-params``. - Note that the chunker params changed from Borg 0.xx to 1.0. + Note that the chunker parameters changed from Borg 0.xx to 1.0. For more help on include/exclude patterns, see the :ref:`borg_patterns` command output. """) @@ -4165,7 +4165,7 @@ class Archiver: Timestamp resolution is limited to whole seconds, not the nanosecond resolution otherwise supported by Borg. - A ``--sparse`` option (as found in borg extract) is not supported. + A ``--sparse`` option (as found in ``borg extract``) is not supported. By default the entire archive is extracted but a subset of files and directories can be selected by passing a list of ``PATHs`` as arguments. @@ -4205,8 +4205,8 @@ class Archiver: For more help on include/exclude patterns, see the :ref:`borg_patterns` command output. 
By using ``--dry-run``, you can do all extraction steps except actually writing the - output data: reading metadata and data chunks from the repo, checking the hash/hmac, - decrypting, decompressing. + output data: reading metadata and data chunks from the repository, checking the hash/HMAC, + decrypting, and decompressing. ``--progress`` can be slower than no progress display, since it makes one additional pass over the archive metadata. @@ -4217,8 +4217,8 @@ class Archiver: so make sure you ``cd`` to the right place before calling ``borg extract``. When parent directories are not extracted (because of using file/directory selection - or any other reason), borg can not restore parent directories' metadata, e.g. owner, - group, permission, etc. + or any other reason), Borg cannot restore parent directories' metadata, e.g., owner, + group, permissions, etc. """) subparser = subparsers.add_parser('extract', parents=[common_parser], add_help=False, description=self.do_extract.__doc__, @@ -4268,11 +4268,11 @@ class Archiver: Please note that the deduplicated sizes of the individual archives do not add up to the deduplicated size of the repository ("all archives"), because the two - are meaning different things: + mean different things: This archive / deduplicated size = amount of data stored ONLY for this archive = unique chunks of this archive. - All archives / deduplicated size = amount of data stored in the repo + All archives / deduplicated size = amount of data stored in the repository = all chunks in the repository. Borg archives can only contain a limited amount of file metadata. @@ -4295,12 +4295,12 @@ class Archiver: # borg version version_epilog = process_epilog(""" - This command displays the borg client version / borg server version. + This command displays the Borg client version / Borg server version. 
- If a local repo is given, the client code directly accesses the repository, + If a local repository is given, the client code directly accesses the repository, thus we show the client version also as the server version. - If a remote repo is given (e.g. ssh:), the remote borg is queried and + If a remote repository is given (e.g., ssh:), the remote Borg is queried and its version is displayed as the server version. Examples:: @@ -4313,10 +4313,10 @@ class Archiver: $ borg version ssh://borg@borgbackup:repo 1.4.0a / 1.2.7 - Due to the version tuple format used in borg client/server negotiation, only - a simplified version is displayed (as provided by borg.version.format_version). + Due to the version tuple format used in Borg client/server negotiation, only + a simplified version is displayed (as provided by ``borg.version.format_version``). - There is also borg --version to display a potentially more precise client version. + There is also ``borg --version`` to display a potentially more precise client version. """) subparser = subparsers.add_parser('version', parents=[common_parser], add_help=False, description=self.do_version.__doc__, epilog=version_epilog, @@ -4350,9 +4350,9 @@ class Archiver: Borg will: 1. Ask you to come up with a passphrase. - 2. Create a borg key (which contains 3 random secrets. See :ref:`key_files`). + 2. Create a Borg key (which contains three random secrets. See :ref:`key_files`). 3. Encrypt the key with your passphrase. - 4. Store the encrypted borg key inside the repository directory (in the repo config). + 4. Store the encrypted Borg key inside the repository directory (in the repo config). This is why it is essential to use a secure passphrase. 5. Encrypt and sign your backups to prevent anyone from reading or forging them unless they have the key and know the passphrase. Make sure to keep a backup of @@ -4360,7 +4360,7 @@ class Archiver: "leaving your keys inside your car" (see :ref:`borg_key_export`). 
For remote backups the encryption is done locally - the remote machine never sees your passphrase, your unencrypted key or your unencrypted files. - Chunking and id generation are also based on your key to improve + Chunking and ID generation are also based on your key to improve your privacy. 6. Use the key when extracting files to decrypt them and to verify that the contents of the backups have not been accidentally or maliciously altered. @@ -4370,20 +4370,20 @@ class Archiver: Make sure you use a good passphrase. Not too short, not too simple. The real encryption / decryption key is encrypted with / locked by your passphrase. - If an attacker gets your key, he can't unlock and use it without knowing the + If an attacker gets your key, they can't unlock and use it without knowing the passphrase. - Be careful with special or non-ascii characters in your passphrase: + Be careful with special or non-ASCII characters in your passphrase: - - Borg processes the passphrase as unicode (and encodes it as utf-8), + - Borg processes the passphrase as Unicode (and encodes it as UTF-8), so it does not have problems dealing with even the strangest characters. - BUT: that does not necessarily apply to your OS / VM / keyboard configuration. - So better use a long passphrase made from simple ascii chars than one that - includes non-ascii stuff or characters that are hard/impossible to enter on + So it is better to use a long passphrase made from simple ASCII characters than one that + includes non-ASCII characters or characters that are hard or impossible to enter on a different keyboard layout. - You can change your passphrase for existing repos at any time, it won't affect + You can change your passphrase for existing repositories at any time; it won't affect the encryption/decryption key or other secrets. 
More encryption modes @@ -4496,25 +4496,25 @@ class Archiver: key_export_epilog = process_epilog(""" If repository encryption is used, the repository is inaccessible - without the key. This command allows one to backup this essential key. + without the key. This command allows one to back up this essential key. Note that the backup produced does not include the passphrase itself - (i.e. the exported key stays encrypted). In order to regain access to a + (i.e., the exported key stays encrypted). In order to regain access to a repository, one needs both the exported key and the original passphrase. There are three backup formats. The normal backup format is suitable for digital storage as a file. The ``--paper`` backup format is optimized - for printing and typing in while importing, with per line checks to - reduce problems with manual input. The ``--qr-html`` creates a printable + for printing and typing in while importing, with per-line checks to + reduce problems with manual input. The ``--qr-html`` option creates a printable HTML template with a QR code and a copy of the ``--paper``-formatted key. - For repositories using keyfile encryption the key is saved locally + For repositories using key file encryption the key is saved locally on the system that is capable of doing backups. To guard against loss of this key, the key needs to be backed up independently of the main data backup. - For repositories using the repokey encryption the key is saved in the - repository in the config file. A backup is thus not strictly needed, - but guards against the repository becoming inaccessible if the file + For repositories using repokey encryption, the key is saved in the + repository's config file. A backup is thus not strictly needed, + but it guards against the repository becoming inaccessible if the file is damaged for some reason. 
Examples:: @@ -4549,12 +4549,12 @@ class Archiver: If the ``--paper`` option is given, the import will be an interactive process in which each line is checked for plausibility before - proceeding to the next line. For this format PATH must not be given. + proceeding to the next line. For this format, PATH must not be provided. - For repositories using keyfile encryption, the key file which ``borg key + For repositories using key file encryption, the key file which ``borg key import`` writes to depends on several factors. If the ``BORG_KEY_FILE`` environment variable is set and non-empty, ``borg key import`` creates - or overwrites that file named by ``$BORG_KEY_FILE``. Otherwise, ``borg + or overwrites the file named by ``$BORG_KEY_FILE``. Otherwise, ``borg key import`` searches in the ``$BORG_KEYS_DIR`` directory for a key file associated with the repository. If a key file is found in ``$BORG_KEYS_DIR``, ``borg key import`` overwrites it; otherwise, ``borg @@ -4574,12 +4574,12 @@ class Archiver: help='interactively import from a backup done with ``--paper``') change_passphrase_epilog = process_epilog(""" - The key files used for repository encryption are optionally passphrase - protected. This command can be used to change this passphrase. + The key files used for repository encryption are optionally + passphrase-protected. This command can be used to change this passphrase. Please note that this command only changes the passphrase, but not any - secret protected by it (like e.g. encryption/MAC keys or chunker seed). - Thus, changing the passphrase after passphrase and borg key got compromised + secret protected by it (e.g., encryption/MAC keys or the chunker seed). + Thus, changing the passphrase after the passphrase and Borg key were compromised does not protect future (nor past) backups to the same repository.
""") subparser = key_parsers.add_parser('change-passphrase', parents=[common_parser], add_help=False, @@ -4595,10 +4595,10 @@ class Archiver: This command migrates a repository from passphrase mode (removed in Borg 1.0) to repokey mode. - You will be first asked for the repository passphrase (to open it in passphrase - mode). This is the same passphrase as you used to use for this repo before 1.0. + You will first be asked for the repository passphrase (to open it in passphrase + mode). This is the same passphrase you used for this repository before 1.0. - It will then derive the different secrets from this passphrase. + The different secrets will then be derived from this passphrase. Then you will be asked for a new passphrase (twice, for safety). This passphrase will be used to protect the repokey (which contains these same @@ -4606,7 +4606,7 @@ class Archiver: use, but you may also use a different one. After migrating to repokey mode, you can change the passphrase at any time. - But please note: the secrets will always stay the same and they could always + Please note: the secrets will always stay the same, and they could always be derived from your (old) passphrase-mode passphrase. """) subparser = key_parsers.add_parser('migrate-to-repokey', parents=[common_parser], add_help=False, @@ -4629,7 +4629,7 @@ class Archiver: The FORMAT specifier syntax +++++++++++++++++++++++++++ - The ``--format`` option uses python's `format string syntax + The ``--format`` option uses Python's `format string syntax `_. Examples: @@ -4748,9 +4748,9 @@ class Archiver: The rules are applied from secondly to yearly, and backups selected by previous rules do not count towards those of later rules. The time that each backup starts is used for pruning purposes. Dates and times are interpreted in - the local timezone, and weeks go from Monday to Sunday. Specifying a - negative number of archives to keep means that there is no limit. 
As of borg - 1.2.0, borg will retain the oldest archive if any of the secondly, minutely, + the local time zone, and weeks go from Monday to Sunday. Specifying a + negative number of archives to keep means that there is no limit. As of Borg + 1.2.0, Borg will retain the oldest archive if any of the secondly, minutely, hourly, daily, weekly, monthly, quarterly, or yearly rules was not otherwise able to meet its retention target. This enables the first chronological archive to continue aging until it is replaced by a newer archive that meets the @@ -4818,7 +4818,7 @@ class Archiver: recreate_epilog = process_epilog(""" Recreate the contents of existing archives. - recreate is a potentially dangerous function and might lead to data loss + Recreate is a potentially dangerous operation and might lead to data loss (if used wrongly). BE VERY CAREFUL! Important: Repository disk space is **not** freed until you run ``borg compact``. @@ -4836,34 +4836,34 @@ class Archiver: incorrect information for archives that were not recreated at the same time. There is no risk of data loss by this. - ``--chunker-params`` will re-chunk all files in the archive, this can be + ``--chunker-params`` will re-chunk all files in the archive; this can be used to have upgraded Borg 0.xx or Attic archives deduplicate with Borg 1.x archives. **USE WITH CAUTION.** Depending on the PATHs and patterns given, recreate can be used to permanently delete files from archives. - When in doubt, use ``--dry-run --verbose --list`` to see how patterns/PATHS are + When in doubt, use ``--dry-run --verbose --list`` to see how patterns/PATHs are interpreted. See :ref:`list_item_flags` in ``borg create`` for details. The archive being recreated is only removed after the operation completes. The archive that is built during the operation exists at the same time at ".recreate". The new archive will have a different archive ID. - With ``--target`` the original archive is not replaced, instead a new archive is created. 
+ With ``--target`` the original archive is not replaced; instead, a new archive is created. When rechunking (or recompressing), space usage can be substantial - expect at least the entire deduplicated size of the archives using the previous - chunker (or compression) params. + chunker (or compression) parameters. - If you recently ran borg check --repair and it had to fix lost chunks with all-zero + If you recently ran ``borg check --repair`` and it had to fix lost chunks with all-zero replacement chunks, please first run another backup for the same data and re-run - borg check --repair afterwards to heal any archives that had lost chunks which are + ``borg check --repair`` afterwards to heal any archives that had lost chunks which are still generated from the input data. - Important: running borg recreate to re-chunk will remove the chunks_healthy + Important: running ``borg recreate`` to re-chunk will remove the ``chunks_healthy`` metadata of all items with replacement chunks, so healing will not be possible - any more after re-chunking (it is also unlikely it would ever work: due to the + anymore after re-chunking (it is also unlikely it would ever work: due to the change of chunking parameters, the missing chunk likely will never be seen again even if you still have the data that produced it). """) @@ -4979,7 +4979,7 @@ class Archiver: # borg umount umount_epilog = process_epilog(""" - This command un-mounts a FUSE filesystem that was mounted with ``borg mount``. + This command unmounts a FUSE filesystem that was mounted with ``borg mount``. This is a convenience wrapper that just calls the platform-specific shell command - usually this is either umount or fusermount -u. @@ -5020,8 +5020,8 @@ class Archiver: This is a convenient method to just trust all archives present - if an archive does not have TAM authentication yet, a TAM will be added. Archives created by old borg versions < 1.0.9 do not have TAMs. 
- Archives created by newer borg version should have TAMs already. - If you have a high risk environment, you should not just run this, + Archives created by newer borg versions should have TAMs already. + If you have a high-risk environment, you should not just run this, but first verify that the archives are authentic and not malicious (== have good content, have a good timestamp). Borg 1.2.5+ needs all archives to be TAM authenticated for safety reasons. @@ -5063,7 +5063,7 @@ class Archiver: See ``--chunker-params`` option of ``borg create`` and ``borg recreate``. ``borg upgrade`` will change the magic strings in the repository's - segments to match the new Borg magic strings. The keyfiles found in + segments to match the new Borg magic strings. The key files found in $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or ~/.config/borg/keys. @@ -5071,11 +5071,11 @@ class Archiver: ~/.cache/attic to $BORG_CACHE_DIR or ~/.cache/borg, but the cache layout between Borg and Attic changed, so it is possible the first backup after the conversion takes longer than expected - due to the cache resync. + due to the cache re-sync. - Upgrade should be able to resume if interrupted, although it + The upgrade should be able to resume if interrupted, although it will still iterate over all segments. If you want to start - from scratch, use `borg delete` over the copied repository to + from scratch, use ``borg delete`` over the copied repository to make sure the cache files are also removed:: borg delete borg @@ -5129,14 +5129,14 @@ class Archiver: $ borg with-lock /mnt/borgrepo rsync -av /mnt/borgrepo /somewhere/else/borgrepo It will first try to acquire the lock (make sure that no other operation is - running in the repo), then execute the given command as a subprocess and wait - for its termination, release the lock and return the user command's return - code as borg's return code. 
+ running in the repository), then execute the given command as a subprocess and wait + for its termination, release the lock, and return the user command's return + code as Borg's return code. .. note:: If you copy a repository with the lock held, the lock will be present in - the copy. Thus, before using borg on the copy from a different host, + the copy. Thus, before using Borg on the copy from a different host, you need to use "borg break-lock" on the copied repository, because Borg is cautious and does not automatically remove stale locks made by a different host. """) @@ -5158,9 +5158,9 @@ class Archiver: import_tar_epilog = process_epilog(""" This command creates a backup archive from a tarball. - When giving '-' as path, Borg will read a tar stream from standard input. + When giving '-' as a path, Borg will read a tar stream from standard input. - By default (--tar-filter=auto) Borg will detect whether the file is compressed + By default (``--tar-filter=auto``) Borg will detect whether the file is compressed based on its file extension and pipe the file through an appropriate filter: - .tar.gz or .tgz: gzip -d @@ -5169,11 +5169,11 @@ class Archiver: - .tar.zstd or .tar.zst: zstd -d - .tar.lz4: lz4 -d - Alternatively, a --tar-filter program may be explicitly specified. It should + Alternatively, a ``--tar-filter`` program may be explicitly specified. It should read compressed data from stdin and output an uncompressed tar stream on stdout. - Most documentation of borg create applies. Note that this command does not + Most documentation of ``borg create`` applies. Note that this command does not support excluding files. 
import-tar is a lossy conversion: From c956e1f50e49998ba437e8dc5a898704bed0d685 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 11 Aug 2025 18:01:24 +0200 Subject: [PATCH 08/16] changes.rst: fix typos and grammar --- docs/changes.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 3bc2fe85c..bc073c891 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -1232,7 +1232,7 @@ Fixes: fix potential memory leak with ssh: remote repository with partial extraction. - remove empty shadowed_segments lists, #5275 - fix bad default: manifest.archives.list(consider_checkpoints=False), - fixes tracebacks / KeyErros for missing objects in ChunkIndex, #5668 + fixes tracebacks / KeyErrors for missing objects in ChunkIndex, #5668 New features: @@ -1243,7 +1243,7 @@ New features: - detect all-zero chunks in read data in "buzhash" and "fixed" chunkers - cached_hash: use a small LRU cache to accelerate all-zero chunks hashing - use cached_hash also to generate all-zero replacement chunks -- create --remote-buffer, add a upload buffer for remote repos, #5574 +- create --remote-buffer, add an upload buffer for remote repos, #5574 - prune: keep oldest archive when retention target not met Other changes: @@ -1255,7 +1255,7 @@ Other changes: - create: add repository location to --stats output, #5491 - check: debug log the segment filename - delete: add a --list switch to borg delete, #5116 -- borg debug dump-hints - implemented to e.g. to look at shadow_index +- borg debug dump-hints - implemented e.g. to look at shadow_index - Tab completion support for additional archives for 'borg delete' - refactor: have one borg.constants.zero all-zero bytes object - refactor shadow_index updating repo.put/delete, #5661, #5636. @@ -1291,12 +1291,12 @@ Fixes: New features: - create: implement --paths-from-stdin and --paths-from-command, see #5492. - These switches read paths to archive from stdin. 
Delimiter can specified - by --paths-delimiter=DELIM. Paths read will be added honoring every - option but exclusion options and --one-file-system. borg won't recurse + These switches read paths to archive from stdin. Delimiter can be specified + by --paths-delimiter=DELIM. Paths read will be added honoring all + options except exclusion options and --one-file-system. borg won't recurse into directories. - 'obfuscate' pseudo compressor obfuscates compressed chunk size in repo -- add pyfuse3 (successor of llfuse) as an alternative lowlevel fuse +- add pyfuse3 (successor of llfuse) as an alternative low-level FUSE implementation to llfuse (deprecated), #5407. FUSE implementation can be switched via env var BORG_FUSE_IMPL. - allow appending to the files cache filename with BORG_FILES_CACHE_SUFFIX From eab8be76a086d8866b8c4f9470fdde465d99574e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 11 Aug 2025 21:15:14 +0200 Subject: [PATCH 09/16] _*.c: fix typos and grammar --- src/borg/_chunker.c | 30 +++++++++++++++--------------- src/borg/_hashindex.c | 22 +++++++++++----------- src/borg/crypto/_crypto_helpers.c | 2 +- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/borg/_chunker.c b/src/borg/_chunker.c index e5d9b54f8..73bbb0a1b 100644 --- a/src/borg/_chunker.c +++ b/src/borg/_chunker.c @@ -19,15 +19,15 @@ Some properties of buzhash / of this implementation: the hash function, e.g. in "X X", the last X would cancel out the influence of the first X on the hash value. -(2) the hash table is supposed to have (according to the BUZ) exactly a 50% distribution of - 0/1 bit values per position, but the hard coded table below doesn't fit that property. +(2) The hash table is supposed to have (according to the BUZ) exactly a 50% distribution of + 0/1 bit values per position, but the hard-coded table below doesn't fit that property. -(3) if you would use a window size divisible by 64, the seed would cancel itself out completely. 
- this is why we use a window size of 4095 bytes. +(3) If you used a window size divisible by 64, the seed would cancel itself out completely. + This is why we use a window size of 4095 bytes. Another quirk is that, even with the 4095 byte window, XORing the entire table by a constant -is equivalent to XORing the hash output with a different constant. but since the seed is stored -encrypted, i think it still serves its purpose. +is equivalent to XORing the hash output with a different constant. But since the seed is stored +encrypted, I think it still serves its purpose. */ static uint32_t table_base[] = @@ -174,7 +174,7 @@ chunker_fill(Chunker *c) off_t offset = c->bytes_read; #endif - // if we have a os-level file descriptor, use os-level API + // If we have an OS-level file descriptor, use an OS-level API n = read(c->fh, c->data + c->position + c->remaining, n); if(n > 0) { c->remaining += n; @@ -197,23 +197,23 @@ chunker_fill(Chunker *c) if (pagemask == 0) pagemask = getpagesize() - 1; - // We tell the OS that we do not need the data that we just have read any + // We tell the OS that we do not need the data we have just read any // more (that it maybe has in the cache). This avoids that we spoil the // complete cache with data that we only read once and (due to cache - // size limit) kick out data from the cache that might be still useful + // size limit) kick out data from the cache that might still be useful // for the OS or other processes. - // We rollback the initial offset back to the start of the page, - // to avoid it not being truncated as a partial page request. + // We roll back the initial offset to the start of the page, + // to avoid it being truncated as a partial page request.
int overshoot; if (length > 0) { // All Linux kernels (at least up to and including 4.6(.0)) have a bug where - // they truncate last partial page of POSIX_FADV_DONTNEED request, so we need + // they truncate the last partial page of a POSIX_FADV_DONTNEED request, so we need // to page-align it ourselves. We'll need the rest of this page on the next // read (assuming this was not EOF). overshoot = (offset + length) & pagemask; } else { // For length == 0 we set overshoot 0, so the below - // length - overshoot is 0, which means till end of file for + // length - overshoot is 0, which means to the end of the file for // fadvise. This will cancel the final page and is not part // of the above workaround. overshoot = 0; @@ -225,7 +225,7 @@ chunker_fill(Chunker *c) PyEval_RestoreThread(thread_state); } else { - // no os-level file descriptor, use Python file object API + // No OS-level file descriptor, use Python file object API data = PyObject_CallMethod(c->fd, "read", "i", n); if(!data) { return 0; @@ -266,7 +266,7 @@ chunker_process(Chunker *c) return NULL; } } - /* here we either are at eof ... */ + /* Here we are either at EOF ... */ if(c->eof) { c->done = 1; if(c->remaining) { diff --git a/src/borg/_hashindex.c b/src/borg/_hashindex.c index deeadaab9..93e08a181 100644 --- a/src/borg/_hashindex.c +++ b/src/borg/_hashindex.c @@ -59,18 +59,18 @@ typedef struct { #endif } HashIndex; -/* prime (or w/ big prime factors) hash table sizes - * not sure we need primes for borg's usage (as we have a hash function based - * on sha256, we can assume an even, seemingly random distribution of values), +/* Prime (or with big prime factors) hash table sizes + * Not sure we need primes for Borg's usage (as we have a hash function based + * on SHA-256, we can assume an even, seemingly random distribution of values), * but OTOH primes don't harm. - * also, growth of the sizes starts with fast-growing 2x steps, but slows down - * more and more down to 1.1x. 
this is to avoid huge jumps in memory allocation, + * Also, growth of the sizes starts with fast-growing 2x steps but slows down + * more and more down to 1.1x. This is to avoid huge jumps in memory allocation, * like e.g. 4G -> 8G. * these values are generated by hash_sizes.py. * - * update: no, we don't need primes or w/ big prime factors, we followed some + * Update: no, we don't need primes or with big prime factors; we followed some * incomplete / irrelevant advice here that did not match our use case. - * otoh, for now, we do not need to change the sizes as they do no harm. + * OTOH, for now, we do not need to change the sizes as they do no harm. * see ticket #2830. */ static int hash_sizes[] = { @@ -82,7 +82,7 @@ static int hash_sizes[] = { 306647623, 337318939, 370742809, 408229973, 449387209, 493428073, 543105119, 596976533, 657794869, 722676499, 795815791, 874066969, 962279771, 1057701643, 1164002657, 1280003147, 1407800297, 1548442699, - 1703765389, 1873768367, 2062383853, /* 32bit int ends about here */ + 1703765389, 1873768367, 2062383853, /* 32-bit int ends about here */ }; #define HASH_MIN_LOAD .25 @@ -326,7 +326,7 @@ hashindex_read(PyObject *file_py, int permit_compact) Py_XDECREF(tmp); if(PyErr_Occurred()) { if(PyErr_ExceptionMatches(PyExc_AttributeError)) { - /* Be able to work with regular file objects which do not have a hash_part method. */ + /* Be able to work with regular file objects that do not have a hash_part method. 
*/ PyErr_Clear(); } else { goto fail_decref_header; @@ -341,7 +341,7 @@ hashindex_read(PyObject *file_py, int permit_compact) length = PyNumber_AsSsize_t(length_object, PyExc_OverflowError); Py_DECREF(length_object); if(PyErr_Occurred()) { - /* This shouldn't generally happen; but can if seek() returns something that's not a number */ + /* This shouldn't generally happen, but it can if seek() returns something that's not a number */ goto fail_decref_header; } @@ -528,7 +528,7 @@ hashindex_write(HashIndex *index, PyObject *file_py) Py_XDECREF(tmp); if(PyErr_Occurred()) { if(PyErr_ExceptionMatches(PyExc_AttributeError)) { - /* Be able to work with regular file objects which do not have a hash_part method. */ + /* Be able to work with regular file objects that do not have a hash_part method. */ PyErr_Clear(); } else { return; diff --git a/src/borg/crypto/_crypto_helpers.c b/src/borg/crypto/_crypto_helpers.c index 856736aba..265fcb4d7 100644 --- a/src/borg/crypto/_crypto_helpers.c +++ b/src/borg/crypto/_crypto_helpers.c @@ -1,4 +1,4 @@ -/* some helpers, so our code also works with OpenSSL 1.0.x */ +/* Some helpers so that our code also works with OpenSSL 1.0.x. */ #include #include From 1b68676b8e095d1224f9f92deb4478b1a9e064e3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 11 Aug 2025 22:21:49 +0200 Subject: [PATCH 10/16] github templates: fix typos and grammar --- .github/ISSUE_TEMPLATE.md | 29 ++++++++++++++--------------- .github/PULL_REQUEST_TEMPLATE | 10 +++++----- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e20150c80..cfb87142f 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,56 +1,55 @@ -## Have you checked borgbackup docs, FAQ, and open Github issues? +## Have you checked the BorgBackup docs, FAQ, and open GitHub issues? No -## Is this a BUG / ISSUE report or a QUESTION? +## Is this a bug/issue report or a question? 
-Invalid +Bug/Issue/Question -## System information. For client/server mode post info for both machines. +## System information. For client/server mode, post info for both machines. #### Your borg version (borg -V). #### Operating system (distribution) and version. -#### Hardware / network configuration, and filesystems used. +#### Hardware/network configuration and filesystems used. #### How much data is handled by borg? -#### Full borg commandline that lead to the problem (leave away excludes and passwords) +#### Full borg command line that led to the problem (leave out excludes and passwords). ## Describe the problem you're observing. #### Can you reproduce the problem? If so, describe how. If not, describe troubleshooting steps you took before opening the issue. -#### Include any warning/errors/backtraces from the system logs +#### Include any warnings/errors/backtraces from the system logs diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE index f01ff53c6..d940a67be 100644 --- a/.github/PULL_REQUEST_TEMPLATE +++ b/.github/PULL_REQUEST_TEMPLATE @@ -1,8 +1,8 @@ -Thank you for contributing code to Borg, your help is appreciated! +Thank you for contributing code to Borg; your help is appreciated! 
-Please, before you submit a pull request, make sure it complies with the -guidelines given in our documentation: +Before you submit a pull request, please make sure it complies with the +guidelines in our documentation: -https://borgbackup.readthedocs.io/en/latest/development.html#contributions +https://borgbackup.readthedocs.io/en/stable/development.html#contributions -**Please remove all above text before submitting your pull request.** +**Please remove the text above before submitting your pull request.** From 1bb32057d2f2a47938e342960aa3aa9e88ef8b75 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 12 Aug 2025 11:15:11 +0200 Subject: [PATCH 11/16] github workflows: fix typos and grammar --- .github/workflows/ci.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dc490a013..b55f823eb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -# badge: https://github.com/borgbackup/borg/workflows/CI/badge.svg?branch=master +# badge: https://github.com/borgbackup/borg/workflows/CI/badge.svg?branch=1.4-maint name: CI @@ -67,7 +67,7 @@ jobs: toxenv: py314-fuse3 - os: macos-14 python-version: '3.11' - toxenv: py311-none # note: no fuse testing, due to #6099, see also #6196. + toxenv: py311-none # Note: no FUSE testing due to #6099; see also #6196. 
env: # Configure pkg-config to use OpenSSL from Homebrew @@ -80,7 +80,7 @@ jobs: steps: - uses: actions/checkout@v4 with: - # just fetching 1 commit is not enough for setuptools-scm, so we fetch all + # Just fetching one commit is not enough for setuptools-scm, so we fetch all fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 @@ -112,20 +112,20 @@ jobs: run: | python -m pip install --upgrade pip setuptools wheel pip install -r requirements.d/development.txt - - name: Install borgbackup + - name: Install BorgBackup env: - # we already have that in the global env, but something is broken and overwrites that. - # so, set it here, again. + # We already have this in the global environment, but something overrides it. + # So set it here again. PKG_CONFIG_PATH: "/opt/homebrew/opt/openssl@3.0/lib/pkgconfig:$PKG_CONFIG_PATH" run: | pip install -ve . - - name: run pytest via tox + - name: Run pytest via tox env: - # we already have that in the global env, but something is broken and overwrites that. - # so, set it here, again. + # We already have this in the global environment, but something overrides it. + # So set it here again. PKG_CONFIG_PATH: "/opt/homebrew/opt/openssl@3.0/lib/pkgconfig:$PKG_CONFIG_PATH" run: | - # do not use fakeroot, but run as root. avoids the dreaded EISDIR sporadic failures. see #2482. + # Do not use fakeroot; run as root. Avoids the dreaded sporadic EISDIR failures; see #2482. 
#sudo -E bash -c "tox -e py" tox --skip-missing-interpreters - name: Upload coverage to Codecov From 48ad1a8fa8bd58fedadb551ccf20b09d47585897 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 12 Aug 2025 11:21:50 +0200 Subject: [PATCH 12/16] README: fix typos and grammar --- README.rst | 52 +++++++++++++++++++++++----------------------- README_WINDOWS.rst | 34 +++++++++++++++--------------- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/README.rst b/README.rst index 2600cf222..f0930da3f 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ See the `installation manual`_ or, if you have already downloaded Borg, ``docs/installation.rst`` to get started with Borg. There is also an `offline documentation`_ available, in multiple formats. -.. _installation manual: https://borgbackup.readthedocs.org/en/stable/installation.html +.. _installation manual: https://borgbackup.readthedocs.io/en/stable/installation.html .. _offline documentation: https://readthedocs.org/projects/borgbackup/downloads Main features @@ -57,10 +57,10 @@ Main features **Data encryption** All data can be protected using 256-bit AES encryption, data integrity and - authenticity is verified using HMAC-SHA256. Data is encrypted clientside. + authenticity is verified using HMAC-SHA256. Data is encrypted client-side. **Obfuscation** - Optionally, borg can actively obfuscate e.g. the size of files / chunks to + Optionally, Borg can actively obfuscate, e.g., the size of files/chunks to make fingerprinting attacks more difficult. **Compression** @@ -73,24 +73,24 @@ Main features * lzma (low speed, high compression) **Off-site backups** - Borg can store data on any remote host accessible over SSH. If Borg is - installed on the remote host, big performance gains can be achieved - compared to using a network filesystem (sshfs, nfs, ...). + Borg can store data on any remote host accessible over SSH. 
If Borg is + installed on the remote host, significant performance gains can be achieved + compared to using a network file system (sshfs, NFS, ...). -**Backups mountable as filesystems** - Backup archives are mountable as userspace filesystems for easy interactive - backup examination and restores (e.g. by using a regular file manager). +**Backups mountable as file systems** + Backup archives are mountable as user-space file systems for easy interactive + backup examination and restores (e.g., by using a regular file manager). **Easy installation on multiple platforms** We offer single-file binaries that do not require installing anything - you can just run them on these platforms: * Linux - * Mac OS X + * macOS * FreeBSD * OpenBSD and NetBSD (no xattrs/ACLs support or binaries yet) * Cygwin (experimental, no binaries yet) - * Linux Subsystem of Windows 10 (experimental) + * Windows Subsystem for Linux (WSL) on Windows 10 (experimental) **Free and Open Source Software** * security and functionality can be audited independently @@ -128,9 +128,9 @@ Now doing another backup, just to show off the great deduplication:: ----------------------------------------------------------------------------- -For a graphical frontend refer to our complementary project `BorgWeb `_. +For a graphical frontend, refer to our complementary project `BorgWeb `_. -Helping, Donations and Bounties, becoming a Patron +Helping, donations and bounties, becoming a Patron -------------------------------------------------- Your help is always welcome! @@ -144,17 +144,17 @@ https://www.borgbackup.org/support/fund.html Links ----- -* `Main Web Site `_ +* `Main website `_ * `Releases `_, `PyPI packages `_ and - `ChangeLog `_ -* `Offline Documentation `_ + `Changelog `_ +* `Offline documentation `_ * `GitHub `_ and - `Issue Tracker `_. -* `Web-Chat (IRC) `_ and - `Mailing List `_ -* `License `_ -* `Security contact `_ + `Issue tracker `_. 
+* `Web chat (IRC) `_ and + `Mailing list `_ +* `License `_ +* `Security contact `_ Compatibility notes ------------------- @@ -175,15 +175,15 @@ see ``docs/support.rst`` in the source distribution). .. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable :alt: Documentation - :target: https://borgbackup.readthedocs.org/en/stable/ + :target: https://borgbackup.readthedocs.io/en/stable/ -.. |build| image:: https://github.com/borgbackup/borg/workflows/CI/badge.svg?branch=master - :alt: Build Status (master) +.. |build| image:: https://github.com/borgbackup/borg/workflows/CI/badge.svg?branch=1.4-maint + :alt: Build Status (1.4-maint) :target: https://github.com/borgbackup/borg/actions -.. |coverage| image:: https://codecov.io/github/borgbackup/borg/coverage.svg?branch=master +.. |coverage| image:: https://codecov.io/github/borgbackup/borg/coverage.svg?branch=1.4-maint :alt: Test Coverage - :target: https://codecov.io/github/borgbackup/borg?branch=master + :target: https://codecov.io/github/borgbackup/borg?branch=1.4-maint .. |screencast_basic| image:: https://asciinema.org/a/133292.png :alt: BorgBackup Basic Usage diff --git a/README_WINDOWS.rst b/README_WINDOWS.rst index 520bf47c9..a61324687 100644 --- a/README_WINDOWS.rst +++ b/README_WINDOWS.rst @@ -1,48 +1,48 @@ -Borg Native on Windows +Borg native on Windows ====================== -Running borg natively on windows is in a early alpha stage. Expect many things to fail. -Do not use the native windows build on any data which you do not want to lose! +Running Borg natively on Windows is in an early alpha stage. Expect many things to fail. +Do not use the native Windows build on any data that you do not want to lose! 
Build Requirements ------------------ - VC 14.0 Compiler -- OpenSSL Library v1.1.1c, 64bit (available at https://github.com/python/cpython-bin-deps) - Please use the `win-download-openssl.ps1` script to download and extract the library to +- OpenSSL Library v1.1.1c, 64-bit (available at https://github.com/python/cpython-bin-deps) + Use the `win-download-openssl.ps1` script to download and extract the library to the correct location. See also the OpenSSL section below. -- Patience and a lot of coffee / beer +- Patience and a lot of coffee/beer What's working -------------- .. note:: The following examples assume that the `BORG_REPO` and `BORG_PASSPHRASE` environment variables are set - if the repo or passphrase is not explicitly given. + when the repository or passphrase is not explicitly provided. - Borg does not crash if called with ``borg`` -- ``borg init --encryption repokey-blake2 ./demoRepo`` runs without an error/warning. - Note that absolute paths only work if the protocol is explicitly set to file:// +- ``borg init --encryption repokey-blake2 ./demoRepo`` runs without errors or warnings. + Note that absolute paths only work if the protocol is explicitly set to ``file://`` - ``borg create ::backup-{now} D:\DemoData`` works as expected. - ``borg list`` works as expected. -- ``borg extract --strip-components 1 ::backup-XXXX`` works. - If absolute paths are extracted, it's important to pass ``--strip-components 1`` as +- ``borg extract --strip-components 1 ::backup-XXXX`` works. + If absolute paths are extracted, it is important to pass ``--strip-components 1``, otherwise the data is restored to the original location! What's NOT working ------------------ -- Extracting a backup which was created on windows machine on a non windows machine will fail. -- And many things more. +- Extracting a backup created on a Windows machine on a non-Windows machine will fail. +- Many other things. 
OpenSSL, Windows and Python --------------------------- Windows does not ship OpenSSL by default, so we need to get the library from somewhere else. -However, a default python installation does include `libcrypto` which is required by borg. -The only things which are missing to build borg are the header and `*.lib` files. -Luckily the python developers provide all required files in a separate repository. +However, a default Python installation does include `libcrypto`, which is required by Borg. +The only things missing to build Borg are the header and `*.lib` files. +Luckily, the Python developers provide all required files in a separate repository. The `win-download-openssl.ps1` script can be used to download the package from https://github.com/python/cpython-bin-deps and extract the files to the correct location. -For Anaconda, the required libraries can be installed with `conda install -c anaconda openssl`. +For Anaconda, the required libraries can be installed with ``conda install -c anaconda openssl``. From 87af3a4b47975c3bd64ad25cd3b266fb7cf7d343 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 12 Aug 2025 16:56:00 +0200 Subject: [PATCH 13/16] paperkey.html: fix typos, grammar, html structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original markup included a paragraph element wrapping a block-level pre element, which is invalid per HTML’s content model (a p can only contain phrasing content; pre is flow content). The fix separated text and pre blocks into valid sibling elements, ensuring no pre is nested inside a p. --- src/borg/paperkey.html | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/borg/paperkey.html b/src/borg/paperkey.html index 8dd818cbd..4143bc7cd 100644 --- a/src/borg/paperkey.html +++ b/src/borg/paperkey.html @@ -2,21 +2,21 @@ BorgBackup Printable Key Template - +