diff --git a/.travis/install.sh b/.travis/install.sh
index 73e292ddd..6ae408992 100755
--- a/.travis/install.sh
+++ b/.travis/install.sh
@@ -15,6 +15,7 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then
fi
brew install lz4
+ brew install xz # required for python lzma module
brew outdated pyenv || brew upgrade pyenv
case "${TOXENV}" in
diff --git a/README.rst b/README.rst
index c0eed00ce..2417b24d5 100644
--- a/README.rst
+++ b/README.rst
@@ -107,7 +107,7 @@ Now doing another backup, just to show off the great deduplication::
-----------------------------------------------------------------------------
-For a graphical frontend refer to our complementary project `BorgWeb `_.
+For a graphical frontend refer to our complementary project `BorgWeb `_.
Links
=====
diff --git a/Vagrantfile b/Vagrantfile
index 8ce3d3548..7a26c26f1 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -54,14 +54,15 @@ def packages_darwin
# install all the (security and other) updates
sudo softwareupdate --install --all
# get osxfuse 3.0.x pre-release code from github:
- curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.0.9/osxfuse-3.0.9.dmg >osxfuse.dmg
+ curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.2.0/osxfuse-3.2.0.dmg >osxfuse.dmg
MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \
- && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.0.9.pkg" -target /
+ && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.2.0.pkg" -target /
sudo chown -R vagrant /usr/local # brew must be able to create stuff here
ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
brew update
brew install openssl
brew install lz4
+ brew install xz # required for python lzma module
brew install fakeroot
brew install git
brew install pkgconfig
diff --git a/borg/_chunker.c b/borg/_chunker.c
index 0f9494e79..7f772ca4b 100644
--- a/borg/_chunker.c
+++ b/borg/_chunker.c
@@ -184,9 +184,9 @@ chunker_fill(Chunker *c)
length = c->bytes_read - offset;
#if ( ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) && defined(POSIX_FADV_DONTNEED) )
- // Only do it once per run.
- if (pagemask == 0)
- pagemask = getpagesize() - 1;
+ // Only do it once per run.
+ if (pagemask == 0)
+ pagemask = getpagesize() - 1;
// We tell the OS that we do not need the data that we just have read any
// more (that it maybe has in the cache). This avoids that we spoil the
@@ -207,7 +207,7 @@ chunker_fill(Chunker *c)
// fadvise. This will cancel the final page and is not part
// of the above workaround.
overshoot = 0;
- }
+ }
posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED);
#endif
diff --git a/borg/archive.py b/borg/archive.py
index 0b5bc773d..b5a105b1a 100644
--- a/borg/archive.py
+++ b/borg/archive.py
@@ -15,13 +15,14 @@ import sys
import time
from io import BytesIO
from . import xattr
-from .compress import Compressor, COMPR_BUFFER
+from .compress import COMPR_BUFFER
from .constants import * # NOQA
from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \
parse_timestamp, to_localtime, format_time, format_timedelta, safe_encode, safe_decode, \
Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
- PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume
+ PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \
+ CompressionDecider1, CompressionDecider2, CompressionSpec
from .repository import Repository
from .platform import acl_get, acl_set
from .chunker import Chunker
@@ -125,7 +126,7 @@ class Archive:
def __init__(self, repository, key, manifest, name, cache=None, create=False,
checkpoint_interval=300, numeric_owner=False, progress=False,
- chunker_params=CHUNKER_PARAMS, start=None, end=None):
+ chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None):
self.cwd = os.getcwd()
self.key = key
self.repository = repository
@@ -148,6 +149,9 @@ class Archive:
if create:
self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
+ self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
+ compression_files or [])
+ key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none'))
if name in manifest.archives:
raise self.AlreadyExists(name)
self.last_checkpoint = time.time()
@@ -601,11 +605,15 @@ Number of files: {0.stats.nfiles}'''.format(
}
# Only chunkify the file if needed
if chunks is None:
+ compress = self.compression_decider1.decide(path)
+ logger.debug('%s -> compression %s', path, compress['name'])
fh = Archive._open_rb(path)
with os.fdopen(fh, 'rb') as fd:
chunks = []
for data in self.chunker.chunkify(fd, fh):
- chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats))
+ chunks.append(cache.add_chunk(self.key.id_hash(data),
+ Chunk(data, compress=compress),
+ self.stats))
if self.show_progress:
self.stats.show_progress(item=item, dt=0.2)
cache.memorize_file(path_hash, st, [c.id for c in chunks])
@@ -948,7 +956,7 @@ class ArchiveRecreater:
def __init__(self, repository, manifest, key, cache, matcher,
exclude_caches=False, exclude_if_present=None, keep_tag_files=False,
- chunker_params=None, compression=None,
+ chunker_params=None, compression=None, compression_files=None,
dry_run=False, stats=False, progress=False, file_status_printer=None):
self.repository = repository
self.key = key
@@ -961,12 +969,12 @@ class ArchiveRecreater:
self.keep_tag_files = keep_tag_files
self.chunker_params = chunker_params or CHUNKER_PARAMS
- self.compression = compression or dict(name='none')
- self.seen_chunks = set()
self.recompress = bool(compression)
- compr_args = dict(buffer=COMPR_BUFFER)
- compr_args.update(self.compression)
- key.compressor = Compressor(**compr_args)
+ self.compression = compression or CompressionSpec('none')
+ self.seen_chunks = set()
+ self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
+ compression_files or [])
+ key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none'))
self.autocommit_threshold = max(self.AUTOCOMMIT_THRESHOLD, self.cache.chunks_stored_size() / 100)
logger.debug("Autocommit threshold: %s", format_file_size(self.autocommit_threshold))
@@ -1054,6 +1062,7 @@ class ArchiveRecreater:
def process_chunks(self, archive, target, item):
"""Return new chunk ID list for 'item'."""
+ # TODO: support --compression-from
if not self.recompress and not target.recreate_rechunkify:
for chunk_id, size, csize in item[b'chunks']:
self.cache.chunk_incref(chunk_id, target.stats)
@@ -1248,7 +1257,7 @@ class ArchiveRecreater:
def create_target_archive(self, name):
target = Archive(self.repository, self.key, self.manifest, name, create=True,
progress=self.progress, chunker_params=self.chunker_params, cache=self.cache,
- checkpoint_interval=0)
+ checkpoint_interval=0, compression=self.compression)
target.recreate_partial_chunks = None
target.recreate_uncomitted_bytes = 0
return target
diff --git a/borg/archiver.py b/borg/archiver.py
index a6a0e3241..7c378567e 100644
--- a/borg/archiver.py
+++ b/borg/archiver.py
@@ -9,9 +9,11 @@ import hashlib
import inspect
import io
import os
+import re
import shlex
import signal
import stat
+import subprocess
import sys
import textwrap
import traceback
@@ -34,6 +36,7 @@ from .constants import * # NOQA
from .key import key_creator, RepoKey, PassphraseKey
from .archive import Archive, ArchiveChecker, ArchiveRecreater
from .remote import RepositoryServer, RemoteRepository, cache_if_remote
+from .selftest import selftest
from .hashindex import ChunkIndexEntry
has_lchflags = hasattr(os, 'lchflags')
@@ -285,14 +288,12 @@ class Archiver:
dry_run = args.dry_run
t0 = datetime.utcnow()
if not dry_run:
- compr_args = dict(buffer=COMPR_BUFFER)
- compr_args.update(args.compression)
- key.compressor = Compressor(**compr_args)
with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
create=True, checkpoint_interval=args.checkpoint_interval,
numeric_owner=args.numeric_owner, progress=args.progress,
- chunker_params=args.chunker_params, start=t0)
+ chunker_params=args.chunker_params, start=t0,
+ compression=args.compression, compression_files=args.compression_files)
create_inner(archive, cache)
else:
create_inner(None, None)
@@ -788,9 +789,20 @@ class Archiver:
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
'"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
return self.exit_code
- archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list
+ archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list
if args.prefix:
- archives = [archive for archive in archives if archive.name.startswith(args.prefix)]
+ archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
+ is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
+ checkpoints = [arch for arch in archives_checkpoints if is_checkpoint(arch.name)]
+ # keep the latest checkpoint, if there is no later non-checkpoint archive
+ if archives_checkpoints and checkpoints and archives_checkpoints[0] is checkpoints[0]:
+ keep_checkpoints = checkpoints[:1]
+ else:
+ keep_checkpoints = []
+ checkpoints = set(checkpoints)
+ # ignore all checkpoint archives to avoid keeping one (which is an incomplete backup)
+ # that is newer than a successfully completed backup - and killing the successful backup.
+ archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
keep = []
if args.within:
keep += prune_within(archives, args.within)
@@ -808,11 +820,10 @@ class Archiver:
keep += prune_split(archives, '%Y-%m', args.monthly, keep)
if args.yearly:
keep += prune_split(archives, '%Y', args.yearly, keep)
-
- to_delete = set(archives) - set(keep)
+ to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
stats = Statistics()
with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache:
- for archive in archives:
+ for archive in archives_checkpoints:
if archive in to_delete:
if args.dry_run:
if args.output_list:
@@ -874,8 +885,8 @@ class Archiver:
recreater = ArchiveRecreater(repository, manifest, key, cache, matcher,
exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
- keep_tag_files=args.keep_tag_files,
- compression=args.compression, chunker_params=args.chunker_params,
+ keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params,
+ compression=args.compression, compression_files=args.compression_files,
progress=args.progress, stats=args.stats,
file_status_printer=self.print_file_status,
dry_run=args.dry_run)
@@ -902,6 +913,21 @@ class Archiver:
cache.commit()
return self.exit_code
+ @with_repository(manifest=False)
+ def do_with_lock(self, args, repository):
+ """run a user specified command with the repository lock held"""
+ # re-write manifest to start a repository transaction - this causes a
+ # lock upgrade to exclusive for remote (and also for local) repositories.
+ # by using manifest=False in the decorator, we avoid having to require
+ # the encryption key (and can operate just with encrypted data).
+ data = repository.get(Manifest.MANIFEST_ID)
+ repository.put(Manifest.MANIFEST_ID, data)
+ try:
+ # we exit with the return code we get from the subprocess
+ # (the command is passed as an argument list, so no shell interprets it)
+ return subprocess.call([args.command] + args.args)
+ finally:
+ # runs on every exit path; this method never commits the re-written manifest
+ repository.rollback()
+
@with_repository()
def do_debug_dump_archive_items(self, args, repository, manifest, key):
"""dump (decrypted, decompressed) archive items metadata (not: data)"""
@@ -1265,6 +1291,12 @@ class Archiver:
traversing all paths specified. The archive will consume almost no disk space for
files or parts of files that have already been stored in other archives.
+ The archive name needs to be unique. It must not end in '.checkpoint' or
+ '.checkpoint.N' (with N being a number), because these names are used for
+ checkpoints and treated in special ways.
+
+ In the archive name, you may use the following format tags:
+ {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}
To speed up pulling backups over sshfs and similar network file systems which do
not provide correct inode information the --ignore-inode flag can be used. This
@@ -1350,11 +1382,16 @@ class Archiver:
type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION',
help='select compression algorithm (and level):\n'
'none == no compression (default),\n'
+ 'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n'
+ ' being any valid compression algorithm (and optional level),\n'
'lz4 == lz4,\n'
'zlib == zlib (default level 6),\n'
'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
'lzma == lzma (default level 6),\n'
'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+ archive_group.add_argument('--compression-from', dest='compression_files',
+ type=argparse.FileType('r'), action='append',
+ metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
@@ -1369,6 +1406,10 @@ class Archiver:
be restricted by using the ``--exclude`` option.
See the output of the "borg help patterns" command for more help on exclude patterns.
+
+ By using ``--dry-run``, you can do all extraction steps except actually writing the
+ output data: reading metadata and data chunks from the repo, checking the hash/hmac,
+ decrypting, decompressing.
""")
subparser = subparsers.add_parser('extract', parents=[common_parser], add_help=False,
description=self.do_extract.__doc__,
@@ -1603,11 +1644,20 @@ class Archiver:
any of the specified retention options. This command is normally used by
automated backup scripts wanting to keep a certain number of historic backups.
+ Also, prune automatically removes checkpoint archives (incomplete archives left
+ behind by interrupted backup runs) except if the checkpoint is the latest
+ archive (and thus still needed). Checkpoint archives are not considered when
+ comparing archive counts against the retention limits (--keep-*).
+
If a prefix is set with -P, then only archives that start with the prefix are
considered for deletion and only those archives count towards the totals
specified by the rules.
Otherwise, *all* archives in the repository are candidates for deletion!
+ If you have multiple sequences of archives with different data sets (e.g.
+ from different machines) in one shared repository, use one prune call per
+ data set that matches only the respective archives using the -P option.
+
The "--keep-within" option takes an argument of the form "<int><char>",
where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means
to keep all archives that were created within the past 48 hours.
@@ -1816,11 +1866,16 @@ class Archiver:
type=CompressionSpec, default=None, metavar='COMPRESSION',
help='select compression algorithm (and level):\n'
'none == no compression (default),\n'
+ 'auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]\n'
+ ' being any valid compression algorithm (and optional level),\n'
'lz4 == lz4,\n'
'zlib == zlib (default level 6),\n'
'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
'lzma == lzma (default level 6),\n'
'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+ archive_group.add_argument('--compression-from', dest='compression_files',
+ type=argparse.FileType('r'), action='append',
+ metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
archive_group.add_argument('--chunker-params', dest='chunker_params',
type=ChunkerParams, default=None,
metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
@@ -1832,6 +1887,32 @@ class Archiver:
subparser.add_argument('paths', metavar='PATH', nargs='*', type=str,
help='paths to recreate; patterns are supported')
+ with_lock_epilog = textwrap.dedent("""
+ This command runs a user-specified command while the repository lock is held.
+
+ It will first try to acquire the lock (make sure that no other operation is
+ running in the repo), then execute the given command as a subprocess and wait
+ for its termination, release the lock and return the user command's return
+ code as borg's return code.
+
+ Note: if you copy a repository with the lock held, the lock will be present in
+ the copy, obviously. Thus, before using borg on the copy, you need to
+ use "borg break-lock" on it.
+ """)
+ subparser = subparsers.add_parser('with-lock', parents=[common_parser], add_help=False,
+ description=self.do_with_lock.__doc__,
+ epilog=with_lock_epilog,
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ help='run user command with lock held')
+ subparser.set_defaults(func=self.do_with_lock)
+ subparser.add_argument('location', metavar='REPOSITORY',
+ type=location_validator(archive=False),
+ help='repository to lock')
+ subparser.add_argument('command', metavar='COMMAND',
+ help='command to run')
+ subparser.add_argument('args', metavar='ARGS', nargs=argparse.REMAINDER,
+ help='command arguments')
+
subparser = subparsers.add_parser('help', parents=[common_parser], add_help=False,
description='Extra help')
subparser.add_argument('--epilog-only', dest='epilog_only',
@@ -1926,13 +2007,17 @@ class Archiver:
update_excludes(args)
return args
+ def prerun_checks(self, logger):
+ # called by run() before the command function is invoked;
+ # the archiver test suite swaps this method for a no-op
+ check_extension_modules()
+ selftest(logger)
+
def run(self, args):
os.umask(args.umask) # early, before opening files
self.lock_wait = args.lock_wait
setup_logging(level=args.log_level, is_serve=args.func == self.do_serve) # do not use loggers before this!
if args.show_version:
logger.info('borgbackup version %s' % __version__)
- check_extension_modules()
+ self.prerun_checks(logger)
if is_slow_msgpack():
logger.warning("Using a pure-python msgpack! This will result in lower performance.")
return args.func(args)
diff --git a/borg/crypto.pyx b/borg/crypto.pyx
index 8bee39fe4..dd0fa14ac 100644
--- a/borg/crypto.pyx
+++ b/borg/crypto.pyx
@@ -1,15 +1,9 @@
-"""A thin OpenSSL wrapper
+"""A thin OpenSSL wrapper"""
-This could be replaced by PyCrypto maybe?
-"""
from libc.stdlib cimport malloc, free
from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
-API_VERSION = 2
-
-
-cdef extern from "openssl/rand.h":
- int RAND_bytes(unsigned char *buf, int num)
+API_VERSION = 3
cdef extern from "openssl/evp.h":
diff --git a/borg/helpers.py b/borg/helpers.py
index ae33f0077..ad4f6d2db 100644
--- a/borg/helpers.py
+++ b/borg/helpers.py
@@ -36,6 +36,7 @@ from . import hashindex
from . import chunker
from .constants import * # NOQA
from . import crypto
+from .compress import COMPR_BUFFER, get_compressor
from . import shellpattern
import msgpack
import msgpack.fallback
@@ -83,7 +84,7 @@ def check_extension_modules():
raise ExtensionModuleError
if chunker.API_VERSION != 2:
raise ExtensionModuleError
- if crypto.API_VERSION != 2:
+ if crypto.API_VERSION != 3:
raise ExtensionModuleError
if platform.API_VERSION != 2:
raise ExtensionModuleError
@@ -285,8 +286,7 @@ def load_excludes(fh):
"""Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on
both line ends are ignored.
"""
- patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#'))
- return [parse_pattern(pattern) for pattern in patterns if pattern]
+ return [parse_pattern(pattern) for pattern in clean_lines(fh)]
def update_excludes(args):
@@ -539,6 +539,12 @@ def CompressionSpec(s):
else:
raise ValueError
return dict(name=name, level=level)
+ if name == 'auto':
+ if 2 <= count <= 3:
+ compression = ','.join(values[1:])
+ else:
+ raise ValueError
+ return dict(name=name, spec=CompressionSpec(compression))
raise ValueError
@@ -1484,3 +1490,96 @@ except ImportError:
def scandir_inorder(path='.'):
return sorted(scandir(path), key=lambda dirent: dirent.inode())
+
+
+def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comments=True):
+    """
+    Generator that tidies up raw text lines, e.g. the lines of a config file.
+
+    Each line is stripped on both ends (unless switched off), then empty and "#" lines are dropped.
+
+    Only whole-line comments are recognised, a "#" further right in a line is left alone.
+
+    :param lines: iterable of raw input lines (e.g. a list or an open text file)
+    :param lstrip: argument handed to the line's lstrip(), or False to skip left stripping
+    :param rstrip: argument handed to the line's rstrip(), or False to skip right stripping
+    :param remove_comments: if true, lines that start with "#" after stripping are not yielded
+    :param remove_empty: if true, lines that are empty after stripping are not yielded
+    :return: yields the cleaned lines in input order
+    """
+    for raw in lines:
+        # False switches a strip step off; any other value (including None) is passed through
+        text = raw if lstrip is False else raw.lstrip(lstrip)
+        text = text if rstrip is False else text.rstrip(rstrip)
+        # "or" short-circuits: the comment test only runs for lines that survived the empty test
+        is_noise = remove_empty and not text
+        is_noise = is_noise or (remove_comments and text.startswith('#'))
+        if is_noise:
+            continue
+        yield text
+
+
+class CompressionDecider1:
+    """First-stage decider: picks a compression spec per file path (see decide)."""
+    def __init__(self, compression, compression_files):
+        """
+        Set up the decider, reading "<compression spec>:<path pattern>" lines from the given files.
+
+        :param compression: default CompressionSpec (e.g. from --compression option)
+        :param compression_files: list of config files (e.g. from --compression-from) or other line iterators
+        """
+        self.compression = compression
+        if not compression_files:
+            self.matcher = None
+        else:
+            self.matcher = PatternMatcher(fallback=compression)
+            for file in compression_files:
+                try:
+                    for line in clean_lines(file):
+                        try:
+                            compr_spec, fn_pattern = line.split(':', 1)
+                        except ValueError:  # only a line without ':' is skipped, other errors must surface
+                            continue
+                        self.matcher.add([parse_pattern(fn_pattern)], CompressionSpec(compr_spec))
+                finally:
+                    if hasattr(file, 'close'):  # plain line iterators (e.g. lists) have no close()
+                        file.close()
+
+    def decide(self, path):
+        """Return what the matcher yields for path, or the default spec if no config was loaded."""
+        matcher = self.matcher
+        return self.compression if matcher is None else matcher.match(path)
+
+
+class CompressionDecider2:
+ def __init__(self, compression):
+ self.compression = compression  # fallback spec for chunks whose meta has no 'compress' entry
+
+ def decide(self, chunk):
+ # nothing fancy here yet: we either use what the metadata says or the default
+ # later, we can decide based on the chunk data also.
+ # if we compress the data here to decide, we can even update the chunk data
+ # and modify the metadata as desired.
+ compr_spec = chunk.meta.get('compress', self.compression)
+ compr_args = dict(buffer=COMPR_BUFFER)
+ compr_args.update(compr_spec)  # the spec dict is merged on top of the buffer entry
+ if compr_args['name'] == 'auto':
+ # we did not decide yet, use heuristic:
+ compr_args, chunk = self.heuristic_lz4(compr_args, chunk)
+ return compr_args, chunk  # (compressor arguments, chunk); the chunk is rebuilt on the 'auto' path
+
+ def heuristic_lz4(self, compr_args, chunk):
+ meta, data = chunk
+ lz4 = get_compressor('lz4', buffer=compr_args['buffer'])
+ cdata = lz4.compress(data)  # trial run: only the length of cdata is used below
+ data_len = len(data)
+ cdata_len = len(cdata)
+ if cdata_len < data_len:
+ compr_spec = compr_args['spec']  # lz4 shrank the data: use the spec that was given after "auto,"
+ else:
+ # uncompressible - we could have a special "uncompressible compressor"
+ # that marks such data as uncompressible via compression-type metadata.
+ compr_spec = CompressionSpec('none')
+ # NOTE(review): update() overwrites 'name' but leaves the 'spec' key in compr_args - confirm that
+ # whatever consumes compr_args tolerates the extra key
+ compr_args.update(compr_spec)
+ logger.debug("len(data) == %d, len(lz4(data)) == %d, choosing %s", data_len, cdata_len, compr_spec)
+ return compr_args, Chunk(data, **meta)
diff --git a/borg/key.py b/borg/key.py
index ad960b796..81bf69756 100644
--- a/borg/key.py
+++ b/borg/key.py
@@ -7,13 +7,13 @@ import textwrap
from hmac import compare_digest
from hashlib import sha256, pbkdf2_hmac
-from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes, bin_to_hex
+from .helpers import Chunk, IntegrityError, get_keys_dir, Error, yes, bin_to_hex, CompressionDecider2, CompressionSpec
from .logger import create_logger
logger = create_logger()
from .constants import * # NOQA
from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256
-from .compress import Compressor, COMPR_BUFFER
+from .compress import Compressor, COMPR_BUFFER, get_compressor
import msgpack
PREFIX = b'\0' * 8
@@ -35,6 +35,14 @@ class KeyfileNotFoundError(Error):
"""No key file for repository {} found in {}."""
+class KeyfileInvalidError(Error):
+ """Invalid key file for repository {} found in {}."""
+
+
+class KeyfileMismatchError(Error):
+ """Mismatch between repository {} and key file {}."""
+
+
class RepoKeyNotFoundError(Error):
"""No key entry found in the config of repository {}."""
@@ -71,12 +79,20 @@ class KeyBase:
self.TYPE_STR = bytes([self.TYPE])
self.repository = repository
self.target = None # key location file path / repo obj
- self.compressor = Compressor('none', buffer=COMPR_BUFFER)
+ self.compression_decider2 = CompressionDecider2(CompressionSpec('none'))
+ self.compressor = Compressor('none', buffer=COMPR_BUFFER) # for decompression
def id_hash(self, data):
"""Return HMAC hash using the "id" HMAC key
"""
+ def compress(self, chunk):
+ """Compress the chunk's data as decided by compression_decider2 and return it as a new Chunk with the same meta."""
+ compr_args, chunk = self.compression_decider2.decide(chunk)
+ compressor = Compressor(**compr_args)  # a new Compressor per call, built from the decider's arguments
+ meta, data = chunk
+ data = compressor.compress(data)
+ return Chunk(data, **meta)
+
def encrypt(self, chunk):
pass
@@ -102,8 +118,8 @@ class PlaintextKey(KeyBase):
return sha256(data).digest()
def encrypt(self, chunk):
- meta, data = chunk
- return b''.join([self.TYPE_STR, self.compressor.compress(data)])
+ chunk = self.compress(chunk)
+ return b''.join([self.TYPE_STR, chunk.data])
def decrypt(self, id, data):
if data[0] != self.TYPE:
@@ -135,9 +151,9 @@ class AESKeyBase(KeyBase):
return hmac_sha256(self.id_key, data)
def encrypt(self, chunk):
- data = self.compressor.compress(chunk.data)
+ chunk = self.compress(chunk)
self.enc_cipher.reset()
- data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
+ data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(chunk.data)))
hmac = hmac_sha256(self.enc_hmac_key, data)
return b''.join((self.TYPE_STR, hmac, data))
@@ -396,17 +412,33 @@ class KeyfileKey(KeyfileKeyBase):
TYPE = 0x00
FILE_ID = 'BORG_KEY'
+ def sanity_check(self, filename, id):
+ """
+ Check the first line of the key file and return filename if it fits the repository id.
+
+ :raises KeyfileInvalidError: the first line does not start with FILE_ID
+ :raises KeyfileMismatchError: the first line names a different id
+ """
+ with open(filename, 'r') as fd:
+ line = fd.readline().strip()
+ if not line.startswith(self.FILE_ID):
+ raise KeyfileInvalidError(self.repository._location.canonical_path(), filename)
+ # compare what follows FILE_ID plus one separating character
+ if line[len(self.FILE_ID) + 1:] != id:
+ raise KeyfileMismatchError(self.repository._location.canonical_path(), filename)
+ return filename
+
def find_key(self):
+ id = self.repository.id_str
+ keyfile = os.environ.get('BORG_KEY_FILE')
+ if keyfile:
+ return self.sanity_check(keyfile, id)
keys_dir = get_keys_dir()
for name in os.listdir(keys_dir):
filename = os.path.join(keys_dir, name)
- with open(filename, 'r') as fd:
- line = fd.readline().strip()
- if line.startswith(self.FILE_ID) and line[len(self.FILE_ID) + 1:] == self.repository.id_str:
- return filename
+ try:
+ return self.sanity_check(filename, id)
+ except (KeyfileInvalidError, KeyfileMismatchError):
+ pass
raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir())
def get_new_target(self, args):
+ keyfile = os.environ.get('BORG_KEY_FILE')
+ if keyfile:
+ return keyfile
filename = args.location.to_key_filename()
path = filename
i = 1
diff --git a/borg/selftest.py b/borg/selftest.py
new file mode 100644
index 000000000..2093b89a9
--- /dev/null
+++ b/borg/selftest.py
@@ -0,0 +1,79 @@
+"""
+Self testing module
+===================
+
+The selftest() function runs a small test suite of relatively fast tests that are meant to discover issues
+with the way Borg was compiled or packaged and also bugs in Borg itself.
+
+These tests are a subset of the borg/testsuite and are run with Python's built-in unittest, hence none of
+the tests used for this can or should be ported to py.test currently.
+
+To assert that self test discovery works correctly the number of tests is kept in the SELFTEST_COUNT
+variable. SELFTEST_COUNT must be updated if new tests are added or removed to or from any of the tests
+used here.
+"""
+
+
+import sys
+import time
+from unittest import TestResult, TestSuite, defaultTestLoader
+
+from .testsuite.hashindex import HashIndexDataTestCase, HashIndexRefcountingTestCase, HashIndexTestCase
+from .testsuite.crypto import CryptoTestCase
+from .testsuite.chunker import ChunkerTestCase
+
+SELFTEST_CASES = [
+ HashIndexDataTestCase,
+ HashIndexRefcountingTestCase,
+ HashIndexTestCase,
+ CryptoTestCase,
+ ChunkerTestCase,
+]
+
+SELFTEST_COUNT = 27
+
+
+class SelfTestResult(TestResult):
+    """TestResult that additionally records the tests that passed and can log a summary of the run."""
+
+    def __init__(self):
+        super().__init__()
+        # TestResult keeps no list of passed tests, so they are collected here
+        self.successes = []
+
+    def addSuccess(self, test):
+        """Record a passed test."""
+        super().addSuccess(test)
+        self.successes.append(test)
+
+    def test_name(self, test):
+        """Return the test's short description or, if it has none, its string form."""
+        return test.shortDescription() or str(test)
+
+    def log_results(self, logger):
+        """
+        Log every error, failure and unexpected success as an error and every skipped test as a warning.
+
+        :param logger: object offering error() and warning() methods that take a format string and arguments
+        """
+        # unexpectedSuccesses holds bare test objects, while errors and failures hold (test, text) pairs;
+        # pair them with a fixed text, because unpacking a bare test object would raise a TypeError
+        unexpected = [(test, 'unexpected success') for test in self.unexpectedSuccesses]
+        for test, failure in self.errors + self.failures + unexpected:
+            logger.error('self test %s FAILED:\n%s', self.test_name(test), failure)
+        for test, reason in self.skipped:
+            logger.warning('self test %s skipped: %s', self.test_name(test), reason)
+
+    def successful_test_count(self):
+        """Return the number of tests that passed."""
+        return len(self.successes)
+
+
+def selftest(logger):
+ """
+ Run all SELFTEST_CASES and log the outcome via logger.
+
+ Exits the process with status 2 if a test did not succeed or if the number of
+ successful tests differs from SELFTEST_COUNT.
+ """
+ selftest_started = time.perf_counter()
+ result = SelfTestResult()
+ test_suite = TestSuite()
+ for test_case in SELFTEST_CASES:
+ test_suite.addTest(defaultTestLoader.loadTestsFromTestCase(test_case))
+ test_suite.run(result)
+ result.log_results(logger)
+ successful_tests = result.successful_test_count()
+ # a count that differs from the expected one is treated as a failure, too
+ count_mismatch = successful_tests != SELFTEST_COUNT
+ if result.wasSuccessful() and count_mismatch:
+ # only print this if all tests succeeded
+ logger.error("self test count (%d != %d) mismatch, either test discovery is broken or a test was added "
+ "without updating borg.selftest",
+ successful_tests, SELFTEST_COUNT)
+ if not result.wasSuccessful() or count_mismatch:
+ logger.error("self test failed\n"
+ "This is a bug either in Borg or in the package / distribution you use.")
+ sys.exit(2)
+ # not reached as long as sys.exit() raises as usual
+ assert False, "sanity assertion failed: ran beyond sys.exit()"
+ selftest_elapsed = time.perf_counter() - selftest_started
+ logger.debug("%d self tests completed in %.2f seconds", successful_tests, selftest_elapsed)
diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py
index be676cd21..c3ba5801b 100644
--- a/borg/testsuite/__init__.py
+++ b/borg/testsuite/__init__.py
@@ -9,7 +9,8 @@ import sysconfig
import time
import unittest
from ..xattr import get_all
-from ..logger import setup_logging
+
+# Note: this is used by borg.selftest, do not use or import py.test functionality here.
try:
import llfuse
@@ -18,6 +19,11 @@ try:
except ImportError:
have_fuse_mtime_ns = False
+try:
+ from pytest import raises
+except ImportError:
+ raises = None
+
has_lchflags = hasattr(os, 'lchflags')
@@ -32,9 +38,6 @@ else:
if sys.platform.startswith('netbsd'):
st_mtime_ns_round = -4 # only >1 microsecond resolution here?
-# Ensure that the loggers exist for all tests
-setup_logging()
-
class BaseTestCase(unittest.TestCase):
"""
@@ -43,9 +46,13 @@ class BaseTestCase(unittest.TestCase):
assert_not_in = unittest.TestCase.assertNotIn
assert_equal = unittest.TestCase.assertEqual
assert_not_equal = unittest.TestCase.assertNotEqual
- assert_raises = unittest.TestCase.assertRaises
assert_true = unittest.TestCase.assertTrue
+ if raises:
+ assert_raises = staticmethod(raises)
+ else:
+ assert_raises = unittest.TestCase.assertRaises
+
@contextmanager
def assert_creates_file(self, path):
self.assert_true(not os.path.exists(path), '{} should not exist'.format(path))
diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py
index 77fb44277..1b89515c5 100644
--- a/borg/testsuite/archiver.py
+++ b/borg/testsuite/archiver.py
@@ -61,6 +61,7 @@ def exec_cmd(*args, archiver=None, fork=False, exe=None, **kw):
sys.stdout = sys.stderr = output = StringIO()
if archiver is None:
archiver = Archiver()
+ archiver.prerun_checks = lambda *args: None
archiver.exit_code = EXIT_SUCCESS
args = archiver.parse_args(list(args))
ret = archiver.run(args)
@@ -987,16 +988,39 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test1', src_dir)
self.cmd('create', self.repository_location + '::test2', src_dir)
+ # these are not really checkpoints, but they look like some:
+ self.cmd('create', self.repository_location + '::test3.checkpoint', src_dir)
+ self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
+ self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
output = self.cmd('prune', '-v', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
- self.assert_in('Keeping archive: test2', output)
self.assert_in('Would prune: test1', output)
+ # must keep the latest non-checkpoint archive:
+ self.assert_in('Keeping archive: test2', output)
+ # must keep the latest checkpoint archive:
+ self.assert_in('Keeping archive: test4.checkpoint', output)
output = self.cmd('list', self.repository_location)
self.assert_in('test1', output)
self.assert_in('test2', output)
+ self.assert_in('test3.checkpoint', output)
+ self.assert_in('test3.checkpoint.1', output)
+ self.assert_in('test4.checkpoint', output)
self.cmd('prune', self.repository_location, '--keep-daily=2')
output = self.cmd('list', self.repository_location)
self.assert_not_in('test1', output)
+ # the latest non-checkpoint archive must be still there:
self.assert_in('test2', output)
+ # only the latest checkpoint archive must still be there:
+ self.assert_not_in('test3.checkpoint', output)
+ self.assert_not_in('test3.checkpoint.1', output)
+ self.assert_in('test4.checkpoint', output)
+ # now we supersede the latest checkpoint by a successful backup:
+ self.cmd('create', self.repository_location + '::test5', src_dir)
+ self.cmd('prune', self.repository_location, '--keep-daily=2')
+ output = self.cmd('list', self.repository_location)
+ # all checkpoints should be gone now:
+ self.assert_not_in('checkpoint', output)
+ # the latest archive must be still there
+ self.assert_in('test5', output)
def test_prune_repository_save_space(self):
self.cmd('init', self.repository_location)
@@ -1088,6 +1112,64 @@ class ArchiverTestCase(ArchiverTestCaseBase):
size, csize, path = output.split("\n")[1].split(" ")
assert int(csize) < int(size)
+ def _get_sizes(self, compression, compressible, size=10000):
+ if compressible:
+ contents = b'X' * size
+ else:
+ contents = os.urandom(size)
+ self.create_regular_file('file', contents=contents)
+ self.cmd('init', '--encryption=none', self.repository_location)
+ archive = self.repository_location + '::test'
+ self.cmd('create', '-C', compression, archive, 'input')
+ output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', archive)
+ size, csize, path = output.split("\n")[1].split(" ")
+ return int(size), int(csize)
+
+ def test_compression_none_compressible(self):
+ size, csize = self._get_sizes('none', compressible=True)
+ assert csize >= size
+ assert csize == size + 3
+
+ def test_compression_none_uncompressible(self):
+ size, csize = self._get_sizes('none', compressible=False)
+ assert csize >= size
+ assert csize == size + 3
+
+ def test_compression_zlib_compressible(self):
+ size, csize = self._get_sizes('zlib', compressible=True)
+ assert csize < size * 0.1
+ assert csize == 35
+
+ def test_compression_zlib_uncompressible(self):
+ size, csize = self._get_sizes('zlib', compressible=False)
+ assert csize >= size
+
+ def test_compression_auto_compressible(self):
+ size, csize = self._get_sizes('auto,zlib', compressible=True)
+ assert csize < size * 0.1
+ assert csize == 35 # same as compression 'zlib'
+
+ def test_compression_auto_uncompressible(self):
+ size, csize = self._get_sizes('auto,zlib', compressible=False)
+ assert csize >= size
+ assert csize == size + 3 # same as compression 'none'
+
+ def test_compression_lz4_compressible(self):
+ size, csize = self._get_sizes('lz4', compressible=True)
+ assert csize < size * 0.1
+
+ def test_compression_lz4_uncompressible(self):
+ size, csize = self._get_sizes('lz4', compressible=False)
+ assert csize >= size
+
+ def test_compression_lzma_compressible(self):
+ size, csize = self._get_sizes('lzma', compressible=True)
+ assert csize < size * 0.1
+
+ def test_compression_lzma_uncompressible(self):
+ size, csize = self._get_sizes('lzma', compressible=False)
+ assert csize >= size
+
def test_break_lock(self):
self.cmd('init', self.repository_location)
self.cmd('break-lock', self.repository_location)
@@ -1398,6 +1480,12 @@ class ArchiverTestCase(ArchiverTestCaseBase):
info_after = self.cmd('info', self.repository_location + '::test')
assert info_before == info_after # includes archive ID
+ def test_with_lock(self):
+ self.cmd('init', self.repository_location)
+ lock_path = os.path.join(self.repository_path, 'lock.exclusive')
+ cmd = 'python3', '-c', 'import os, sys; sys.exit(42 if os.path.exists("%s") else 23)' % lock_path
+ self.cmd('with-lock', self.repository_location, *cmd, fork=True, exit_code=42)
+
@unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available')
class ArchiverTestCaseBinary(ArchiverTestCase):
diff --git a/borg/testsuite/chunker.py b/borg/testsuite/chunker.py
index 0db7203d5..2a14bd604 100644
--- a/borg/testsuite/chunker.py
+++ b/borg/testsuite/chunker.py
@@ -4,6 +4,9 @@ from ..chunker import Chunker, buzhash, buzhash_update
from ..constants import * # NOQA
from . import BaseTestCase
+# Note: these tests are part of the self test, do not use or import py.test functionality here.
+# See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
+
class ChunkerTestCase(BaseTestCase):
diff --git a/borg/testsuite/conftest.py b/borg/testsuite/conftest.py
new file mode 100644
index 000000000..0c350fb7f
--- /dev/null
+++ b/borg/testsuite/conftest.py
@@ -0,0 +1,4 @@
+from ..logger import setup_logging
+
+# Ensure that the loggers exist for all tests
+setup_logging()
diff --git a/borg/testsuite/crypto.py b/borg/testsuite/crypto.py
index 9609e259a..e3eff8bec 100644
--- a/borg/testsuite/crypto.py
+++ b/borg/testsuite/crypto.py
@@ -3,6 +3,9 @@ from binascii import hexlify, unhexlify
from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, hmac_sha256
from . import BaseTestCase
+# Note: these tests are part of the self test, do not use or import py.test functionality here.
+# See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
+
class CryptoTestCase(BaseTestCase):
diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py
index 3aac0c7db..000dfe4c3 100644
--- a/borg/testsuite/hashindex.py
+++ b/borg/testsuite/hashindex.py
@@ -1,15 +1,16 @@
import base64
import hashlib
import os
-import struct
import tempfile
import zlib
-import pytest
from ..hashindex import NSIndex, ChunkIndex
from .. import hashindex
from . import BaseTestCase
+# Note: these tests are part of the self test, do not use or import py.test functionality here.
+# See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
+
def H(x):
# make some 32byte long thing that depends on x
@@ -194,7 +195,7 @@ class HashIndexRefcountingTestCase(BaseTestCase):
def test_decref_zero(self):
idx1 = ChunkIndex()
idx1[H(1)] = 0, 0, 0
- with pytest.raises(AssertionError):
+ with self.assert_raises(AssertionError):
idx1.decref(H(1))
def test_incref_decref(self):
@@ -208,18 +209,18 @@ class HashIndexRefcountingTestCase(BaseTestCase):
def test_setitem_raises(self):
idx1 = ChunkIndex()
- with pytest.raises(AssertionError):
+ with self.assert_raises(AssertionError):
idx1[H(1)] = hashindex.MAX_VALUE + 1, 0, 0
def test_keyerror(self):
idx = ChunkIndex()
- with pytest.raises(KeyError):
+ with self.assert_raises(KeyError):
idx.incref(H(1))
- with pytest.raises(KeyError):
+ with self.assert_raises(KeyError):
idx.decref(H(1))
- with pytest.raises(KeyError):
+ with self.assert_raises(KeyError):
idx[H(1)]
- with pytest.raises(OverflowError):
+ with self.assert_raises(OverflowError):
idx.add(H(1), -1, 0, 0)
@@ -269,10 +270,11 @@ class HashIndexDataTestCase(BaseTestCase):
assert idx1[H(3)] == (hashindex.MAX_VALUE, 6, 7)
-def test_nsindex_segment_limit():
- idx = NSIndex()
- with pytest.raises(AssertionError):
- idx[H(1)] = hashindex.MAX_VALUE + 1, 0
- assert H(1) not in idx
- idx[H(2)] = hashindex.MAX_VALUE, 0
- assert H(2) in idx
+class NSIndexTestCase(BaseTestCase):
+ def test_nsindex_segment_limit(self):
+ idx = NSIndex()
+ with self.assert_raises(AssertionError):
+ idx[H(1)] = hashindex.MAX_VALUE + 1, 0
+ assert H(1) not in idx
+ idx[H(2)] = hashindex.MAX_VALUE, 0
+ assert H(2) in idx
diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py
index c0b9a049a..c86a0b3b2 100644
--- a/borg/testsuite/helpers.py
+++ b/borg/testsuite/helpers.py
@@ -10,11 +10,12 @@ import msgpack
import msgpack.fallback
import time
-from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, \
+from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, clean_lines, \
prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \
yes, TRUISH, FALSISH, DEFAULTISH, \
- StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, CompressionSpec, ChunkerParams, Chunk, \
+ StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, ChunkerParams, Chunk, \
ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
+ CompressionSpec, CompressionDecider1, CompressionDecider2, \
PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format, ChunkIteratorFileWrapper
from . import BaseTestCase, environment_variable, FakeInputs
@@ -915,3 +916,50 @@ def test_chunk_file_wrapper():
cfw = ChunkIteratorFileWrapper(iter([]))
assert cfw.read(2) == b''
assert cfw.exhausted
+
+
+def test_clean_lines():
+ conf = """\
+#comment
+data1 #data1
+data2
+
+ data3
+""".splitlines(keepends=True)
+ assert list(clean_lines(conf)) == ['data1 #data1', 'data2', 'data3', ]
+ assert list(clean_lines(conf, lstrip=False)) == ['data1 #data1', 'data2', ' data3', ]
+ assert list(clean_lines(conf, rstrip=False)) == ['data1 #data1\n', 'data2\n', 'data3\n', ]
+ assert list(clean_lines(conf, remove_empty=False)) == ['data1 #data1', 'data2', '', 'data3', ]
+ assert list(clean_lines(conf, remove_comments=False)) == ['#comment', 'data1 #data1', 'data2', 'data3', ]
+
+
+def test_compression_decider1():
+ default = CompressionSpec('zlib')
+ conf = """
+# use super-fast lz4 compression on huge VM files in this path:
+lz4:/srv/vm_disks
+
+# jpeg or zip files do not compress:
+none:*.jpeg
+none:*.zip
+""".splitlines()
+
+ cd = CompressionDecider1(default, []) # no conf, always use default
+ assert cd.decide('/srv/vm_disks/linux')['name'] == 'zlib'
+ assert cd.decide('test.zip')['name'] == 'zlib'
+ assert cd.decide('test')['name'] == 'zlib'
+
+ cd = CompressionDecider1(default, [conf, ])
+ assert cd.decide('/srv/vm_disks/linux')['name'] == 'lz4'
+ assert cd.decide('test.zip')['name'] == 'none'
+ assert cd.decide('test')['name'] == 'zlib' # no match in conf, use default
+
+
+def test_compression_decider2():
+ default = CompressionSpec('zlib')
+
+ cd = CompressionDecider2(default)
+ compr_spec, chunk = cd.decide(Chunk(None))
+ assert compr_spec['name'] == 'zlib'
+ compr_spec, chunk = cd.decide(Chunk(None, compress=CompressionSpec('lzma')))
+ assert compr_spec['name'] == 'lzma'
diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py
index 9e01103ad..11eb35061 100644
--- a/borg/testsuite/key.py
+++ b/borg/testsuite/key.py
@@ -7,7 +7,7 @@ from binascii import hexlify, unhexlify
from ..crypto import bytes_to_long, num_aes_blocks
from ..key import PlaintextKey, PassphraseKey, KeyfileKey
from ..helpers import Location, Chunk, bin_to_hex
-from . import BaseTestCase
+from . import BaseTestCase, environment_variable
class KeyTestCase(BaseTestCase):
@@ -34,9 +34,11 @@ class KeyTestCase(BaseTestCase):
def setUp(self):
self.tmppath = tempfile.mkdtemp()
os.environ['BORG_KEYS_DIR'] = self.tmppath
+ self.tmppath2 = tempfile.mkdtemp()
def tearDown(self):
shutil.rmtree(self.tmppath)
+ shutil.rmtree(self.tmppath2)
class MockRepository:
class _Location:
@@ -71,6 +73,20 @@ class KeyTestCase(BaseTestCase):
chunk = Chunk(b'foo')
self.assert_equal(chunk, key2.decrypt(key.id_hash(chunk.data), key.encrypt(chunk)))
+ def test_keyfile_kfenv(self):
+ keyfile = os.path.join(self.tmppath2, 'keyfile')
+ with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='testkf'):
+ assert not os.path.exists(keyfile)
+ key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
+ assert os.path.exists(keyfile)
+ chunk = Chunk(b'XXX')
+ chunk_id = key.id_hash(chunk.data)
+ chunk_cdata = key.encrypt(chunk)
+ key = KeyfileKey.detect(self.MockRepository(), chunk_cdata)
+ self.assert_equal(chunk, key.decrypt(chunk_id, chunk_cdata))
+ os.unlink(keyfile)
+ self.assert_raises(FileNotFoundError, KeyfileKey.detect, self.MockRepository(), chunk_cdata)
+
def test_keyfile2(self):
with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd:
fd.write(self.keyfile2_key_file)
@@ -78,6 +94,14 @@ class KeyTestCase(BaseTestCase):
key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata)
self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload')
+ def test_keyfile2_kfenv(self):
+ keyfile = os.path.join(self.tmppath2, 'keyfile')
+ with open(keyfile, 'w') as fd:
+ fd.write(self.keyfile2_key_file)
+ with environment_variable(BORG_KEY_FILE=keyfile, BORG_PASSPHRASE='passphrase'):
+ key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata)
+ self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata).data, b'payload')
+
def test_passphrase(self):
os.environ['BORG_PASSPHRASE'] = 'test'
key = PassphraseKey.create(self.MockRepository(), None)
diff --git a/docs/changes.rst b/docs/changes.rst
index bebaf3b26..1e6d1a6f1 100644
--- a/docs/changes.rst
+++ b/docs/changes.rst
@@ -70,6 +70,24 @@ Other changes:
- ChunkBuffer: add test for leaving partial chunk in buffer, fixes #945
+Version 1.0.3 (not released yet)
+--------------------------------
+
+Bug fixes:
+
+- prune: ignore checkpoints, #997
+- prune: fix bad validator, #942
+- fix capabilities extraction on Linux (set xattrs last, after chown())
+
+Other changes:
+
+- update readthedocs URLs, #991
+- add missing docs for "borg break-lock", #992
+- borg create help: add some words about the archive name
+- borg create help: document format tags, #894
+- Vagrantfile: OS X: update osxfuse / install lzma package, #933
+
+
Version 1.0.2
-------------
diff --git a/docs/development.rst b/docs/development.rst
index 6a930e0c5..4d47347f0 100644
--- a/docs/development.rst
+++ b/docs/development.rst
@@ -139,7 +139,9 @@ Usage::
# To create and provision the VM:
vagrant up OS
# To create an ssh session to the VM:
- vagrant ssh OS command
+ vagrant ssh OS
+ # To execute a command via ssh in the VM:
+ vagrant ssh OS -c "command args"
# To shut down the VM:
vagrant halt OS
# To shut down and destroy the VM:
diff --git a/docs/faq.rst b/docs/faq.rst
index 0051a48c0..23dab5382 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -133,6 +133,50 @@ into the repository.
Yes, as an attacker with access to the remote server could delete (or
otherwise make unavailable) all your backups.
+How can I protect against a hacked backup client?
+-------------------------------------------------
+
+Assume you back up your backup client machine C to the backup server S and
+C gets hacked. In a simple push setup, the attacker could then use borg on
+C to delete all backups residing on S.
+
+These are your options to protect against that:
+
+- Do not allow permanent deletion of data from the repo, see :ref:`append-only-mode`.
+- Use a pull-mode setup using ``ssh -R``, see :issue:`900`.
+- Mount C's filesystem on another machine and then create a backup of it.
+- Do not give C filesystem-level access to S.
+
+How can I protect against a hacked backup server?
+-------------------------------------------------
+
+Just in case you got the impression that pull-mode backups are much safer
+than push-mode, you also need to consider the case that your backup server S
+gets hacked. In case S has access to a lot of clients C, that might get you
+into even bigger trouble than a hacked backup client in the previous FAQ entry.
+
+These are your options to protect against that:
+
+- Use the standard push-mode setup (see also previous FAQ entry).
+- Mount (the repo part of) S's filesystem on C.
+- Do not give S filesystem-level access to C.
+- Have your backup server at a well protected place (maybe not reachable from
+ the internet), configure it safely, apply security updates, monitor it, ...
+
+How can I protect against theft, sabotage, lightning, fire, ...?
+----------------------------------------------------------------
+
+In general: if your only backup medium is near the backed-up machine and
+always connected, you can easily get into trouble: they likely share the same
+fate if something goes really wrong.
+
+Thus:
+
+- have multiple backup media
+- have media disconnected from network, power, computer
+- have media at another place
+- have a relatively recent backup on your media
+
Why do I get "connection closed by remote" after a while?
---------------------------------------------------------
@@ -140,8 +184,7 @@ When doing a backup to a remote server (using a ssh: repo URL), it sometimes
stops after a while (some minutes, hours, ... - not immediately) with
"connection closed by remote" error message. Why?
-That's a good question and we are trying to find a good answer in
-`ticket 636 `_.
+That's a good question and we are trying to find a good answer in :issue:`636`.
The borg cache eats way too much disk space, what can I do?
-----------------------------------------------------------
@@ -180,12 +223,25 @@ Yes, |project_name| supports resuming backups.
During a backup a special checkpoint archive named ``<archive-name>.checkpoint``
is saved every checkpoint interval (the default value for this is 5
-minutes) containing all the data backed-up until that point. This means
+minutes) containing all the data backed-up until that point. This checkpoint
+archive is a valid archive, but it is only a partial backup. Having it
+in the repo until a successful, full backup is completed is useful because it
+references all the transmitted chunks up to the checkpoint time. This means
that at most <checkpoint interval> worth of data needs to be retransmitted
-if a backup needs to be restarted.
+if you restart the backup.
+
+If a backup was interrupted, you do not need to take any special measures,
+just invoke ``borg create`` as you always do. You may use the same archive name
+as in the previous attempt or a different one (e.g. if you always include the current
+datetime), it does not matter.
+|project_name| always does full single-pass backups, so it will start again
+from the beginning - but it will be much faster, because some of the data was
+already stored into the repo (and is still referenced by the checkpoint
+archive), so it does not need to get transmitted and stored again.
Once your backup has finished successfully, you can delete all
-``.checkpoint`` archives.
+``<archive-name>.checkpoint`` archives. If you run ``borg prune``, it will
+also take care of deleting unneeded checkpoints.
If it crashes with a UnicodeError, what can I do?
-------------------------------------------------
@@ -217,7 +273,7 @@ control which we do not have (and also can't get, even if we wanted).
So, if you need that, consider RAID or a filesystem that offers redundant
storage or just make backups to different locations / different hardware.
-See also `ticket 225 `_.
+See also :issue:`225`.
Can |project_name| verify data integrity of a backup archive?
-------------------------------------------------------------
diff --git a/docs/installation.rst b/docs/installation.rst
index d3863a27b..5c5c16988 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -49,7 +49,7 @@ Ubuntu `16.04`_, backports (PPA): `15.10`_, `14.04`_ ``apt install borgbac
.. _[community]: https://www.archlinux.org/packages/?name=borg
.. _jessie-backports: https://packages.debian.org/jessie-backports/borgbackup
.. _stretch: https://packages.debian.org/stretch/borgbackup
-.. _unstable/sid: https://packages.debian.org/sid/borgbackup
+.. _sid: https://packages.debian.org/sid/borgbackup
.. _ebuild: https://packages.gentoo.org/packages/app-backup/borgbackup
.. _Ports-Tree: http://www.freshports.org/archivers/py-borgbackup/
.. _pkgsrc: http://pkgsrc.se/sysutils/py-borgbackup
diff --git a/docs/misc/compression.conf b/docs/misc/compression.conf
new file mode 100644
index 000000000..881f5fe9a
--- /dev/null
+++ b/docs/misc/compression.conf
@@ -0,0 +1,56 @@
+# example config file for --compression-from option
+#
+# Format of non-comment / non-empty lines:
+# <compression-spec>:<path/filename pattern>
+# compression-spec is same format as for --compression option
+# path/filename pattern is same format as for --exclude option
+
+# archives / files:
+none:*.gz
+none:*.tgz
+none:*.bz2
+none:*.tbz2
+none:*.xz
+none:*.txz
+none:*.lzma
+none:*.lzo
+none:*.zip
+none:*.rar
+none:*.7z
+
+# audio:
+none:*.mp3
+none:*.ogg
+none:*.oga
+none:*.flac
+none:*.aac
+none:*.m4a
+
+# video:
+none:*.mp4
+none:*.mkv
+none:*.m4v
+none:*.avi
+none:*.mpg
+none:*.mpeg
+none:*.webm
+none:*.vob
+none:*.ts
+none:*.ogv
+none:*.mov
+none:*.flv
+none:*.ogm
+
+# pictures/images
+none:*.jpg
+none:*.jpeg
+none:*.png
+none:*.gif
+
+# disk images
+none:*.dmg
+
+# software archives
+none:*.rpm
+none:*.deb
+none:*.msi
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index 1d15f5d23..c8456b85d 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -105,23 +105,27 @@ server. The script also uses the :ref:`borg_prune` subcommand to maintain a
certain number of old archives::
#!/bin/sh
- REPOSITORY=username@remoteserver.com:backup
- # Backup all of /home and /var/www except a few
- # excluded directories
- borg create -v --stats \
- $REPOSITORY::`hostname`-`date +%Y-%m-%d` \
- /home \
- /var/www \
- --exclude '/home/*/.cache' \
- --exclude /home/Ben/Music/Justin\ Bieber \
+ # setting this, so the repo does not need to be given on the commandline:
+ export BORG_REPO=username@remoteserver.com:backup
+
+ # setting this, so you won't be asked for your passphrase - make sure the
+ # script has appropriate owner/group and mode, e.g. root.root 600:
+ export BORG_PASSPHRASE=mysecret
+
+ # Backup most important stuff:
+ borg create -v --stats -C lz4 ::`hostname`-`date +%Y-%m-%d` \
+ /etc \
+ /home \
+ /var \
+ --exclude '/home/*/.cache' \
--exclude '*.pyc'
# Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly
- # archives of THIS machine. --prefix `hostname`- is very important to
+ # archives of THIS machine. Using --prefix is very important to
# limit prune's operation to this machine's archives and not apply to
# other machines' archives also.
- borg prune -v $REPOSITORY --prefix `hostname`- \
+ borg prune -v --prefix `hostname`- \
--keep-daily=7 --keep-weekly=4 --keep-monthly=6
.. backup_compression:
diff --git a/docs/resources.rst b/docs/resources.rst
index 4113c11d4..59fa0310a 100644
--- a/docs/resources.rst
+++ b/docs/resources.rst
@@ -36,6 +36,6 @@ Some of them refer to attic, but you can do the same stuff (and more) with borgb
Software
--------
-- `BorgWeb - a very simple web UI for BorgBackup `_
+- `BorgWeb - a very simple web UI for BorgBackup `_
- some other stuff found at the `BorgBackup Github organisation `_
- `atticmatic `_ (includes borgmatic)
diff --git a/docs/usage.rst b/docs/usage.rst
index 5d91656e5..600af4fea 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -101,9 +101,11 @@ Some automatic "answerers" (if set, they automatically answer confirmation quest
answer or ask you interactively, depending on whether retries are allowed (they by default are
allowed). So please test your scripts interactively before making them a non-interactive script.
-Directories:
+Directories and files:
BORG_KEYS_DIR
Default to '~/.config/borg/keys'. This directory contains keys for encrypted repositories.
+ BORG_KEY_FILE
+ When set, use the given filename as repository key file.
BORG_CACHE_DIR
Default to '~/.cache/borg'. This directory contains the local cache and might need a lot
of space for dealing with big repositories.
@@ -309,10 +311,9 @@ Examples
# Even slower, even higher compression (N = 0..9)
$ borg create --compression lzma,N /path/to/repo::arch ~
- # Format tags available for archive name:
- # {now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}
- # add short hostname, backup username and current unixtime (seconds from epoch)
- $ borg create /path/to/repo::{hostname}-{user}-{now:%s} ~
+ # Use short hostname, user name and current time in archive name
+ $ borg create /path/to/repo::{hostname}-{user}-{now} ~
+ $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%d_%H:%M:%S} ~
.. include:: usage/extract.rst.inc
@@ -326,6 +327,9 @@ Examples
# Extract entire archive and list files while processing
$ borg extract -v --list /path/to/repo::my-files
+ # Verify whether an archive could be successfully extracted, but do not write files to disk
+ $ borg extract --dry-run /path/to/repo::my-files
+
# Extract the "src" directory
$ borg extract /path/to/repo::my-files home/USERNAME/src
@@ -645,6 +649,12 @@ Examples
...
+.. include:: usage/with-lock.rst.inc
+
+
+.. include:: usage/break-lock.rst.inc
+
+
Miscellaneous Help
------------------
@@ -814,13 +824,16 @@ Now, let's see how to restore some LVs from such a backup. ::
$ borg extract --stdout /path/to/repo::arch dev/vg0/home-snapshot > /dev/vg0/home
+.. _append-only-mode:
+
Append-only mode
~~~~~~~~~~~~~~~~
A repository can be made "append-only", which means that Borg will never overwrite or
-delete committed data. This is useful for scenarios where multiple machines back up to
-a central backup server using ``borg serve``, since a hacked machine cannot delete
-backups permanently.
+delete committed data (append-only refers to the segment files, but borg will also
+refuse to delete the repository completely). This is useful for scenarios where a
+backup client machine backs up remotely to a backup server using ``borg serve``, since
+a hacked client machine cannot delete backups on the server permanently.
To activate append-only mode, edit the repository ``config`` file and add a line
``append_only=1`` to the ``[repository]`` section (or edit the line if it exists).
@@ -881,6 +894,6 @@ repository. Make sure that backup client machines only get to access the reposit
Ensure that no remote access is possible if the repository is temporarily set to normal mode
for e.g. regular pruning.
-Further protections can be implemented, but are outside of Borgs scope. For example,
+Further protections can be implemented, but are outside of Borg's scope. For example,
file system snapshots or wrapping ``borg serve`` to set special permissions or ACLs on
new data files.
diff --git a/docs/usage/with-lock.rst.inc b/docs/usage/with-lock.rst.inc
new file mode 100644
index 000000000..3037ee809
--- /dev/null
+++ b/docs/usage/with-lock.rst.inc
@@ -0,0 +1,32 @@
+.. _borg_with-lock:
+
+borg with-lock
+--------------
+::
+
+ borg with-lock REPOSITORY COMMAND ARGS
+
+positional arguments
+ REPOSITORY
+ repository to lock
+ COMMAND
+ command to run
+ ARGS
+ command arguments
+
+`Common options`_
+ |
+
+Description
+~~~~~~~~~~~
+
+This command runs a user-specified command while the repository lock is held.
+
+It will first try to acquire the lock (make sure that no other operation is
+running in the repo), then execute the given command as a subprocess and wait
+for its termination, release the lock and return the user command's return
+code as borg's return code.
+
+Note: if you copy a repository with the lock held, the lock will be present in
+ the copy, obviously. Thus, before using borg on the copy, you need to
+ use "borg break-lock" on it.
diff --git a/setup.py b/setup.py
index 4a85bb0a2..5ad58c0a5 100644
--- a/setup.py
+++ b/setup.py
@@ -117,12 +117,13 @@ if sys.platform == 'win32':
windowsIncludeDirs.append(os.path.abspath(os.path.join(gccpath, "..")))
windowsIncludeDirs.append(os.path.abspath(os.path.join(gccpath, "..", "..")))
-
possible_openssl_prefixes = None
if sys.platform == 'win32':
possible_openssl_prefixes = windowsIncludeDirs
else:
- possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/borg', '/opt/local']
+ possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl',
+ '/usr/local/borg', '/opt/local', '/opt/pkg', ]
+
if os.environ.get('BORG_OPENSSL_PREFIX'):
possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX'))
ssl_prefix = detect_openssl(possible_openssl_prefixes)
@@ -135,7 +136,9 @@ possible_lz4_prefixes = None
if sys.platform == 'win32':
possible_lz4_prefixes = windowsIncludeDirs
else:
- possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4', '/usr/local/borg', '/opt/local']
+ possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/opt/lz4', '/usr/local/lz4',
+ '/usr/local/borg', '/opt/local', '/opt/pkg', ]
+
if os.environ.get('BORG_LZ4_PREFIX'):
possible_lz4_prefixes.insert(0, os.environ.get('BORG_LZ4_PREFIX'))
lz4_prefix = detect_lz4(possible_lz4_prefixes)
@@ -327,7 +330,7 @@ setup(
},
author='The Borg Collective (see AUTHORS file)',
author_email='borgbackup@python.org',
- url='https://borgbackup.readthedocs.org/',
+ url='https://borgbackup.readthedocs.io/',
description='Deduplicated, encrypted, authenticated and compressed backups',
long_description=long_description,
license='BSD',