Merge branch 'master' into multithreading

This commit is contained in:
Thomas Waldmann 2015-08-15 21:45:52 +02:00
commit 91d2cfa671
56 changed files with 2380 additions and 767 deletions

17
.coveragerc Normal file
View file

@ -0,0 +1,17 @@
[run]
branch = True
source = borg
omit =
borg/__init__.py
borg/__main__.py
borg/_version.py
[report]
exclude_lines =
pragma: no cover
def __repr__
raise AssertionError
raise NotImplementedError
if 0:
if __name__ == .__main__.:
ignore_errors = True

6
.gitignore vendored
View file

@ -6,6 +6,7 @@ env
.tox
hashindex.c
chunker.c
compress.c
crypto.c
platform_darwin.c
platform_freebsd.c
@ -16,3 +17,8 @@ platform_linux.c
*.so
docs/usage/*.inc
.idea/
.cache/
borg.build/
borg.dist/
borg.exe
.coverage

View file

@ -1,12 +1,47 @@
sudo: required
language: python
python:
- "3.2"
- "3.3"
- "3.4"
# command to install dependencies
cache:
directories:
- $HOME/.cache/pip
matrix:
include:
- python: 3.2
os: linux
env: TOXENV=py32
- python: 3.3
os: linux
env: TOXENV=py33
- python: 3.4
os: linux
env: TOXENV=py34
- language: generic
os: osx
osx_image: xcode6.4
env: TOXENV=py32
- language: generic
os: osx
osx_image: xcode6.4
env: TOXENV=py33
- language: generic
os: osx
osx_image: xcode6.4
env: TOXENV=py34
install:
- "sudo apt-get install -y libacl1-dev"
- "pip install --use-mirrors Cython"
- "pip install -e ."
# command to run tests
script: fakeroot -u py.test
- ./.travis/install.sh
script:
- ./.travis/run.sh
after_success:
- ./.travis/upload_coverage.sh
notifications:
irc:
channels:
- "irc.freenode.org#borgbackup"
use_notice: true
skip_join: true

47
.travis/install.sh Executable file
View file

@ -0,0 +1,47 @@
#!/bin/bash
# Travis CI "install" step: set up Python (via pyenv on OS X), native build
# dependencies (lz4, libacl on Linux), and a virtualenv with the test tools.
set -e
set -x
if [[ "$(uname -s)" == 'Darwin' ]]; then
# OS X: use Homebrew + pyenv, since these Travis images ship no Python 3.
brew update || brew update
if [[ "${OPENSSL}" != "0.9.8" ]]; then
brew outdated openssl || brew upgrade openssl
fi
if which pyenv > /dev/null; then
eval "$(pyenv init -)"
fi
brew install lz4
brew outdated pyenv || brew upgrade pyenv
# Install the CPython version matching the requested tox environment.
case "${TOXENV}" in
py32)
pyenv install 3.2.6
pyenv global 3.2.6
;;
py33)
pyenv install 3.3.6
pyenv global 3.3.6
;;
py34)
pyenv install 3.4.3
pyenv global 3.4.3
;;
esac
pyenv rehash
python -m pip install --user virtualenv
else
# Linux: system Python is usable; add a PPA for a recent liblz4.
pip install virtualenv
sudo add-apt-repository -y ppa:gezakovacs/lz4
sudo apt-get update
sudo apt-get install -y liblz4-dev
sudo apt-get install -y libacl1-dev
fi
# Create and activate the venv that .travis/run.sh will use, then install
# the test toolchain and borg itself (editable install).
python -m virtualenv ~/.venv
source ~/.venv/bin/activate
pip install tox pytest pytest-cov codecov Cython
pip install -e .

23
.travis/run.sh Executable file
View file

@ -0,0 +1,23 @@
#!/bin/bash
# Travis CI "script" step: run the tox environment selected via $TOXENV,
# inside the virtualenv prepared by .travis/install.sh.
set -e
set -x
if [[ "$(uname -s)" == "Darwin" ]]; then
eval "$(pyenv init -)"
if [[ "${OPENSSL}" != "0.9.8" ]]; then
# set our flags to use homebrew openssl
export ARCHFLAGS="-arch x86_64"
export LDFLAGS="-L/usr/local/opt/openssl/lib"
export CFLAGS="-I/usr/local/opt/openssl/include"
fi
fi
source ~/.venv/bin/activate
if [[ "$(uname -s)" == "Darwin" ]]; then
# no fakeroot on OS X
sudo tox -e $TOXENV
else
# Linux: run under fakeroot so the tests see root-like permissions.
fakeroot -u tox
fi

13
.travis/upload_coverage.sh Executable file
View file

@ -0,0 +1,13 @@
#!/bin/bash
# Travis CI "after_success" step: upload coverage data to codecov.io,
# skipping tox environments that produce no coverage (e.g. pep8).
set -e
set -x
NO_COVERAGE_TOXENVS=(pep8)
if ! [[ "${NO_COVERAGE_TOXENVS[*]}" =~ "${TOXENV}" ]]; then
source ~/.venv/bin/activate
# tox leaves its coverage file under .tox/; hard-link it into the
# working directory where codecov expects to find it.
ln .tox/.coverage .coverage
# on osx, tests run as root, need access to .coverage
sudo chmod 666 .coverage
codecov -e TRAVIS_OS_NAME TOXENV
fi

View file

@ -2,7 +2,8 @@ Borg Developers / Contributors ("The Borg Collective")
``````````````````````````````````````````````````````
- Thomas Waldmann <tw@waldmann-edv.de>
- Antoine Beaupré
- Radek Podgorny <radek@podgorny.cz>
- Yuri D'Elia
Borg is a fork of Attic. Attic is written and maintained
by Jonas Borgström and various contributors:

View file

@ -2,36 +2,144 @@ Borg Changelog
==============
Version 0.25.0 (not released yet)
---------------------------------
Compatibility notes:
- the new compression code is very compatible: as long as you stay with zlib
compression, older borg releases will still be able to read data from a
repo/archive made with the new code (note: this is not the case for the
default "none" compression, use "zlib,0" if you want a "no compression" mode
that can be read by older borg). Also the new code is able to read repos and
archives made with older borg versions (for all zlib levels 0..9).
Deprecations:
- --compression N (with N being a number, as in 0.24) is deprecated.
We keep the --compression 0..9 for now to not break scripts, but it is
deprecated and will be removed later, so better fix your scripts now:
--compression 0 (as in 0.24) is the same as --compression zlib,0 (now).
BUT: if you do not want compression, you rather want --compression none
(which is the default).
--compression 1 (in 0.24) is the same as --compression zlib,1 (now)
--compression 9 (in 0.24) is the same as --compression zlib,9 (now)
New features:
- create --compression none (default, means: do not compress, just pass through
data "as is". this is more efficient than zlib level 0 as used in borg 0.24)
- create --compression lz4 (super-fast, but not very high compression)
Please note that borgbackup needs lz4 library as additional requirement.
- create --compression zlib,N (slower, higher compression, default for N is 6)
- create --compression lzma,N (slowest, highest compression, default N is 6)
- honor the nodump flag (UF_NODUMP) and do not backup such items
Bug fixes:
- close fds of segments we delete (during compaction)
Other changes:
- none yet
Version 0.24.0
--------------
Incompatible changes (compared to 0.23):
- borg now always issues --umask NNN option when invoking another borg via ssh
on the repository server. By that, it's making sure it uses the same umask
for remote repos as for local ones. Because of this, you must upgrade both
server and client(s) to 0.24.
- the default umask is 077 now (if you do not specify via --umask) which might
be a different one as you used previously. The default umask avoids that
you accidentally give access permissions for group and/or others to files
created by borg (e.g. the repository).
Deprecations:
- "--encryption passphrase" mode is deprecated, see #85 and #97.
See the new "--encryption repokey" mode for a replacement.
New features:
- borg create --chunker-params ... to configure the chunker.
- borg create --chunker-params ... to configure the chunker, fixes #16
(attic #302, attic #300, and somehow also #41).
This can be used to reduce memory usage caused by chunk management overhead,
so borg does not create a huge chunks index/repo index and eats all your RAM
if you back up lots of data in huge files (like VM disk images).
See docs/misc/create_chunker-params.txt for more information.
- borg info now reports chunk counts in the chunk index.
- borg create --compression 0..9 to select zlib compression level, fixes #66
(attic #295).
- borg init --encryption repokey (to store the encryption key into the repo),
fixes #85
- improve at-end error logging, always log exceptions and set exit_code=1
- LoggedIO: better error checks / exceptions / exception handling
- implement --remote-path to allow non-default-path borg locations, #125
- implement --umask M and use 077 as default umask for better security, #117
- borg check: give a named single archive to it, fixes #139
- cache sync: show progress indication
- cache sync: reimplement the chunk index merging in C
Bug fixes:
- reduce memory usage, see --chunker-params, fixes #16.
This can be used to reduce chunk management overhead, so borg does not create
a huge chunks index/repo index and eats all your RAM if you back up lots of
data in huge files (like VM disk images).
- better Exception msg if there is no Borg installed on the remote repo server.
- fix segfault that happened for unreadable files (chunker: n needs to be a
signed size_t), #116
- fix the repair mode, #144
- repo delete: add destroy to allowed rpc methods, fixes issue #114
- more compatible repository locking code (based on mkdir), maybe fixes #92
(attic #317, attic #201).
- better Exception msg if no Borg is installed on the remote repo server, #56
- create a RepositoryCache implementation that can cope with >2GiB,
fixes attic #326.
- fix Traceback when running check --repair, attic #232
- clarify help text, fixes #73.
- add help string for --no-files-cache, fixes #140
Other changes:
- Fedora/Fedora-based install instructions added to docs.
- added docs/misc directory for misc. writeups that won't be included "as is"
into the html docs.
- improved docs:
- added docs/misc directory for misc. writeups that won't be included
"as is" into the html docs.
- document environment variables and return codes (attic #324, attic #52)
- web site: add related projects, fix web site url, IRC #borgbackup
- Fedora/Fedora-based install instructions added to docs
- Cygwin-based install instructions added to docs
- updated AUTHORS
- add FAQ entries about redundancy / integrity
- clarify that borg extract uses the cwd as extraction target
- update internals doc about chunker params, memory usage and compression
- added docs about development
- add some words about resource usage in general
- document how to backup a raw disk
- add note about how to run borg from virtual env
- add solutions for (ll)fuse installation problems
- document what borg check does, fixes #138
- reorganize borgbackup.github.io sidebar, prev/next at top
- deduplicate and refactor the docs / README.rst
- use borg-tmp as prefix for temporary files / directories
- short prune options without "keep-" are deprecated, do not suggest them
- improved tox configuration
- remove usage of unittest.mock, always use mock from pypi
- use entrypoints instead of scripts, for better use of the wheel format and
modern installs
- add requirements.d/development.txt and modify tox.ini
- use travis-ci for testing based on Linux and (new) OS X
- use coverage.py, pytest-cov and codecov.io for test coverage support
I forgot to list some stuff already implemented in 0.23.0, here they are:
New features:
- efficient archive list from manifest, meaning a big speedup for slow
repo connections and "list <repo>", "delete <repo>", "prune"
repo connections and "list <repo>", "delete <repo>", "prune" (attic #242,
attic #167)
- big speedup for chunks cache sync (esp. for slow repo connections), fixes #18
- hashindex: improve error messages

View file

@ -1,4 +1,4 @@
include README.rst AUTHORS LICENSE CHANGES MANIFEST.in versioneer.py
include README.rst AUTHORS LICENSE CHANGES.rst MANIFEST.in versioneer.py
recursive-include borg *.pyx
recursive-include docs *
recursive-exclude docs *.pyc

View file

@ -1,13 +1,112 @@
|build|
What is BorgBackup?
-------------------
BorgBackup (short: Borg) is a deduplicating backup program.
Optionally, it supports compression and authenticated encryption.
What is Borg?
-------------
Borg is a deduplicating backup program. The main goal of Borg is to provide
an efficient and secure way to backup data. The data deduplication
technique used makes Borg suitable for daily backups since only changes
are stored.
The main goal of Borg is to provide an efficient and secure way to backup data.
The data deduplication technique used makes Borg suitable for daily backups
since only changes are stored.
The authenticated encryption technique makes it suitable for backups to not
fully trusted targets.
Borg is a fork of Attic and maintained by "The Borg Collective" (see AUTHORS file).
`Borg Installation docs <http://borgbackup.github.io/borgbackup/installation.html>`_
Main features
~~~~~~~~~~~~~
**Space efficient storage**
Deduplication based on content-defined chunking is used to reduce the number
of bytes stored: each file is split into a number of variable length chunks
and only chunks that have never been seen before are added to the repository.
To deduplicate, all the chunks in the same repository are considered, no
matter whether they come from different machines, from previous backups,
from the same backup or even from the same single file.
Compared to other deduplication approaches, this method does NOT depend on:
* file/directory names staying the same
So you can move your stuff around without killing the deduplication,
even between machines sharing a repo.
* complete files or time stamps staying the same
If a big file changes a little, only a few new chunks will be stored -
this is great for VMs or raw disks.
* the absolute position of a data chunk inside a file
Stuff may get shifted and will still be found by the deduplication
algorithm.
**Speed**
* performance critical code (chunking, compression, encryption) is
implemented in C/Cython
* local caching of files/chunks index data
* quick detection of unmodified files
**Data encryption**
All data can be protected using 256-bit AES encryption, data integrity and
authenticity is verified using HMAC-SHA256.
**Compression**
All data can be compressed by lz4 (super fast, low compression), zlib
(medium speed and compression) or lzma (low speed, high compression).
**Off-site backups**
Borg can store data on any remote host accessible over SSH. If Borg is
installed on the remote host, big performance gains can be achieved
compared to using a network filesystem (sshfs, nfs, ...).
**Backups mountable as filesystems**
Backup archives are mountable as userspace filesystems for easy interactive
backup examination and restores (e.g. by using a regular file manager).
**Platforms Borg works on**
* Linux
* FreeBSD
* Mac OS X
* Cygwin (unsupported)
**Free and Open Source Software**
* security and functionality can be audited independently
* licensed under the BSD (3-clause) license
Easy to use
~~~~~~~~~~~
Initialize a new backup repository and create a backup archive::
$ borg init /mnt/backup
$ borg create /mnt/backup::Monday ~/Documents
Now doing another backup, just to show off the great deduplication::
$ borg create --stats /mnt/backup::Tuesday ~/Documents
Archive name: Tuesday
Archive fingerprint: 387a5e3f9b0e792e91c...
Start time: Tue Mar 25 12:00:10 2014
End time: Tue Mar 25 12:00:10 2014
Duration: 0.08 seconds
Number of files: 358
Original size Compressed size Deduplicated size
This archive: 57.16 MB 46.78 MB 151.67 kB <--- !
All archives: 114.02 MB 93.46 MB 44.81 MB
For a graphical frontend refer to our complementary project
`BorgWeb <https://github.com/borgbackup/borgweb>`_.
Notes
-----
Borg is a fork of `Attic <https://github.com/jborg/attic>`_ and maintained by
"`The Borg Collective <https://github.com/borgbackup/borg/blob/master/AUTHORS>`_".
Read `issue #1 <https://github.com/borgbackup/borg/issues/1>`_ about the initial
considerations regarding project goals and policy of the Borg project.
BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC.
EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER
@ -17,61 +116,15 @@ NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES.
THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.
Read issue #1 on the issue tracker, goals are being defined there.
For more information, please also see the
`LICENSE <https://github.com/borgbackup/borg/blob/master/LICENSE>`_.
Please also see the LICENSE for more informations.
Easy to use
~~~~~~~~~~~
Initialize backup repository and create a backup archive::
$ borg init /mnt/backup
$ borg create -v /mnt/backup::documents ~/Documents
Main features
~~~~~~~~~~~~~
Space efficient storage
Variable block size deduplication is used to reduce the number of bytes
stored by detecting redundant data. Each file is split into a number of
variable length chunks and only chunks that have never been seen before are
compressed and added to the repository.
Optional data encryption
All data can be protected using 256-bit AES encryption and data integrity
and authenticity is verified using HMAC-SHA256.
Off-site backups
Borg can store data on any remote host accessible over SSH. This is
most efficient if Borg is also installed on the remote host.
Backups mountable as filesystems
Backup archives are mountable as userspace filesystems for easy backup
verification and restores.
What do I need?
---------------
Borg requires Python 3.2 or above to work.
Borg also requires a sufficiently recent OpenSSL (>= 1.0.0).
In order to mount archives as filesystems, llfuse is required.
How do I install it?
--------------------
::
$ pip3 install borgbackup
Where are the docs?
-------------------
Go to https://borgbackup.github.io/ for a prebuilt version of the documentation.
You can also build it yourself from the docs folder.
Where are the tests?
--------------------
The tests are in the borg/testsuite package. To run the test suite use the
following command::
$ fakeroot -u tox # you need to have tox and pytest installed
|build| |coverage|
.. |build| image:: https://travis-ci.org/borgbackup/borg.svg
:alt: Build Status
:target: https://travis-ci.org/borgbackup/borg
.. |coverage| image:: http://codecov.io/github/borgbackup/borg/coverage.svg?branch=master
:alt: Test Coverage
:target: http://codecov.io/github/borgbackup/borg?branch=master

View file

@ -1,4 +1,3 @@
#!/usr/bin/env python
from borg.archiver import main
main()

View file

@ -127,7 +127,8 @@ chunker_free(Chunker *c)
static int
chunker_fill(Chunker *c, PyThreadState **tstatep)
{
size_t n;
ssize_t n;
PyObject *data;
memmove(c->data, c->data + c->last, c->position + c->remaining - c->last);
c->position -= c->last;
c->last = 0;

View file

@ -145,10 +145,12 @@ hashindex_read(const char *path)
bytes_read = fread(&header, 1, sizeof(HashHeader), fd);
if(bytes_read != sizeof(HashHeader)) {
if(ferror(fd)) {
EPRINTF_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read);
EPRINTF_PATH(path, "fread header failed (expected %ju, got %ju)",
(uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read);
}
else {
EPRINTF_MSG_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read);
EPRINTF_MSG_PATH(path, "fread header failed (expected %ju, got %ju)",
(uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read);
}
goto fail;
}
@ -170,7 +172,8 @@ hashindex_read(const char *path)
}
buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size);
if(length != sizeof(HashHeader) + buckets_length) {
EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ld, got %ld)", sizeof(HashHeader) + buckets_length, length);
EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ju, got %ju)",
(uintmax_t) sizeof(HashHeader) + buckets_length, (uintmax_t) length);
goto fail;
}
if(!(index = malloc(sizeof(HashIndex)))) {
@ -186,10 +189,12 @@ hashindex_read(const char *path)
bytes_read = fread(index->buckets, 1, buckets_length, fd);
if(bytes_read != buckets_length) {
if(ferror(fd)) {
EPRINTF_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read);
EPRINTF_PATH(path, "fread buckets failed (expected %ju, got %ju)",
(uintmax_t) buckets_length, (uintmax_t) bytes_read);
}
else {
EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read);
EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ju, got %ju)",
(uintmax_t) buckets_length, (uintmax_t) bytes_read);
}
free(index->buckets);
free(index);
@ -385,3 +390,22 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
*total_unique_chunks = unique_chunks;
*total_chunks = chunks;
}
/* Merge all entries of *other* into *index*.
 * Keys only present in *other* are copied over unchanged; for keys present
 * in both, only the first int32 value field is summed (presumably the chunk
 * reference count in borg's chunk index — the remaining value fields of the
 * existing entry are left untouched; confirm against the cache sync caller). */
static void
hashindex_merge(HashIndex *index, HashIndex *other)
{
int32_t key_size = index->key_size;
const int32_t *other_values;
int32_t *my_values;
void *key = NULL;
/* key == NULL starts the iteration; hashindex_next_key() returns NULL
 * once every key of *other* has been visited. */
while((key = hashindex_next_key(other, key))) {
other_values = key + key_size;
my_values = (int32_t *)hashindex_get(index, key);
if(my_values == NULL) {
hashindex_set(index, key, other_values);
} else {
*my_values += *other_values;
}
}
}

View file

@ -2,8 +2,6 @@ from datetime import datetime
from getpass import getuser
from itertools import groupby
import errno
import shutil
import tempfile
import threading
from .key import key_factory
from .remote import cache_if_remote
@ -110,7 +108,7 @@ class ChunkBuffer:
class CacheChunkBuffer(ChunkBuffer):
def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS):
super(CacheChunkBuffer, self).__init__(key, chunker_params)
super().__init__(key, chunker_params)
self.cache = cache
self.stats = stats
@ -317,7 +315,6 @@ class Archive:
class IncompatibleFilesystemEncodingError(Error):
"""Failed to encode filename "{}" into file system encoding "{}". Consider configuring the LANG environment variable."""
def __init__(self, repository, key, manifest, name, cache=None, create=False,
checkpoint_interval=300, numeric_owner=False, progress=False,
chunker_params=CHUNKER_PARAMS):
@ -432,9 +429,11 @@ class Archive:
count, size, csize = cache.chunks[id]
stats.update(size, csize, count == 1)
cache.chunks[id] = count - 1, size, csize
def add_file_chunks(chunks):
for id, _, _ in chunks:
add(id)
# This function is a bit evil since it abuses the cache to calculate
# the stats. The cache transaction must be rolled back afterwards
unpacker = msgpack.Unpacker(use_list=False)
@ -751,13 +750,13 @@ class Archive:
return Archive._open_rb(path, st)
class RobustUnpacker():
class RobustUnpacker:
"""A restartable/robust version of the streaming msgpack unpacker
"""
item_keys = [msgpack.packb(name) for name in ('path', 'mode', 'source', 'chunks', 'rdev', 'xattrs', 'user', 'group', 'uid', 'gid', 'mtime')]
def __init__(self, validator):
super(RobustUnpacker, self).__init__()
super().__init__()
self.validator = validator
self._buffered_data = []
self._resync = False
@ -815,13 +814,10 @@ class ArchiveChecker:
def __init__(self):
self.error_found = False
self.possibly_superseded = set()
self.tmpdir = tempfile.mkdtemp()
def __del__(self):
shutil.rmtree(self.tmpdir)
def check(self, repository, repair=False, last=None):
def check(self, repository, repair=False, archive=None, last=None):
self.report_progress('Starting archive consistency check...')
self.check_all = archive is None and last is None
self.repair = repair
self.repository = repository
self.init_chunks()
@ -830,11 +826,9 @@ class ArchiveChecker:
self.manifest = self.rebuild_manifest()
else:
self.manifest, _ = Manifest.load(repository, key=self.key)
self.rebuild_refcounts(last=last)
if last is None:
self.verify_chunks()
else:
self.report_progress('Orphaned objects check skipped (needs all archives checked)')
self.rebuild_refcounts(archive=archive, last=last)
self.orphan_chunks_check()
self.finish()
if not self.error_found:
self.report_progress('Archive consistency check complete, no problems found.')
return self.repair or not self.error_found
@ -842,7 +836,7 @@ class ArchiveChecker:
def init_chunks(self):
"""Fetch a list of all object keys from repository
"""
# Explicity set the initial hash table capacity to avoid performance issues
# Explicitly set the initial hash table capacity to avoid performance issues
# due to hash table "resonance"
capacity = int(len(self.repository) * 1.2)
self.chunks = ChunkIndex(capacity)
@ -891,7 +885,7 @@ class ArchiveChecker:
self.report_progress('Manifest rebuild complete', error=True)
return manifest
def rebuild_refcounts(self, last=None):
def rebuild_refcounts(self, archive=None, last=None):
"""Rebuild object reference counts by walking the metadata
Missing and/or incorrect data is repaired when detected
@ -966,13 +960,24 @@ class ArchiveChecker:
for chunk_id, cdata in zip(items, repository.get_many(items)):
unpacker.feed(self.key.decrypt(chunk_id, cdata))
for item in unpacker:
if not isinstance(item, dict):
self.report_progress('Did not get expected metadata dict - archive corrupted!',
error=True)
continue
yield item
repository = cache_if_remote(self.repository)
num_archives = len(self.manifest.archives)
archive_items = sorted(self.manifest.archives.items(), reverse=True,
key=lambda name_info: name_info[1][b'time'])
end = None if last is None else min(num_archives, last)
if archive is None:
# we need last N or all archives
archive_items = sorted(self.manifest.archives.items(), reverse=True,
key=lambda name_info: name_info[1][b'time'])
num_archives = len(self.manifest.archives)
end = None if last is None else min(num_archives, last)
else:
# we only want one specific archive
archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
num_archives = 1
end = 1
for i, (name, info) in enumerate(archive_items[:end]):
self.report_progress('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives))
archive_id = info[b'id']
@ -1003,17 +1008,22 @@ class ArchiveChecker:
add_reference(new_archive_id, len(data), len(cdata), cdata)
info[b'id'] = new_archive_id
def verify_chunks(self):
unused = set()
for id_, (count, size, csize) in self.chunks.iteritems():
if count == 0:
unused.add(id_)
orphaned = unused - self.possibly_superseded
if orphaned:
self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True)
def orphan_chunks_check(self):
if self.check_all:
unused = set()
for id_, (count, size, csize) in self.chunks.iteritems():
if count == 0:
unused.add(id_)
orphaned = unused - self.possibly_superseded
if orphaned:
self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True)
if self.repair:
for id_ in unused:
self.repository.delete(id_)
else:
self.report_progress('Orphaned objects check skipped (needs all archives checked)')
def finish(self):
if self.repair:
for id_ in unused:
self.repository.delete(id_)
self.manifest.write()
self.repository.commit()

View file

@ -14,6 +14,7 @@ import traceback
from . import __version__
from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS
from .compress import Compressor, COMPR_BUFFER
from .repository import Repository
from .cache import Cache
from .key import key_creator
@ -21,9 +22,11 @@ from .helpers import Error, location_validator, format_time, format_file_size, \
format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
is_cachedir, bigint_to_int, ChunkerParams
is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec
from .remote import RepositoryServer, RemoteRepository
has_lchflags = hasattr(os, 'lchflags')
class Archiver:
@ -85,8 +88,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
print('Repository check complete, no problems found.')
else:
return 1
if not args.repo_only and not ArchiveChecker().check(repository, repair=args.repair, last=args.last):
return 1
if not args.repo_only and not ArchiveChecker().check(
repository, repair=args.repair, archive=args.repository.archive, last=args.last):
return 1
return 0
def do_change_passphrase(self, args):
@ -101,7 +105,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
t0 = datetime.now()
repository = self.open_repository(args.archive, exclusive=True)
manifest, key = Manifest.load(repository)
key.compression_level = args.compression
compr_args = dict(buffer=COMPR_BUFFER)
compr_args.update(args.compression)
key.compressor = Compressor(**compr_args)
cache = Cache(repository, key, manifest, do_files=args.cache_files)
archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
create=True, checkpoint_interval=args.checkpoint_interval,
@ -174,6 +180,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
if restrict_dev and st.st_dev != restrict_dev:
return
status = None
# Ignore if nodump flag is set
if has_lchflags and (st.st_flags & stat.UF_NODUMP):
return
if stat.S_ISREG(st.st_mode):
try:
status = archive.process_file(path, st, cache)
@ -223,7 +232,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
# be restrictive when restoring files, restore permissions later
if sys.getfilesystemencoding() == 'ascii':
print('Warning: File system encoding is "ascii", extracting non-ascii filenames will not be supported.')
os.umask(0o077)
repository = self.open_repository(args.archive)
manifest, key = Manifest.load(repository)
archive = Archive(repository, key, manifest, args.archive.archive,
@ -291,11 +299,13 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
print("You requested to completely DELETE the repository *including* all archives it contains:")
for archive_info in manifest.list_archive_infos(sort_by='ts'):
print(format_archive(archive_info))
print("""Type "YES" if you understand this and want to continue.\n""")
if input('Do you want to continue? ') == 'YES':
repository.destroy()
cache.destroy()
print("Repository and corresponding cache were deleted.")
while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
print("""Type "YES" if you understand this and want to continue.\n""")
if input('Do you want to continue? ') == 'YES':
break
repository.destroy()
cache.destroy()
print("Repository and corresponding cache were deleted.")
return self.exit_code
def do_mount(self, args):
@ -330,34 +340,38 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
repository = self.open_repository(args.src)
manifest, key = Manifest.load(repository)
if args.src.archive:
tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'}
archive = Archive(repository, key, manifest, args.src.archive)
for item in archive.iter_items():
type = tmap.get(item[b'mode'] // 4096, '?')
mode = format_file_mode(item[b'mode'])
size = 0
if type == '-':
if args.short:
for item in archive.iter_items():
print(remove_surrogates(item[b'path']))
else:
tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'}
for item in archive.iter_items():
type = tmap.get(item[b'mode'] // 4096, '?')
mode = format_file_mode(item[b'mode'])
size = 0
if type == '-':
try:
size = sum(size for _, size, _ in item[b'chunks'])
except KeyError:
pass
try:
size = sum(size for _, size, _ in item[b'chunks'])
except KeyError:
pass
try:
mtime = datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9)
except ValueError:
# likely a broken mtime and datetime did not want to go beyond year 9999
mtime = datetime(9999, 12, 31, 23, 59, 59)
if b'source' in item:
if type == 'l':
extra = ' -> %s' % item[b'source']
mtime = datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9)
except ValueError:
# likely a broken mtime and datetime did not want to go beyond year 9999
mtime = datetime(9999, 12, 31, 23, 59, 59)
if b'source' in item:
if type == 'l':
extra = ' -> %s' % item[b'source']
else:
type = 'h'
extra = ' link to %s' % item[b'source']
else:
type = 'h'
extra = ' link to %s' % item[b'source']
else:
extra = ''
print('%s%s %-6s %-6s %8d %s %s%s' % (
type, mode, item[b'user'] or item[b'uid'],
item[b'group'] or item[b'gid'], size, format_time(mtime),
remove_surrogates(item[b'path']), extra))
extra = ''
print('%s%s %-6s %-6s %8d %s %s%s' % (
type, mode, item[b'user'] or item[b'uid'],
item[b'group'] or item[b'gid'], size, format_time(mtime),
remove_surrogates(item[b'path']), extra))
else:
for archive_info in manifest.list_archive_infos(sort_by='ts'):
print(format_archive(archive_info))
@ -511,7 +525,12 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
default=False,
help='verbose output')
common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false')
common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false',
help='do not load/update the file metadata cache used to detect unchanged files')
common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M',
help='set umask to M (local and remote, default: 0o077)')
common_parser.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH',
help='set remote path to executable (default: "borg")')
# We can't use argparse for "serve" since we don't want it to show up in "Available commands"
if args:
@ -533,6 +552,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
This command initializes an empty repository. A repository is a filesystem
directory containing the deduplicated data from zero or more archives.
Encryption can be enabled at repository init time.
Please note that the 'passphrase' encryption mode is DEPRECATED (instead of it,
consider using 'repokey').
""")
subparser = subparsers.add_parser('init', parents=[common_parser],
description=self.do_init.__doc__, epilog=init_epilog,
@ -542,27 +563,51 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
type=location_validator(archive=False),
help='repository to create')
subparser.add_argument('-e', '--encryption', dest='encryption',
choices=('none', 'passphrase', 'keyfile'), default='none',
help='select encryption method')
choices=('none', 'keyfile', 'repokey', 'passphrase'), default='none',
help='select encryption key mode')
check_epilog = textwrap.dedent("""
The check command verifies the consistency of a repository and the corresponding
archives. The underlying repository data files are first checked to detect bit rot
and other types of damage. After that the consistency and correctness of the archive
metadata is verified.
The check command verifies the consistency of a repository and the corresponding archives.
The archive metadata checks can be time consuming and requires access to the key
file and/or passphrase if encryption is enabled. These checks can be skipped using
the --repository-only option.
First, the underlying repository data files are checked:
- For all segments the segment magic (header) is checked
- For all objects stored in the segments, all metadata (e.g. crc and size) and
all data is read. The read data is checked by size and CRC. Bit rot and other
types of accidental damage can be detected this way.
- If we are in repair mode and a integrity error is detected for a segment,
we try to recover as many objects from the segment as possible.
- In repair mode, it makes sure that the index is consistent with the data
stored in the segments.
- If you use a remote repo server via ssh:, the repo check is executed on the
repo server without causing significant network traffic.
- The repository check can be skipped using the --archives-only option.
Second, the consistency and correctness of the archive metadata is verified:
- Is the repo manifest present? If not, it is rebuilt from archive metadata
chunks (this requires reading and decrypting of all metadata and data).
- Check if archive metadata chunk is present. if not, remove archive from
manifest.
- For all files (items) in the archive, for all chunks referenced by these
files, check if chunk is present (if not and we are in repair mode, replace
it with a same-size chunk of zeros). This requires reading of archive and
file metadata, but not data.
- If we are in repair mode and we checked all the archives: delete orphaned
chunks from the repo.
- if you use a remote repo server via ssh:, the archive check is executed on
the client machine (because if encryption is enabled, the checks will require
decryption and this is always done client-side, because key access will be
required).
- The archive checks can be time consuming, they can be skipped using the
--repository-only option.
""")
subparser = subparsers.add_parser('check', parents=[common_parser],
description=self.do_check.__doc__,
epilog=check_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter)
subparser.set_defaults(func=self.do_check)
subparser.add_argument('repository', metavar='REPOSITORY',
type=location_validator(archive=False),
help='repository to check consistency of')
subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE',
type=location_validator(),
help='repository or archive to check consistency of')
subparser.add_argument('--repository-only', dest='repo_only', action='store_true',
default=False,
help='only perform repository checks')
@ -593,7 +638,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
traversing all paths specified. The archive will consume almost no disk space for
files or parts of files that have already been stored in other archives.
See "borg help patterns" for more help on exclude patterns.
See the output of the "borg help patterns" command for more help on exclude patterns.
""")
subparser = subparsers.add_parser('create', parents=[common_parser],
@ -635,9 +680,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS)
subparser.add_argument('-C', '--compression', dest='compression',
type=int, default=0, metavar='N',
help='select compression algorithm and level. 0..9 is supported and means zlib '
'level 0 (no compression, fast, default) .. zlib level 9 (high compression, slow).')
type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION',
help='select compression algorithm (and level): '
'none == no compression (default), '
'lz4 == lz4, '
'zlib == zlib (default level 6), '
'zlib,0 .. zlib,9 == zlib (with level 0..9), '
'lzma == lzma (default level 6), '
'lzma,0 .. lzma,9 == lzma (with level 0..9).')
subparser.add_argument('archive', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to create')
@ -650,7 +700,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
by passing a list of ``PATHs`` as arguments. The file selection can further
be restricted by using the ``--exclude`` option.
See "borg help patterns" for more help on exclude patterns.
See the output of the "borg help patterns" command for more help on exclude patterns.
""")
subparser = subparsers.add_parser('extract', parents=[common_parser],
description=self.do_extract.__doc__,
@ -723,6 +773,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
epilog=list_epilog,
formatter_class=argparse.RawDescriptionHelpFormatter)
subparser.set_defaults(func=self.do_list)
subparser.add_argument('--short', dest='short',
action='store_true', default=False,
help='only print file/directory names, nothing else')
subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(),
help='repository/archive to list contents of')
mount_epilog = textwrap.dedent("""
@ -822,11 +875,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
args = parser.parse_args(args or ['-h'])
self.verbose = args.verbose
os.umask(args.umask)
RemoteRepository.remote_path = args.remote_path
RemoteRepository.umask = args.umask
update_excludes(args)
return args.func(args)
def sig_info_handler(signum, stack):
def sig_info_handler(signum, stack): # pragma: no cover
"""search the stack for infos about the currently processed file and print them"""
for frame in inspect.getouterframes(stack):
func, loc = frame[3], frame[0].f_locals
@ -849,7 +905,7 @@ def sig_info_handler(signum, stack):
break
def setup_signal_handlers():
def setup_signal_handlers(): # pragma: no cover
sigs = []
if hasattr(signal, 'SIGUSR1'):
sigs.append(signal.SIGUSR1) # kill -USR1 pid
@ -859,7 +915,7 @@ def setup_signal_handlers():
signal.signal(sig, sig_info_handler)
def main():
def main(): # pragma: no cover
# Make sure stdout and stderr have errors='replace') to avoid unicode
# issues when print()-ing unicode file names
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True)

View file

@ -11,8 +11,9 @@ import tarfile
import tempfile
from .key import PlaintextKey
from .helpers import Error, get_cache_dir, decode_dict, st_mtime_ns, unhexlify, UpgradableLock, int_to_bigint, \
from .helpers import Error, get_cache_dir, decode_dict, st_mtime_ns, unhexlify, int_to_bigint, \
bigint_to_int
from .locking import UpgradableLock
from .hashindex import ChunkIndex
@ -22,7 +23,6 @@ class Cache:
class RepositoryReplay(Error):
"""Cache is newer than repository, refusing to continue"""
class CacheInitAbortedError(Error):
"""Cache initialization aborted"""
@ -129,7 +129,7 @@ class Cache:
def open(self):
if not os.path.isdir(self.path):
raise Exception('%s Does not look like a Borg cache' % self.path)
self.lock = UpgradableLock(os.path.join(self.path, 'config'), exclusive=True)
self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True).acquire()
self.rollback()
def close(self):
@ -292,6 +292,9 @@ class Cache:
add(chunk_idx, item_id, len(data), len(chunk))
unpacker.feed(data)
for item in unpacker:
if not isinstance(item, dict):
print('Error: Did not get expected metadata dict - archive corrupted!')
continue
if b'chunks' in item:
for chunk_id, size, csize in item[b'chunks']:
add(chunk_idx, chunk_id, size, csize)
@ -308,17 +311,20 @@ class Cache:
chunk_idx.clear()
for tarinfo in tf_in:
archive_id_hex = tarinfo.name
archive_name = tarinfo.pax_headers['archive_name']
print("- extracting archive %s ..." % archive_name)
tf_in.extract(archive_id_hex, tmp_dir)
chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
print("- reading archive ...")
archive_chunk_idx = ChunkIndex.read(chunk_idx_path)
for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems():
add(chunk_idx, chunk_id, size, csize, incr=count)
print("- merging archive ...")
chunk_idx.merge(archive_chunk_idx)
os.unlink(chunk_idx_path)
self.begin_txn()
print('Synchronizing chunks cache...')
# XXX we have to do stuff on disk due to lacking ChunkIndex api
with tempfile.TemporaryDirectory() as tmp_dir:
with tempfile.TemporaryDirectory(prefix='borg-tmp') as tmp_dir:
repository = cache_if_remote(self.repository)
out_archive = open_out_archive()
in_archive = open_in_archive()

199
borg/compress.pyx Normal file
View file

@ -0,0 +1,199 @@
import zlib
try:
import lzma
except ImportError:
lzma = None
cdef extern from "lz4.h":
int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
cdef class CompressorBase:
    """
    base class for all (de)compression classes,
    also handles compression format auto detection and
    adding/stripping the ID header (which enable auto detection).
    """
    ID = b'\xFF\xFF'  # reserved and not used
    # overwrite with a unique 2-bytes bytestring in child classes
    name = 'baseclass'

    @classmethod
    def detect(cls, data):
        # does this data start with our compressor's 2-byte ID header?
        return data.startswith(cls.ID)

    def __init__(self, **kwargs):
        # accept (and ignore) arbitrary kwargs so all compressors share
        # one construction signature (e.g. the lz4 'buffer' kwarg)
        pass

    def compress(self, data):
        # add ID bytes
        return self.ID + data

    def decompress(self, data):
        # strip ID bytes
        return data[2:]
class CNONE(CompressorBase):
    """
    The 'none' pseudo compressor: data is stored unchanged,
    only the 2-byte ID header is added / stripped.
    """
    ID = b'\x00\x00'
    name = 'none'

    def compress(self, data):
        # nothing to do besides prepending the ID header
        return super().compress(data)

    def decompress(self, data):
        payload = super().decompress(data)
        # callers expect real bytes, not e.g. a memoryview slice
        if isinstance(payload, bytes):
            return payload
        return bytes(payload)
cdef class LZ4(CompressorBase):
    """
    raw LZ4 compression / decompression (liblz4).

    Features:
        - lz4 is super fast
        - wrapper releases CPython's GIL to support multithreaded code
        - buffer given by caller, avoiding frequent reallocation and buffer duplication
        - uses safe lz4 methods that never go beyond the end of the output buffer

    But beware:
        - this is not very generic, the given buffer MUST be large enough to
          handle all compression or decompression output (or it will fail).
        - you must not do method calls to the same LZ4 instance from different
          threads at the same time - create one LZ4 instance per thread!
    """
    ID = b'\x01\x00'
    name = 'lz4'

    cdef char *buffer  # helper buffer for (de)compression output
    cdef int bufsize  # size of this buffer

    def __cinit__(self, **kwargs):
        # NOTE(review): self.buffer is a raw pointer into the caller-supplied
        # bytes object; the caller must keep that object alive for the whole
        # lifetime of this instance - confirm at call sites.
        buffer = kwargs['buffer']
        self.buffer = buffer
        self.bufsize = len(buffer)

    def compress(self, idata):
        if not isinstance(idata, bytes):
            idata = bytes(idata)  # code below does not work with memoryview
        cdef int isize = len(idata)
        cdef int osize = self.bufsize
        cdef char *source = idata
        cdef char *dest = self.buffer
        with nogil:
            osize = LZ4_compress_limitedOutput(source, dest, isize, osize)
        if not osize:
            # 0 return value means compression failed (e.g. output buffer too small)
            raise Exception('lz4 compress failed')
        # prepend the ID header via the base class
        return super().compress(dest[:osize])

    def decompress(self, idata):
        if not isinstance(idata, bytes):
            idata = bytes(idata)  # code below does not work with memoryview
        # strip the ID header via the base class
        idata = super().decompress(idata)
        cdef int isize = len(idata)
        cdef int osize = self.bufsize
        cdef char *source = idata
        cdef char *dest = self.buffer
        with nogil:
            osize = LZ4_decompress_safe(source, dest, isize, osize)
        if osize < 0:
            # malformed input data, buffer too small, ...
            raise Exception('lz4 decompress failed')
        return dest[:osize]
class LZMA(CompressorBase):
    """
    lzma compression / decompression (python 3.3+ stdlib)
    """
    ID = b'\x02\x00'
    name = 'lzma'

    def __init__(self, level=6, **kwargs):
        super().__init__(**kwargs)
        self.level = level
        # the module-level "import lzma" is guarded; bail out if unavailable
        if lzma is None:
            raise ValueError('No lzma support found.')

    def compress(self, data):
        # we do not need integrity checks in lzma, we do that already
        compressed = lzma.compress(data, preset=self.level, check=lzma.CHECK_NONE)
        return super().compress(compressed)

    def decompress(self, data):
        stripped = super().decompress(data)
        return lzma.decompress(stripped)
class ZLIB(CompressorBase):
    """
    zlib compression / decompression (python stdlib).

    For backwards compatibility this class does NOT add or strip the 2-byte
    ID header: a zlib stream is recognized by its own native header bytes.
    """
    ID = b'\x08\x00'  # not used here, see detect()
    # avoid all 0x.8.. IDs elsewhere!
    name = 'zlib'

    @classmethod
    def detect(cls, data):
        """Recognize a raw zlib stream by its CMF/FLG header bytes."""
        cmf, flg = data[:2]
        deflate_method = cmf & 0x0f == 8           # CM field == 8 means deflate
        fcheck_ok = (cmf * 256 + flg) % 31 == 0    # zlib header checksum rule
        return fcheck_ok and deflate_method

    def __init__(self, level=6, **kwargs):
        super().__init__(**kwargs)
        self.level = level

    def compress(self, data):
        # note: for compatibility no super call, do not add ID bytes
        return zlib.compress(data, self.level)

    def decompress(self, data):
        # note: for compatibility no super call, do not strip ID bytes
        return zlib.decompress(data)
# registry: user-visible compressor name -> implementation class
COMPRESSOR_TABLE = {
    CNONE.name: CNONE,
    LZ4.name: LZ4,
    ZLIB.name: ZLIB,
    LZMA.name: LZMA,
}
# probe order for auto-detection in Compressor.decompress()
COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ]  # check fast stuff first
def get_compressor(name, **kwargs):
    """Instantiate the compressor registered in COMPRESSOR_TABLE under *name*.

    Extra keyword arguments are forwarded to the compressor's constructor.
    Raises KeyError for an unknown name.
    """
    compressor_cls = COMPRESSOR_TABLE[name]
    return compressor_cls(**kwargs)
class Compressor:
    """
    compresses using a compressor with given name and parameters
    decompresses everything we can handle (autodetect)
    """
    def __init__(self, name='none', **kwargs):
        # bug fix: the default used to be 'null', which is not a key of
        # COMPRESSOR_TABLE (the no-op compressor is registered as 'none',
        # see CNONE.name) and therefore raised KeyError when defaulted.
        self.params = kwargs
        self.compressor = get_compressor(name, **self.params)

    def compress(self, data):
        """Compress *data* with the configured compressor."""
        return self.compressor.compress(data)

    def decompress(self, data):
        """Auto-detect the format of *data* and decompress it.

        Raises ValueError if no registered compressor recognizes the data.
        """
        hdr = bytes(data[:2])  # detect() does not work with memoryview
        for cls in COMPRESSOR_LIST:
            if cls.detect(hdr):
                return cls(**self.params).decompress(data)
        # bug fix: the message is now actually %-formatted; previously the
        # header bytes were passed as a second ValueError arg and %r stayed
        # literal in the message.
        raise ValueError('No decompressor for this data found: %r.' % (data[:2],))
# a buffer used for (de)compression result, which can be slightly bigger
# than the chunk buffer in the worst (incompressible data) case, add 10%:
# (shared default output buffer, sized for the largest possible chunk)
COMPR_BUFFER = bytes(int(1.1 * 2 ** 23))  # CHUNK_MAX_EXP == 23

View file

@ -17,7 +17,7 @@ have_fuse_mtime_ns = hasattr(llfuse.EntryAttributes, 'st_mtime_ns')
class ItemCache:
def __init__(self):
self.fd = tempfile.TemporaryFile()
self.fd = tempfile.TemporaryFile(prefix='borg-tmp')
self.offset = 1000000
def add(self, item):
@ -34,7 +34,7 @@ class FuseOperations(llfuse.Operations):
"""Export archive as a fuse filesystem
"""
def __init__(self, key, repository, manifest, archive):
super(FuseOperations, self).__init__()
super().__init__()
self._inode_count = 0
self.key = key
self.repository = cache_if_remote(repository)

View file

@ -14,6 +14,7 @@ cdef extern from "_hashindex.c":
void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
long long *unique_size, long long *unique_csize,
long long *total_unique_chunks, long long *total_chunks)
void hashindex_merge(HashIndex *index, HashIndex *other)
int hashindex_get_size(HashIndex *index)
int hashindex_write(HashIndex *index, char *path)
void *hashindex_get(HashIndex *index, void *key)
@ -24,15 +25,18 @@ cdef extern from "_hashindex.c":
int _le32toh(int v)
_NoDefault = object()
cdef _NoDefault = object()
cimport cython
@cython.internal
cdef class IndexBase:
cdef HashIndex *index
key_size = 32
def __cinit__(self, capacity=0, path=None):
if path:
self.index = hashindex_read(<bytes>os.fsencode(path))
self.index = hashindex_read(os.fsencode(path))
if not self.index:
raise Exception('hashindex_read failed')
else:
@ -49,7 +53,7 @@ cdef class IndexBase:
return cls(path=path)
def write(self, path):
if not hashindex_write(self.index, <bytes>os.fsencode(path)):
if not hashindex_write(self.index, os.fsencode(path)):
raise Exception('hashindex_write failed')
def clear(self):
@ -187,6 +191,9 @@ cdef class ChunkIndex(IndexBase):
&total_unique_chunks, &total_chunks)
return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
def merge(self, ChunkIndex other):
hashindex_merge(self.index, other.index)
cdef class ChunkKeyIterator:
cdef ChunkIndex idx

View file

@ -2,7 +2,6 @@ import argparse
import binascii
from collections import namedtuple
import grp
import msgpack
import os
import pwd
import queue
@ -12,7 +11,8 @@ import time
from datetime import datetime, timezone, timedelta
from fnmatch import translate
from operator import attrgetter
import fcntl
import msgpack
from . import hashindex
from . import chunker
@ -34,52 +34,15 @@ class ExtensionModuleError(Error):
"""The Borg binary extension modules do not seem to be properly installed"""
class UpgradableLock:
class ReadLockFailed(Error):
"""Failed to acquire read lock on {}"""
class WriteLockFailed(Error):
"""Failed to acquire write lock on {}"""
def __init__(self, path, exclusive=False):
self.path = path
try:
self.fd = open(path, 'r+')
except IOError:
self.fd = open(path, 'r')
try:
if exclusive:
fcntl.lockf(self.fd, fcntl.LOCK_EX)
else:
fcntl.lockf(self.fd, fcntl.LOCK_SH)
# Python 3.2 raises IOError, Python3.3+ raises OSError
except (IOError, OSError):
if exclusive:
raise self.WriteLockFailed(self.path)
else:
raise self.ReadLockFailed(self.path)
self.is_exclusive = exclusive
def upgrade(self):
try:
fcntl.lockf(self.fd, fcntl.LOCK_EX)
# Python 3.2 raises IOError, Python3.3+ raises OSError
except (IOError, OSError):
raise self.WriteLockFailed(self.path)
self.is_exclusive = True
def release(self):
fcntl.lockf(self.fd, fcntl.LOCK_UN)
self.fd.close()
def check_extension_modules():
from . import platform
if (hashindex.API_VERSION != 2 or
chunker.API_VERSION != 2 or
crypto.API_VERSION != 2 or
platform.API_VERSION != 2):
if hashindex.API_VERSION != 2:
raise ExtensionModuleError
if chunker.API_VERSION != 2:
raise ExtensionModuleError
if crypto.API_VERSION != 2:
raise ExtensionModuleError
if platform.API_VERSION != 2:
raise ExtensionModuleError
@ -318,9 +281,45 @@ def timestamp(s):
def ChunkerParams(s):
    """Parse a chunker parameter string.

    Format: "CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE"
    (this matches the --chunker-params metavar and the CHUNKER_PARAMS default).

    Returns the four values as ints, in the same order.
    Raises ValueError if there are not exactly 4 comma-separated fields,
    if a field is not an integer, or if the max chunk size exponent > 23.
    """
    # bug fix: the fields were previously unpacked in the wrong order, so the
    # "> 23" guard was applied to the hash window size (last field, typically
    # 4095) instead of CHUNK_MAX_EXP, rejecting every valid parameter spec.
    chunk_min, chunk_max, chunk_mask, window_size = s.split(',')
    if int(chunk_max) > 23:
        # do not go beyond 2**23 (8MB) chunk size now,
        # COMPR_BUFFER can only cope with up to this size
        raise ValueError
    return int(chunk_min), int(chunk_max), int(chunk_mask), int(window_size)
def CompressionSpec(s):
    """Parse a --compression argument value into a spec dict.

    Accepted forms:
      - 'none' / 'lz4'                 -> {'name': <algo>}
      - 'zlib' / 'lzma'                -> {'name': <algo>, 'level': 6}
      - 'zlib,N' / 'lzma,N' (0<=N<=9)  -> {'name': <algo>, 'level': N}
      - 'N' (0<=N<=9, DEPRECATED)      -> {'name': 'zlib', 'level': N}

    Raises ValueError for anything else.
    """
    fields = s.split(',')
    algo = fields[0]
    try:
        numeric_level = int(algo)
    except ValueError:
        pass  # not the deprecated numeric form; fall through to named algos
    else:
        # DEPRECATED: a bare integer means "zlib at that level"
        if len(fields) > 1 or not 0 <= numeric_level <= 9:
            raise ValueError
        return dict(name='zlib', level=numeric_level)
    if algo in ('none', 'lz4'):
        # these take no level; any extra fields are ignored (as before)
        return dict(name=algo)
    if algo in ('zlib', 'lzma'):
        if len(fields) == 1:
            level = 6  # default compression level in py stdlib
        elif len(fields) == 2:
            level = int(fields[1])
            if not 0 <= level <= 9:
                raise ValueError
        else:
            raise ValueError
        return dict(name=algo, level=level)
    raise ValueError
def is_cachedir(path):
"""Determines whether the specified path is a cache directory (and
therefore should potentially be excluded from the backup) according to
@ -532,9 +531,9 @@ class Location:
else:
path = self.path
return 'ssh://{}{}{}{}'.format('{}@'.format(self.user) if self.user else '',
self.host,
':{}'.format(self.port) if self.port else '',
path)
self.host,
':{}'.format(self.port) if self.port else '',
path)
def location_validator(archive=None):
@ -609,7 +608,7 @@ def daemonize():
class StableDict(dict):
    """A dict subclass whose items() returns a key-sorted list.

    This makes iteration over items deterministic regardless of
    insertion order (a plain dict view is returned unsorted).
    """
    def items(self):
        pairs = super().items()
        return sorted(pairs)
if sys.version < '3.3':

View file

@ -1,13 +1,14 @@
from binascii import hexlify, a2b_base64, b2a_base64
from getpass import getpass
import configparser
import getpass
import os
import msgpack
import textwrap
import hmac
from hashlib import sha256
import zlib
from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
from .compress import Compressor, COMPR_BUFFER
from .helpers import IntegrityError, get_keys_dir, Error
PREFIX = b'\0' * 8
@ -17,11 +18,17 @@ class UnsupportedPayloadError(Error):
"""Unsupported payload type {}. A newer version is required to access this repository.
"""
class KeyfileNotFoundError(Error):
"""No key file for repository {} found in {}.
"""
class RepoKeyNotFoundError(Error):
"""No key entry found in the config of repository {}.
"""
class HMAC(hmac.HMAC):
"""Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews
"""
@ -32,28 +39,36 @@ class HMAC(hmac.HMAC):
def key_creator(repository, args):
if args.encryption == 'keyfile':
return KeyfileKey.create(repository, args)
elif args.encryption == 'passphrase':
elif args.encryption == 'repokey':
return RepoKey.create(repository, args)
elif args.encryption == 'passphrase': # deprecated, kill in 1.x
return PassphraseKey.create(repository, args)
else:
return PlaintextKey.create(repository, args)
def key_factory(repository, manifest_data):
if manifest_data[0] == KeyfileKey.TYPE:
key_type = manifest_data[0]
if key_type == KeyfileKey.TYPE:
return KeyfileKey.detect(repository, manifest_data)
elif manifest_data[0] == PassphraseKey.TYPE:
elif key_type == RepoKey.TYPE:
return RepoKey.detect(repository, manifest_data)
elif key_type == PassphraseKey.TYPE: # deprecated, kill in 1.x
return PassphraseKey.detect(repository, manifest_data)
elif manifest_data[0] == PlaintextKey.TYPE:
elif key_type == PlaintextKey.TYPE:
return PlaintextKey.detect(repository, manifest_data)
else:
raise UnsupportedPayloadError(manifest_data[0])
raise UnsupportedPayloadError(key_type)
class KeyBase:
TYPE = None # override in subclasses
def __init__(self):
def __init__(self, repository):
self.TYPE_STR = bytes([self.TYPE])
self.compression_level = 0
self.repository = repository
self.target = None # key location file path / repo obj
self.compressor = Compressor('none', buffer=COMPR_BUFFER)
def id_hash(self, data):
"""Return HMAC hash using the "id" HMAC key
@ -73,23 +88,23 @@ class PlaintextKey(KeyBase):
@classmethod
def create(cls, repository, args):
print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.')
return cls()
print('Encryption NOT enabled.\nUse the "--encryption=repokey|keyfile|passphrase" to enable encryption.')
return cls(repository)
@classmethod
def detect(cls, repository, manifest_data):
return cls()
return cls(repository)
def id_hash(self, data):
return sha256(data).digest()
def encrypt(self, data):
return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)])
return b''.join([self.TYPE_STR, self.compressor.compress(data)])
def decrypt(self, id, data):
if data[0] != self.TYPE:
raise IntegrityError('Invalid encryption envelope')
data = zlib.decompress(memoryview(data)[1:])
data = self.compressor.decompress(memoryview(data)[1:])
if id and sha256(data).digest() != id:
raise IntegrityError('Chunk id verification failed')
return data
@ -116,7 +131,7 @@ class AESKeyBase(KeyBase):
return HMAC(self.id_key, data, sha256).digest()
def encrypt(self, data):
data = zlib.compress(data, self.compression_level)
data = self.compressor.compress(data)
self.enc_cipher.reset()
data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
@ -129,7 +144,7 @@ class AESKeyBase(KeyBase):
if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac:
raise IntegrityError('Encryption envelope checksum mismatch')
self.dec_cipher.reset(iv=PREFIX + data[33:41])
data = zlib.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview
data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:]))
if id and HMAC(self.id_key, data, sha256).digest() != id:
raise IntegrityError('Chunk id verification failed')
return data
@ -154,38 +169,65 @@ class AESKeyBase(KeyBase):
self.dec_cipher = AES(is_encrypt=False, key=self.enc_key)
class Passphrase(str):
    """A str subclass holding a secret passphrase.

    repr() never reveals the value. Helpers obtain the passphrase from the
    BORG_PASSPHRASE environment variable or by interactive prompting.
    """

    @classmethod
    def env_passphrase(cls, default=None):
        """Return the passphrase from BORG_PASSPHRASE (or *default*), else None."""
        value = os.environ.get('BORG_PASSPHRASE', default)
        return cls(value) if value is not None else None

    @classmethod
    def getpass(cls, prompt):
        """Prompt interactively (echo suppressed) and wrap the input."""
        return cls(getpass.getpass(prompt))

    @classmethod
    def new(cls, allow_empty=False):
        """Obtain a new passphrase: from the environment if set, else prompt.

        Interactive entry loops until two entries match and (unless
        allow_empty) the passphrase is non-blank.
        """
        from_env = cls.env_passphrase()
        if from_env is not None:
            return from_env
        while True:
            candidate = cls.getpass('Enter new passphrase: ')
            if not candidate and not allow_empty:
                print('Passphrase must not be blank')
                continue
            if candidate != cls.getpass('Enter same passphrase again: '):
                print('Passphrases do not match')
                continue
            print('Remember your passphrase. Your data will be inaccessible without it.')
            return candidate

    def __repr__(self):
        return '<Passphrase "***hidden***">'

    def kdf(self, salt, iterations, length):
        """Derive *length* bytes of key material via PBKDF2-SHA256."""
        return pbkdf2_sha256(self.encode('utf-8'), salt, iterations, length)
class PassphraseKey(AESKeyBase):
# This mode is DEPRECATED and will be killed at 1.0 release.
# With this mode:
# - you can never ever change your passphrase for existing repos.
# - you can never ever use a different iterations count for existing repos.
TYPE = 0x01
iterations = 100000
iterations = 100000 # must not be changed ever!
@classmethod
def create(cls, repository, args):
key = cls()
passphrase = os.environ.get('BORG_PASSPHRASE')
if passphrase is not None:
passphrase2 = passphrase
else:
passphrase, passphrase2 = 1, 2
while passphrase != passphrase2:
passphrase = getpass('Enter passphrase: ')
if not passphrase:
print('Passphrase must not be blank')
continue
passphrase2 = getpass('Enter same passphrase again: ')
if passphrase != passphrase2:
print('Passphrases do not match')
key = cls(repository)
print('WARNING: "passphrase" mode is deprecated and will be removed in 1.0.')
print('If you want something similar (but with less issues), use "repokey" mode.')
passphrase = Passphrase.new(allow_empty=False)
key.init(repository, passphrase)
if passphrase:
print('Remember your passphrase. Your data will be inaccessible without it.')
return key
@classmethod
def detect(cls, repository, manifest_data):
prompt = 'Enter passphrase for %s: ' % repository._location.orig
key = cls()
passphrase = os.environ.get('BORG_PASSPHRASE')
key = cls(repository)
passphrase = Passphrase.env_passphrase()
if passphrase is None:
passphrase = getpass(prompt)
passphrase = Passphrase.getpass(prompt)
while True:
key.init(repository, passphrase)
try:
@ -194,7 +236,7 @@ class PassphraseKey(AESKeyBase):
key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
return key
except IntegrityError:
passphrase = getpass(prompt)
passphrase = Passphrase.getpass(prompt)
def change_passphrase(self):
class ImmutablePassphraseError(Error):
@ -203,42 +245,31 @@ class PassphraseKey(AESKeyBase):
raise ImmutablePassphraseError
def init(self, repository, passphrase):
self.init_from_random_data(pbkdf2_sha256(passphrase.encode('utf-8'), repository.id, self.iterations, 100))
self.init_from_random_data(passphrase.kdf(repository.id, self.iterations, 100))
self.init_ciphers()
class KeyfileKey(AESKeyBase):
FILE_ID = 'BORG_KEY'
TYPE = 0x00
class KeyfileKeyBase(AESKeyBase):
@classmethod
def detect(cls, repository, manifest_data):
key = cls()
path = cls.find_key_file(repository)
prompt = 'Enter passphrase for key file %s: ' % path
passphrase = os.environ.get('BORG_PASSPHRASE', '')
while not key.load(path, passphrase):
passphrase = getpass(prompt)
key = cls(repository)
target = key.find_key()
prompt = 'Enter passphrase for key %s: ' % target
passphrase = Passphrase.env_passphrase(default='')
while not key.load(target, passphrase):
passphrase = Passphrase.getpass(prompt)
num_blocks = num_aes_blocks(len(manifest_data) - 41)
key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
return key
@classmethod
def find_key_file(cls, repository):
id = hexlify(repository.id).decode('ascii')
keys_dir = get_keys_dir()
for name in os.listdir(keys_dir):
filename = os.path.join(keys_dir, name)
with open(filename, 'r') as fd:
line = fd.readline().strip()
if (line and line.startswith(cls.FILE_ID) and
line[len(cls.FILE_ID)+1:] == id):
return filename
raise KeyfileNotFoundError(repository._location.canonical_path(), get_keys_dir())
def find_key(self):
raise NotImplementedError
def load(self, filename, passphrase):
with open(filename, 'r') as fd:
cdata = a2b_base64(''.join(fd.readlines()[1:]).encode('ascii')) # .encode needed for Python 3.[0-2]
def load(self, target, passphrase):
raise NotImplementedError
def _load(self, key_data, passphrase):
cdata = a2b_base64(key_data.encode('ascii')) # .encode needed for Python 3.[0-2]
data = self.decrypt_key_file(cdata, passphrase)
if data:
key = msgpack.unpackb(data)
@ -249,23 +280,22 @@ class KeyfileKey(AESKeyBase):
self.enc_hmac_key = key[b'enc_hmac_key']
self.id_key = key[b'id_key']
self.chunk_seed = key[b'chunk_seed']
self.path = filename
return True
return False
def decrypt_key_file(self, data, passphrase):
d = msgpack.unpackb(data)
assert d[b'version'] == 1
assert d[b'algorithm'] == b'sha256'
key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32)
key = passphrase.kdf(d[b'salt'], d[b'iterations'], 32)
data = AES(is_encrypt=False, key=key).decrypt(d[b'data'])
if HMAC(key, data, sha256).digest() != d[b'hash']:
return None
return data
if HMAC(key, data, sha256).digest() == d[b'hash']:
return data
def encrypt_key_file(self, data, passphrase):
salt = get_random_bytes(32)
iterations = 100000
key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32)
key = passphrase.kdf(salt, iterations, 32)
hash = HMAC(key, data, sha256).digest()
cdata = AES(is_encrypt=True, key=key).encrypt(data)
d = {
@ -278,7 +308,7 @@ class KeyfileKey(AESKeyBase):
}
return msgpack.packb(d)
def save(self, path, passphrase):
def _save(self, passphrase):
key = {
'version': 1,
'repository_id': self.repository_id,
@ -288,45 +318,101 @@ class KeyfileKey(AESKeyBase):
'chunk_seed': self.chunk_seed,
}
data = self.encrypt_key_file(msgpack.packb(key), passphrase)
with open(path, 'w') as fd:
fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.repository_id).decode('ascii')))
fd.write('\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii'))))
fd.write('\n')
self.path = path
key_data = '\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii')))
return key_data
def change_passphrase(self):
passphrase, passphrase2 = 1, 2
while passphrase != passphrase2:
passphrase = getpass('New passphrase: ')
passphrase2 = getpass('Enter same passphrase again: ')
if passphrase != passphrase2:
print('Passphrases do not match')
self.save(self.path, passphrase)
print('Key file "%s" updated' % self.path)
passphrase = Passphrase.new(allow_empty=True)
self.save(self.target, passphrase)
print('Key updated')
    @classmethod
    def create(cls, repository, args):
        """Create a new key for *repository* and persist it.

        Reads the new passphrase from BORG_PASSPHRASE if set, otherwise
        prompts interactively (empty passphrases are allowed here).
        Generates fresh random key material and saves it to the
        subclass-specific target (key file or repository config).
        """
        passphrase = Passphrase.new(allow_empty=True)
        key = cls(repository)
        key.repository_id = repository.id
        # init_from_random_data / init_ciphers are provided by AESKeyBase (not shown here)
        key.init_from_random_data(get_random_bytes(100))
        key.init_ciphers()
        target = key.get_new_target(args)
        key.save(target, passphrase)
        print('Key in "%s" created.' % target)
        print('Keep this key safe. Your data will be inaccessible without it.')
        return key
def save(self, target, passphrase):
raise NotImplementedError
def get_new_target(self, args):
raise NotImplementedError
class KeyfileKey(KeyfileKeyBase):
TYPE = 0x00
FILE_ID = 'BORG_KEY'
def find_key(self):
    """Search the keys directory for the keyfile belonging to this repository.

    A keyfile is identified by its first line, which has the format
    "<FILE_ID> <hex repository id>" (see save()).  The first file whose
    header matches this repository's id is returned.

    :return: path of the matching keyfile
    :raises KeyfileNotFoundError: if no keyfile for this repository exists
    """
    id = hexlify(self.repository.id).decode('ascii')
    keys_dir = get_keys_dir()
    for name in os.listdir(keys_dir):
        filename = os.path.join(keys_dir, name)
        with open(filename, 'r') as fd:
            # only the header line is needed to identify the file
            line = fd.readline().strip()
            # skip FILE_ID and the separating blank, then compare the hex id
            if line.startswith(self.FILE_ID) and line[len(self.FILE_ID)+1:] == id:
                return filename
    raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir())
def get_new_target(self, args):
filename = args.repository.to_key_filename()
path = filename
i = 1
while os.path.exists(path):
i += 1
path = filename + '.%d' % i
passphrase = os.environ.get('BORG_PASSPHRASE')
if passphrase is not None:
passphrase2 = passphrase
else:
passphrase, passphrase2 = 1, 2
while passphrase != passphrase2:
passphrase = getpass('Enter passphrase (empty for no passphrase):')
passphrase2 = getpass('Enter same passphrase again: ')
if passphrase != passphrase2:
print('Passphrases do not match')
key = cls()
key.repository_id = repository.id
key.init_from_random_data(get_random_bytes(100))
key.init_ciphers()
key.save(path, passphrase)
print('Key file "%s" created.' % key.path)
print('Keep this file safe. Your data will be inaccessible without it.')
return key
return path
def load(self, target, passphrase):
    """Load and decrypt the key from the keyfile at `target`.

    The first line of the file is the "FILE_ID <hex repo id>" header and
    is skipped; everything after it is handed to self._load() together
    with the passphrase.  On success, remember `target` for later save().

    :return: the success flag from self._load()
    """
    with open(target, 'r') as fd:
        fd.readline()  # skip the header line
        key_data = fd.read()
    success = self._load(key_data, passphrase)
    if success:
        self.target = target
    return success
def save(self, target, passphrase):
    """Encrypt the key with `passphrase` and write it to the keyfile at `target`.

    File layout: one header line "FILE_ID <hex repo id>", followed by the
    key data produced by self._save() and a trailing newline.
    Remembers `target` for later operations.
    """
    key_data = self._save(passphrase)
    header = '%s %s\n' % (self.FILE_ID, hexlify(self.repository_id).decode('ascii'))
    with open(target, 'w') as fd:
        fd.write(header)
        fd.write(key_data + '\n')
    self.target = target
class RepoKey(KeyfileKeyBase):
    """Keyfile-style key that is stored inside the repository config
    ('key' option) instead of in a local keyfile — see Repository.save_key /
    Repository.load_key."""
    TYPE = 0x03

    def find_key(self):
        """Return the repository location if the repo config contains a key.

        :raises RepoKeyNotFoundError: the repo config has no 'key' option
        """
        loc = self.repository._location.canonical_path()
        try:
            self.repository.load_key()
            return loc
        except configparser.NoOptionError:
            raise RepoKeyNotFoundError(loc)

    def get_new_target(self, args):
        # the "target" for a repo key is the repository object itself
        return self.repository

    def load(self, target, passphrase):
        """Load the key from the repository config; returns the success flag."""
        # what we get in target is just a repo location, but we already have the repo obj:
        target = self.repository
        key_data = target.load_key()
        key_data = key_data.decode('utf-8')  # remote repo: msgpack issue #99, getting bytes
        success = self._load(key_data, passphrase)
        if success:
            self.target = target
        return success

    def save(self, target, passphrase):
        """Encrypt the key with `passphrase` and store it in the repo config."""
        key_data = self._save(passphrase)
        key_data = key_data.encode('utf-8')  # remote repo: msgpack issue #99, giving bytes
        target.save_key(key_data)
        self.target = target

286
borg/locking.py Normal file
View file

@ -0,0 +1,286 @@
import errno
import json
import os
import socket
import threading
import time
from borg.helpers import Error
ADD, REMOVE = 'add', 'remove'
SHARED, EXCLUSIVE = 'shared', 'exclusive'
def get_id():
    """Get identification tuple for 'us'"""
    # (hostname, pid, thread id) identifies the current locker; the thread
    # id is masked to 32 bits so it stays short when embedded in filenames.
    host = socket.gethostname()
    process_id = os.getpid()
    thread_id = threading.current_thread().ident & 0xffffffff
    return host, process_id, thread_id
class TimeoutTimer:
    """
    A timer for timeout checks (can also deal with no timeout, give timeout=None [default]).
    It can also compute and optionally execute a reasonable sleep time (e.g. to avoid
    polling too often or to support thread/process rescheduling).
    """

    def __init__(self, timeout=None, sleep=None):
        """
        Initialize a timer.

        :param timeout: time out interval [s] or None (no timeout)
        :param sleep: sleep interval [s] (>= 0: do sleep call, <0: don't call sleep)
                      or None (autocompute: use 10% of timeout, or 1s for no timeout)
        """
        if timeout is not None and timeout < 0:
            raise ValueError("timeout must be >= 0")
        self.timeout_interval = timeout
        # pick a polling interval automatically if the caller gave none
        if sleep is None:
            sleep = 1.0 if timeout is None else timeout / 10.0
        self.sleep_interval = sleep
        # both are set by start()
        self.start_time = None
        self.end_time = None

    def __repr__(self):
        return "<%s: start=%r end=%r timeout=%r sleep=%r>" % (
            self.__class__.__name__, self.start_time, self.end_time,
            self.timeout_interval, self.sleep_interval)

    def start(self):
        """Arm the timer; returns self so it can be chained after the constructor."""
        now = time.time()
        self.start_time = now
        if self.timeout_interval is not None:
            self.end_time = now + self.timeout_interval
        return self

    def sleep(self):
        """Sleep for the configured interval (negative interval: no-op)."""
        if self.sleep_interval < 0:
            return
        time.sleep(self.sleep_interval)

    def timed_out(self):
        """Return True if an armed timeout has expired (False if no timeout)."""
        if self.end_time is None:
            return False
        return time.time() >= self.end_time

    def timed_out_or_sleep(self):
        """Return True if timed out, otherwise sleep once and return False."""
        expired = self.timed_out()
        if not expired:
            self.sleep()
        return expired
class ExclusiveLock:
    """An exclusive Lock based on mkdir fs operation being atomic.

    mkdir either creates the lock directory (lock acquired) or fails with
    EEXIST (somebody already holds the lock) — there is no race window
    between check and creation.
    """
    # NOTE: the docstrings of the Error subclasses below are used as runtime
    # message templates ({} is filled in by Error) - do not reword them.
    class LockError(Error):
        """Failed to acquire the lock {}."""

    class LockTimeout(LockError):
        """Failed to create/acquire the lock {} (timeout)."""

    class LockFailed(LockError):
        """Failed to create/acquire the lock {} ({})."""

    class UnlockError(Error):
        """Failed to release the lock {}."""

    class NotLocked(UnlockError):
        """Failed to release the lock {} (was not locked)."""

    class NotMyLock(UnlockError):
        """Failed to release the lock {} (was/is locked, but not by me)."""

    def __init__(self, path, timeout=None, sleep=None, id=None):
        self.timeout = timeout
        self.sleep = sleep
        self.path = os.path.abspath(path)
        self.id = id or get_id()
        # marker file inside the lock dir, named after (hostname, pid, tid),
        # so by_me() can tell whether *we* are the current holder
        self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id)

    def __enter__(self):
        return self.acquire()

    def __exit__(self, *exc):
        self.release()

    def __repr__(self):
        return "<%s: %r>" % (self.__class__.__name__, self.unique_name)

    def acquire(self, timeout=None, sleep=None):
        """Acquire the lock, polling until success or timeout; returns self.

        :raises LockTimeout: still held by someone else when time ran out
        :raises LockFailed: mkdir failed for a reason other than EEXIST
        """
        if timeout is None:
            timeout = self.timeout
        if sleep is None:
            sleep = self.sleep
        timer = TimeoutTimer(timeout, sleep).start()
        while True:
            try:
                os.mkdir(self.path)
            except OSError as err:
                if err.errno == errno.EEXIST:  # already locked
                    if self.by_me():
                        # our marker file is inside: we already hold the lock
                        return self
                    if timer.timed_out_or_sleep():
                        raise self.LockTimeout(self.path)
                else:
                    raise self.LockFailed(self.path, str(err))
            else:
                # mkdir succeeded: we got the lock, leave our marker file
                with open(self.unique_name, "wb"):
                    pass
                return self

    def release(self):
        """Release the lock.

        :raises NotLocked: the lock is not held at all
        :raises NotMyLock: the lock is held, but not by us
        """
        if not self.is_locked():
            raise self.NotLocked(self.path)
        if not self.by_me():
            raise self.NotMyLock(self.path)
        os.unlink(self.unique_name)
        os.rmdir(self.path)

    def is_locked(self):
        """Return True if anybody (possibly us) holds the lock."""
        return os.path.exists(self.path)

    def by_me(self):
        """Return True if we hold the lock (our marker file exists)."""
        return os.path.exists(self.unique_name)

    def break_lock(self):
        """Forcibly remove the lock, regardless of who holds it."""
        if self.is_locked():
            # remove all holders' marker files, then the lock dir itself
            for name in os.listdir(self.path):
                os.unlink(os.path.join(self.path, name))
            os.rmdir(self.path)
class LockRoster:
    """
    A Lock Roster to track shared/exclusive lockers.

    Note: you usually should call the methods with an exclusive lock held,
    to avoid conflicting access by multiple threads/processes/machines.
    """

    def __init__(self, path, id=None):
        self.path = path
        self.id = id or get_id()

    def load(self):
        """Read the roster file; a missing file yields an empty roster dict."""
        try:
            with open(self.path) as f:
                return json.load(f)
        except IOError as err:
            # only "file does not exist" is expected; re-raise anything else
            if err.errno != errno.ENOENT:
                raise
            return {}

    def save(self, data):
        """Write the roster dict to the roster file as JSON."""
        with open(self.path, "w") as f:
            json.dump(data, f)

    def remove(self):
        """Delete the roster file."""
        os.unlink(self.path)

    def get(self, key):
        """Return the set of locker id tuples registered under `key`."""
        # JSON stores tuples as lists, so convert back on the way out
        return set(tuple(entry) for entry in self.load().get(key, []))

    def modify(self, key, op):
        """Add (op=ADD) or remove (op=REMOVE) our own id under `key` and persist."""
        roster = self.load()
        elements = set(tuple(entry) for entry in roster.get(key, []))
        if op == ADD:
            elements.add(self.id)
        elif op == REMOVE:
            elements.remove(self.id)
        else:
            raise ValueError('Unknown LockRoster op %r' % op)
        roster[key] = [list(entry) for entry in elements]
        self.save(roster)
class UpgradableLock:
    """
    A Lock for a resource that can be accessed in a shared or exclusive way.
    Typically, write access to a resource needs an exclusive lock (1 writer,
    no one is allowed reading) and read access to a resource needs a shared
    lock (multiple readers are allowed).
    """
    # NOTE: the docstrings of the Error subclasses below are used as runtime
    # message templates ({} is filled in by Error) - do not reword them.
    class SharedLockFailed(Error):
        """Failed to acquire shared lock [{}]"""

    class ExclusiveLockFailed(Error):
        """Failed to acquire write lock [{}]"""

    def __init__(self, path, exclusive=False, sleep=None, id=None):
        self.path = path
        self.is_exclusive = exclusive
        self.sleep = sleep
        self.id = id or get_id()
        # globally keeping track of shared and exclusive lockers:
        self._roster = LockRoster(path + '.roster', id=id)
        # an exclusive lock, used for:
        # - holding while doing roster queries / updates
        # - holding while the UpgradableLock itself is exclusive
        self._lock = ExclusiveLock(path + '.exclusive', id=id)

    def __enter__(self):
        return self.acquire()

    def __exit__(self, *exc):
        self.release()

    def __repr__(self):
        return "<%s: %r>" % (self.__class__.__name__, self.id)

    def acquire(self, exclusive=None, remove=None, sleep=None):
        """Acquire the lock in shared or exclusive mode; returns self.

        :param exclusive: mode to acquire in; None means self.is_exclusive
        :param remove: roster key whose entry for us is dropped while
                       acquiring (used by upgrade()/downgrade())
        :raises ExclusiveLockFailed / SharedLockFailed: underlying lock failed
        """
        if exclusive is None:
            exclusive = self.is_exclusive
        sleep = sleep or self.sleep or 0.2
        try:
            if exclusive:
                # on return, self._lock is kept held - that is what makes
                # us exclusive (nobody else can update / join the roster)
                self._wait_for_readers_finishing(remove, sleep)
                self._roster.modify(EXCLUSIVE, ADD)
            else:
                # hold self._lock only briefly, to update the roster safely
                with self._lock:
                    if remove is not None:
                        self._roster.modify(remove, REMOVE)
                    self._roster.modify(SHARED, ADD)
            self.is_exclusive = exclusive
            return self
        except ExclusiveLock.LockError as err:
            msg = str(err)
            if exclusive:
                raise self.ExclusiveLockFailed(msg)
            else:
                raise self.SharedLockFailed(msg)

    def _wait_for_readers_finishing(self, remove, sleep):
        # poll until no shared lockers remain; on success we return with
        # self._lock still held
        while True:
            self._lock.acquire()
            if remove is not None:
                self._roster.modify(remove, REMOVE)
                remove = None  # drop our old roster entry only once
            if len(self._roster.get(SHARED)) == 0:
                return  # we are the only one and we keep the lock!
            self._lock.release()
            time.sleep(sleep)

    def release(self):
        """Release the lock, updating the roster accordingly."""
        if self.is_exclusive:
            self._roster.modify(EXCLUSIVE, REMOVE)
            self._lock.release()
        else:
            with self._lock:
                self._roster.modify(SHARED, REMOVE)

    def upgrade(self):
        # shared -> exclusive, dropping our SHARED roster entry on the way
        if not self.is_exclusive:
            self.acquire(exclusive=True, remove=SHARED)

    def downgrade(self):
        # exclusive -> shared, dropping our EXCLUSIVE roster entry on the way
        if self.is_exclusive:
            self.acquire(exclusive=False, remove=EXCLUSIVE)

    def break_lock(self):
        """Forcibly remove roster and underlying lock, regardless of holders."""
        self._roster.remove()
        self._lock.break_lock()

View file

@ -1,42 +1,41 @@
class LRUCache(dict):
def __init__(self, capacity):
super(LRUCache, self).__init__()
class LRUCache:
def __init__(self, capacity, dispose):
self._cache = {}
self._lru = []
self._capacity = capacity
self._dispose = dispose
def __setitem__(self, key, value):
try:
self._lru.remove(key)
except ValueError:
pass
assert key not in self._cache, (
"Unexpected attempt to replace a cached item,"
" without first deleting the old item.")
self._lru.append(key)
while len(self._lru) > self._capacity:
del self[self._lru[0]]
return super(LRUCache, self).__setitem__(key, value)
self._cache[key] = value
def __getitem__(self, key):
try:
self._lru.remove(key)
self._lru.append(key)
except ValueError:
pass
return super(LRUCache, self).__getitem__(key)
value = self._cache[key] # raise KeyError if not found
self._lru.remove(key)
self._lru.append(key)
return value
def __delitem__(self, key):
try:
self._lru.remove(key)
except ValueError:
pass
return super(LRUCache, self).__delitem__(key)
value = self._cache.pop(key) # raise KeyError if not found
self._dispose(value)
self._lru.remove(key)
def pop(self, key, default=None):
try:
self._lru.remove(key)
except ValueError:
pass
return super(LRUCache, self).pop(key, default)
def __contains__(self, key):
return key in self._cache
def _not_implemented(self, *args, **kw):
raise NotImplementedError
popitem = setdefault = update = _not_implemented
def clear(self):
for value in self._cache.values():
self._dispose(value)
self._cache.clear()
# useful for testing
def items(self):
return self._cache.items()
def __len__(self):
return len(self._cache)

View file

@ -3,7 +3,6 @@ import fcntl
import msgpack
import os
import select
import shutil
from subprocess import Popen, PIPE
import sys
import tempfile
@ -11,7 +10,6 @@ import traceback
from . import __version__
from .hashindex import NSIndex
from .helpers import Error, IntegrityError
from .repository import Repository
@ -25,24 +23,28 @@ class ConnectionClosed(Error):
class PathNotAllowed(Error):
"""Repository path not allowed"""
class InvalidRPCMethod(Error):
"""RPC method is not valid"""
class RepositoryServer:
class RepositoryServer: # pragma: no cover
rpc_methods = (
'__len__',
'check',
'commit',
'delete',
'get',
'list',
'negotiate',
'open',
'put',
'repair',
'rollback',
)
'__len__',
'check',
'commit',
'delete',
'destroy',
'get',
'list',
'negotiate',
'open',
'put',
'repair',
'rollback',
'save_key',
'load_key',
)
def __init__(self, restrict_to_paths):
self.repository = None
@ -71,7 +73,7 @@ class RepositoryServer:
type, msgid, method, args = unpacked
method = method.decode('ascii')
try:
if not method in self.rpc_methods:
if method not in self.rpc_methods:
raise InvalidRPCMethod(method)
try:
f = getattr(self, method)
@ -106,9 +108,10 @@ class RepositoryServer:
class RemoteRepository:
extra_test_args = []
remote_path = None
umask = None
class RPCError(Exception):
def __init__(self, name):
self.name = name
@ -122,9 +125,11 @@ class RemoteRepository:
self.responses = {}
self.unpacker = msgpack.Unpacker(use_list=False)
self.p = None
# use local umask also for the remote process
umask = ['--umask', '%03o' % self.umask]
if location.host == '__testsuite__':
args = [sys.executable, '-m', 'borg.archiver', 'serve'] + self.extra_test_args
else:
args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args
else: # pragma: no cover
args = ['ssh']
if location.port:
args += ['-p', str(location.port)]
@ -132,7 +137,7 @@ class RemoteRepository:
args.append('%s@%s' % (location.user, location.host))
else:
args.append('%s' % location.host)
args += ['borg', 'serve']
args += [self.remote_path, 'serve'] + umask
self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE)
self.stdin_fd = self.p.stdin.fileno()
self.stdout_fd = self.p.stdout.fileno()
@ -152,6 +157,9 @@ class RemoteRepository:
def __del__(self):
self.close()
def __repr__(self):
return '<%s %s>' % (self.__class__.__name__, self.location.canonical_path())
def call(self, cmd, *args, **kw):
for resp in self.call_many(cmd, [args], **kw):
return resp
@ -199,7 +207,7 @@ class RemoteRepository:
break
r, w, x = select.select(self.r_fds, w_fds, self.x_fds, 1)
if x:
raise Exception('FD exception occured')
raise Exception('FD exception occurred')
if r:
data = os.read(self.stdout_fd, BUFSIZE)
if not data:
@ -277,6 +285,12 @@ class RemoteRepository:
def delete(self, id_, wait=True):
return self.call('delete', id_, wait=wait)
def save_key(self, keydata):
return self.call('save_key', keydata)
def load_key(self):
return self.call('load_key')
def close(self):
if self.p:
self.p.stdin.close()
@ -291,56 +305,29 @@ class RemoteRepository:
class RepositoryCache:
"""A caching Repository wrapper
Caches Repository GET operations using a temporary file
Caches Repository GET operations using a local temporary Repository.
"""
def __init__(self, repository):
self.tmppath = None
self.index = None
self.data_fd = None
self.repository = repository
self.entries = {}
self.initialize()
tmppath = tempfile.mkdtemp(prefix='borg-tmp')
self.caching_repo = Repository(tmppath, create=True, exclusive=True)
def __del__(self):
self.cleanup()
def initialize(self):
self.tmppath = tempfile.mkdtemp()
self.index = NSIndex()
self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
def cleanup(self):
del self.index
if self.data_fd:
self.data_fd.close()
if self.tmppath:
shutil.rmtree(self.tmppath)
def load_object(self, offset, size):
self.data_fd.seek(offset)
data = self.data_fd.read(size)
assert len(data) == size
return data
def store_object(self, key, data):
self.data_fd.seek(0, os.SEEK_END)
self.data_fd.write(data)
offset = self.data_fd.tell()
self.index[key] = offset - len(data), len(data)
self.caching_repo.destroy()
def get(self, key):
return next(self.get_many([key]))
def get_many(self, keys):
unknown_keys = [key for key in keys if key not in self.index]
unknown_keys = [key for key in keys if key not in self.caching_repo]
repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys))
for key in keys:
try:
yield self.load_object(*self.index[key])
except KeyError:
yield self.caching_repo.get(key)
except Repository.ObjectNotFound:
for key_, data in repository_iterator:
if key_ == key:
self.store_object(key, data)
self.caching_repo.put(key, data)
yield data
break
# Consume any pending requests

45
borg/repository.py Normal file → Executable file
View file

@ -9,7 +9,8 @@ import sys
from zlib import crc32
from .hashindex import NSIndex
from .helpers import Error, IntegrityError, read_msgpack, write_msgpack, unhexlify, UpgradableLock
from .helpers import Error, IntegrityError, read_msgpack, write_msgpack, unhexlify
from .locking import UpgradableLock
from .lrucache import LRUCache
MAX_OBJECT_SIZE = 20 * 1024 * 1024
@ -61,6 +62,9 @@ class Repository:
def __del__(self):
self.close()
def __repr__(self):
return '<%s %s>' % (self.__class__.__name__, self.path)
def create(self, path):
"""Create a new empty repository at `path`
"""
@ -77,9 +81,23 @@ class Repository:
config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii'))
with open(os.path.join(path, 'config'), 'w') as fd:
self.save_config(path, config)
def save_config(self, path, config):
config_path = os.path.join(path, 'config')
with open(config_path, 'w') as fd:
config.write(fd)
def save_key(self, keydata):
assert self.config
keydata = keydata.decode('utf-8') # remote repo: msgpack issue #99, getting bytes
self.config.set('repository', 'key', keydata)
self.save_config(self.path, self.config)
def load_key(self):
keydata = self.config.get('repository', 'key')
return keydata.encode('utf-8') # remote repo: msgpack issue #99, returning bytes
def destroy(self):
"""Destroy the repository at `self.path`
"""
@ -113,11 +131,11 @@ class Repository:
self.path = path
if not os.path.isdir(path):
raise self.DoesNotExist(path)
self.lock = UpgradableLock(os.path.join(path, 'lock'), exclusive).acquire()
self.config = RawConfigParser()
self.config.read(os.path.join(self.path, 'config'))
if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
raise self.InvalidRepository(path)
self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive)
self.max_segment_size = self.config.getint('repository', 'max_segment_size')
self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
self.id = unhexlify(self.config.get('repository', 'id').strip())
@ -148,7 +166,7 @@ class Repository:
self._active_txn = True
try:
self.lock.upgrade()
except UpgradableLock.WriteLockFailed:
except UpgradableLock.ExclusiveLockFailed:
# if upgrading the lock to exclusive fails, we do not have an
# active transaction. this is important for "serve" mode, where
# the repository instance lives on - even if exceptions happened.
@ -316,7 +334,6 @@ class Repository:
report_error('Adding commit tag to segment {}'.format(transaction_id))
self.io.segment = transaction_id + 1
self.io.write_commit()
self.io.close_segment()
if current_index and not repair:
if len(current_index) != len(self.index):
report_error('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index)))
@ -341,6 +358,11 @@ class Repository:
self.index = self.open_index(self.get_transaction_id())
return len(self.index)
def __contains__(self, id):
if not self.index:
self.index = self.open_index(self.get_transaction_id())
return id in self.index
def list(self, limit=None, marker=None):
if not self.index:
self.index = self.open_index(self.get_transaction_id())
@ -390,7 +412,7 @@ class Repository:
self.segments.setdefault(segment, 0)
def preload(self, ids):
"""Preload objects (only applies to remote repositories
"""Preload objects (only applies to remote repositories)
"""
@ -410,7 +432,8 @@ class LoggedIO:
def __init__(self, path, limit, segments_per_dir, capacity=90):
self.path = path
self.fds = LRUCache(capacity)
self.fds = LRUCache(capacity,
dispose=lambda fd: fd.close())
self.segment = 0
self.limit = limit
self.segments_per_dir = segments_per_dir
@ -418,9 +441,8 @@ class LoggedIO:
self._write_fd = None
def close(self):
for segment in list(self.fds.keys()):
self.fds.pop(segment).close()
self.close_segment()
self.fds.clear()
self.fds = None # Just to make sure we're disabled
def segment_iterator(self, reverse=False):
@ -494,6 +516,8 @@ class LoggedIO:
return fd
def delete_segment(self, segment):
if segment in self.fds:
del self.fds[segment]
try:
os.unlink(self.segment_filename(segment))
except OSError:
@ -536,7 +560,8 @@ class LoggedIO:
header = fd.read(self.header_fmt.size)
def recover_segment(self, segment, filename):
self.fds.pop(segment).close()
if segment in self.fds:
del self.fds[segment]
# FIXME: save a copy of the original file
with open(filename, 'rb') as fd:
data = memoryview(fd.read())

View file

@ -73,7 +73,7 @@ class BaseTestCase(unittest.TestCase):
d1 = [filename] + [getattr(s1, a) for a in attrs]
d2 = [filename] + [getattr(s2, a) for a in attrs]
if not os.path.islink(path1) or utime_supports_fd:
# Older versions of llfuse does not support ns precision properly
# Older versions of llfuse do not support ns precision properly
if fuse and not have_fuse_mtime_ns:
d1.append(round(st_mtime_ns(s1), -4))
d2.append(round(st_mtime_ns(s2), -4))
@ -94,30 +94,3 @@ class BaseTestCase(unittest.TestCase):
return
time.sleep(.1)
raise Exception('wait_for_mount(%s) timeout' % path)
def get_tests(suite):
"""Generates a sequence of tests from a test suite
"""
for item in suite:
try:
# TODO: This could be "yield from..." with Python 3.3+
for i in get_tests(item):
yield i
except TypeError:
yield item
class TestLoader(unittest.TestLoader):
"""A customized test loader that properly detects and filters our test cases
"""
def loadTestsFromName(self, pattern, module=None):
suite = self.discover('borg.testsuite', '*.py')
tests = unittest.TestSuite()
for test in get_tests(suite):
if pattern.lower() in test.id().lower():
tests.addTest(test)
return tests

View file

@ -1,12 +1,12 @@
from datetime import datetime, timezone
import msgpack
from mock import Mock
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
from ..key import PlaintextKey
from ..helpers import Manifest
from . import BaseTestCase
from .mock import Mock
class MockCache:
@ -23,7 +23,7 @@ class ArchiveTimestampTestCase(BaseTestCase):
def _test_timestamp_parsing(self, isoformat, expected):
repository = Mock()
key = PlaintextKey()
key = PlaintextKey(repository)
manifest = Manifest(repository, key)
a = Archive(repository, key, manifest, 'test', create=True)
a.close()
@ -46,7 +46,7 @@ class ChunkBufferTestCase(BaseTestCase):
def test(self):
data = [{b'foo': 1}, {b'bar': 2}]
cache = MockCache()
key = PlaintextKey()
key = PlaintextKey(None)
chunks = CacheChunkBuffer(cache, key, None)
for d in data:
chunks.add(d)

View file

@ -11,6 +11,9 @@ import time
import unittest
from hashlib import sha256
from mock import patch
import pytest
from .. import xattr
from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP
from ..archiver import Archiver
@ -20,11 +23,10 @@ from ..helpers import Manifest
from ..remote import RemoteRepository, PathNotAllowed
from ..repository import Repository
from . import BaseTestCase
from .mock import patch
try:
import llfuse
has_llfuse = True
has_llfuse = True or llfuse # avoids "unused import"
except ImportError:
has_llfuse = False
@ -32,6 +34,12 @@ has_lchflags = hasattr(os, 'lchflags')
src_dir = os.path.join(os.getcwd(), os.path.dirname(__file__), '..')
# Python <= 3.2 raises OSError instead of PermissionError (See #164)
try:
PermissionError = PermissionError
except NameError:
PermissionError = OSError
class changedir:
def __init__(self, dir):
@ -57,7 +65,9 @@ class environment_variable:
def __exit__(self, *args, **kw):
for k, v in self.old_values.items():
if v is not None:
if v is None:
del os.environ[k]
else:
os.environ[k] = v
@ -83,13 +93,13 @@ class ArchiverTestCaseBase(BaseTestCase):
os.mkdir(self.keys_path)
os.mkdir(self.cache_path)
with open(self.exclude_file_path, 'wb') as fd:
fd.write(b'input/file2\n# A commment line, then a blank line\n\n')
fd.write(b'input/file2\n# A comment line, then a blank line\n\n')
self._old_wd = os.getcwd()
os.chdir(self.tmpdir)
def tearDown(self):
shutil.rmtree(self.tmpdir)
os.chdir(self._old_wd)
shutil.rmtree(self.tmpdir)
def cmd(self, *args, **kw):
exit_code = kw.get('exit_code', 0)
@ -143,7 +153,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.create_regular_file('empty', size=0)
# next code line raises OverflowError on 32bit cpu (raspberry pi 2):
# 2600-01-01 > 2**64 ns
#os.utime('input/empty', (19880895600, 19880895600))
# os.utime('input/empty', (19880895600, 19880895600))
# thus, we better test with something not that far in future:
# 2038-01-19 (1970 + 2^31 - 1 seconds) is the 32bit "deadline":
os.utime('input/empty', (2**31 - 1, 2**31 - 1))
@ -151,15 +161,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.create_regular_file('flagfile', size=1024)
# Directory
self.create_regular_file('dir2/file2', size=1024 * 80)
# File owner
os.chown('input/file1', 100, 200)
# File mode
os.chmod('input/file1', 0o7755)
os.chmod('input/dir2', 0o555)
# Block device
os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20))
# Char device
os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40))
# Hard link
os.link(os.path.join(self.input_path, 'file1'),
os.path.join(self.input_path, 'hardlink'))
@ -172,24 +175,59 @@ class ArchiverTestCase(ArchiverTestCaseBase):
# same for newer ubuntu and centos.
# if this is supported just on specific platform, platform should be checked first,
# so that the test setup for all tests using it does not fail here always for others.
#xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False)
# xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False)
# FIFO node
os.mkfifo(os.path.join(self.input_path, 'fifo1'))
if has_lchflags:
os.lchflags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP)
try:
# Block device
os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20))
# Char device
os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40))
# File mode
os.chmod('input/dir2', 0o555) # if we take away write perms, we need root to remove contents
# File owner
os.chown('input/file1', 100, 200)
have_root = True # we have (fake)root
except PermissionError:
have_root = False
return have_root
def test_basic_functionality(self):
self.create_test_files()
have_root = self.create_test_files()
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
self.cmd('create', self.repository_location + '::test.2', 'input')
self.cmd('create', '--stats', self.repository_location + '::test.2', 'input')
with changedir('output'):
self.cmd('extract', self.repository_location + '::test')
self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2)
self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), 11)
expected = [
'input',
'input/bdev',
'input/cdev',
'input/dir2',
'input/dir2/file2',
'input/empty',
'input/fifo1',
'input/file1',
'input/flagfile',
'input/hardlink',
'input/link1',
]
if not have_root:
# we could not create these device files without (fake)root
expected.remove('input/bdev')
expected.remove('input/cdev')
if has_lchflags:
# remove the file we did not backup, so input and output become equal
expected.remove('input/flagfile') # this file is UF_NODUMP
os.remove(os.path.join('input', 'flagfile'))
self.assert_equal(self.cmd('list', '--short', self.repository_location + '::test').splitlines(), expected)
self.assert_dirs_equal('input', 'output/input')
info_output = self.cmd('info', self.repository_location + '::test')
self.assert_in('Number of files: 4', info_output)
item_count = 3 if has_lchflags else 4 # one file is UF_NODUMP
self.assert_in('Number of files: %d' % item_count, info_output)
shutil.rmtree(self.cache_path)
with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'):
info_output2 = self.cmd('info', self.repository_location + '::test')
@ -243,6 +281,19 @@ class ArchiverTestCase(ArchiverTestCaseBase):
if sparse_support and hasattr(st, 'st_blocks'):
self.assert_true(st.st_blocks * 512 < total_len / 10) # is output sparse?
def test_unusual_filenames(self):
filenames = ['normal', 'with some blanks', '(with_parens)', ]
for filename in filenames:
filename = os.path.join(self.input_path, filename)
with open(filename, 'wb') as fd:
pass
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
for filename in filenames:
with changedir('output'):
self.cmd('extract', self.repository_location + '::test', os.path.join('input', filename))
assert os.path.exists(os.path.join('output', 'input', filename))
def test_repository_swap_detection(self):
self.create_test_files()
os.environ['BORG_PASSPHRASE'] = 'passphrase'
@ -253,7 +304,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('init', '--encryption=none', self.repository_location)
self._set_repository_id(self.repository_path, repository_id)
self.assert_equal(repository_id, self._extract_repository_id(self.repository_path))
self.assert_raises(Cache.EncryptionMethodMismatch, lambda :self.cmd('create', self.repository_location + '::test.2', 'input'))
self.assert_raises(Cache.EncryptionMethodMismatch, lambda: self.cmd('create', self.repository_location + '::test.2', 'input'))
def test_repository_swap_detection2(self):
self.create_test_files()
@ -263,7 +314,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('create', self.repository_location + '_encrypted::test', 'input')
shutil.rmtree(self.repository_path + '_encrypted')
os.rename(self.repository_path + '_unencrypted', self.repository_path + '_encrypted')
self.assert_raises(Cache.RepositoryAccessAborted, lambda :self.cmd('create', self.repository_location + '_encrypted::test.2', 'input'))
self.assert_raises(Cache.RepositoryAccessAborted, lambda: self.cmd('create', self.repository_location + '_encrypted::test.2', 'input'))
def test_strip_components(self):
self.cmd('init', self.repository_location)
@ -389,11 +440,21 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.cmd('extract', '--dry-run', self.repository_location + '::test.2')
self.cmd('delete', self.repository_location + '::test')
self.cmd('extract', '--dry-run', self.repository_location + '::test.2')
self.cmd('delete', self.repository_location + '::test.2')
self.cmd('delete', '--stats', self.repository_location + '::test.2')
# Make sure all data except the manifest has been deleted
repository = Repository(self.repository_path)
self.assert_equal(len(repository), 1)
def test_delete_repo(self):
self.create_regular_file('file1', size=1024 * 80)
self.create_regular_file('dir2/file2', size=1024 * 80)
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
self.cmd('create', self.repository_location + '::test.2', 'input')
self.cmd('delete', self.repository_location)
# Make sure the repo is gone
self.assertFalse(os.path.exists(self.repository_path))
def test_corrupted_repository(self):
self.cmd('init', self.repository_location)
self.create_src_archive('test')
@ -405,6 +466,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
fd.write(b'XXXX')
self.cmd('check', self.repository_location, exit_code=1)
# we currently need to be able to create a lock directory inside the repo:
@pytest.mark.xfail(reason="we need to be able to create the lock directory inside the repo")
def test_readonly_repository(self):
self.cmd('init', self.repository_location)
self.create_src_archive('test')
@ -415,6 +478,13 @@ class ArchiverTestCase(ArchiverTestCaseBase):
# Restore permissions so shutil.rmtree is able to delete it
os.system('chmod -R u+w ' + self.repository_path)
def test_umask(self):
self.create_regular_file('file1', size=1024 * 80)
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
mode = os.stat(self.repository_path).st_mode
self.assertEqual(stat.S_IMODE(mode), 0o700)
def test_cmdline_compatibility(self):
self.create_regular_file('file1', size=1024 * 80)
self.cmd('init', self.repository_location)
@ -439,10 +509,38 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.assert_not_in('test1', output)
self.assert_in('test2', output)
def test_prune_repository_prefix(self):
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir)
self.cmd('create', self.repository_location + '::foo-2015-08-12-20:00', src_dir)
self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir)
self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir)
output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-')
self.assert_in('Keeping archive: foo-2015-08-12-20:00', output)
self.assert_in('Would prune: foo-2015-08-12-10:00', output)
output = self.cmd('list', self.repository_location)
self.assert_in('foo-2015-08-12-10:00', output)
self.assert_in('foo-2015-08-12-20:00', output)
self.assert_in('bar-2015-08-12-10:00', output)
self.assert_in('bar-2015-08-12-20:00', output)
self.cmd('prune', self.repository_location, '--keep-daily=2', '--prefix=foo-')
output = self.cmd('list', self.repository_location)
self.assert_not_in('foo-2015-08-12-10:00', output)
self.assert_in('foo-2015-08-12-20:00', output)
self.assert_in('bar-2015-08-12-10:00', output)
self.assert_in('bar-2015-08-12-20:00', output)
def test_usage(self):
self.assert_raises(SystemExit, lambda: self.cmd())
self.assert_raises(SystemExit, lambda: self.cmd('-h'))
def test_help(self):
assert 'Borg' in self.cmd('help')
assert 'patterns' in self.cmd('help', 'patterns')
assert 'Initialize' in self.cmd('help', 'init')
assert 'positional arguments' not in self.cmd('help', 'init', '--epilog-only')
assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only')
@unittest.skipUnless(has_llfuse, 'llfuse not installed')
def test_fuse_mount_repository(self):
mountpoint = os.path.join(self.tmpdir, 'mountpoint')
@ -524,7 +622,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
class ArchiverCheckTestCase(ArchiverTestCaseBase):
def setUp(self):
super(ArchiverCheckTestCase, self).setUp()
super().setUp()
with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
self.cmd('init', self.repository_location)
self.create_src_archive('archive1')

102
borg/testsuite/compress.py Normal file
View file

@ -0,0 +1,102 @@
import zlib
try:
import lzma
except ImportError:
lzma = None
import pytest
from ..compress import get_compressor, Compressor, CNONE, ZLIB, LZ4
buffer = bytes(2**16)
data = b'fooooooooobaaaaaaaar' * 10
params = dict(name='zlib', level=6, buffer=buffer)
def test_get_compressor():
    """get_compressor() maps a name to the right compressor class."""
    # each known name yields an instance of the matching class
    for kwargs, expected_cls in (
        (dict(name='none'), CNONE),
        (dict(name='lz4', buffer=buffer), LZ4),
        (dict(name='zlib'), ZLIB),
    ):
        assert isinstance(get_compressor(**kwargs), expected_cls)
    # unknown compressor names are rejected with a KeyError
    with pytest.raises(KeyError):
        get_compressor(name='foobar')
def test_cnull():
    """The 'none' compressor stores the payload verbatim plus a header."""
    compressor = get_compressor(name='none')
    compressed = compressor.compress(data)
    # no compression happens, so the header makes the output larger ...
    assert len(compressed) > len(data)
    # ... and the raw payload is embedded 1:1
    assert data in compressed
    assert data == compressor.decompress(compressed)
    # a generic Compressor autodetects the format from the header
    assert data == Compressor(**params).decompress(compressed)
def test_lz4():
    """LZ4 round-trips and actually shrinks the (compressible) test data."""
    compressor = get_compressor(name='lz4', buffer=buffer)
    compressed = compressor.compress(data)
    assert len(compressed) < len(data)
    assert data == compressor.decompress(compressed)
    # a generic Compressor autodetects the format from the header
    assert data == Compressor(**params).decompress(compressed)
def test_zlib():
    """zlib round-trips and actually shrinks the (compressible) test data."""
    compressor = get_compressor(name='zlib')
    compressed = compressor.compress(data)
    assert len(compressed) < len(data)
    assert data == compressor.decompress(compressed)
    # a generic Compressor autodetects the format from the header
    assert data == Compressor(**params).decompress(compressed)
def test_lzma():
    """lzma round-trips and actually shrinks the (compressible) test data."""
    if lzma is None:
        pytest.skip("No lzma support found.")
    compressor = get_compressor(name='lzma')
    compressed = compressor.compress(data)
    assert len(compressed) < len(data)
    assert data == compressor.decompress(compressed)
    # a generic Compressor autodetects the format from the header
    assert data == Compressor(**params).decompress(compressed)
def test_autodetect_invalid():
    """Unrecognized / corrupt headers must be rejected on decompression."""
    for crap in (b'\xff\xfftotalcrap', b'\x08\x00notreallyzlib'):
        with pytest.raises(ValueError):
            Compressor(**params).decompress(crap)
def test_zlib_compat():
    """zlib output stays compatible with plain zlib.compress() output.

    For compatibility reasons no extra header is added for zlib, nor is
    one expected when decompressing / autodetecting.
    """
    for level in range(10):
        compressor = get_compressor(name='zlib', level=level)
        ours = compressor.compress(data)
        stdlib = zlib.compress(data, level)
        # our output is byte-identical to the stdlib's ...
        assert ours == stdlib
        # ... and raw stdlib output decompresses fine, directly
        # and via header autodetection
        assert data == compressor.decompress(stdlib)
        assert data == Compressor(**params).decompress(stdlib)
def test_compressor():
    """Every supported compressor configuration round-trips the data."""
    # note: named `configs` (not `params`) to avoid shadowing the
    # module-level `params` used by the other tests
    configs = [
        dict(name='none', buffer=buffer),
        dict(name='lz4', buffer=buffer),
        dict(name='zlib', level=0, buffer=buffer),
        dict(name='zlib', level=6, buffer=buffer),
        dict(name='zlib', level=9, buffer=buffer),
    ]
    if lzma:
        configs += [
            dict(name='lzma', level=0, buffer=buffer),
            dict(name='lzma', level=6, buffer=buffer),
            dict(name='lzma', level=9, buffer=buffer),
        ]
    for config in configs:
        compressor = Compressor(**config)
        assert data == compressor.decompress(compressor.compress(data))

View file

@ -6,6 +6,11 @@ from ..hashindex import NSIndex, ChunkIndex
from . import BaseTestCase
def H(x):
    # make some 32byte long thing that depends on x:
    # the ".32" precision zero-pads the decimal digits of x to 32 chars
    return ('%-0.32d' % x).encode('ascii')
class HashIndexTestCase(BaseTestCase):
def _generic_test(self, cls, make_value, sha):
@ -78,3 +83,20 @@ class HashIndexTestCase(BaseTestCase):
second_half = list(idx.iteritems(marker=all[49][0]))
self.assert_equal(len(second_half), 50)
self.assert_equal(second_half, all[50:])
def test_chunkindex_merge(self):
    # merge() must: sum refcounts for keys present in both indexes,
    # copy over keys only present in the other index, and leave
    # keys only present in self untouched.
    idx1 = ChunkIndex()
    idx1[H(1)] = 1, 100, 100
    idx1[H(2)] = 2, 200, 200
    idx1[H(3)] = 3, 300, 300
    # no H(4) entry
    idx2 = ChunkIndex()
    idx2[H(1)] = 4, 100, 100
    idx2[H(2)] = 5, 200, 200
    # no H(3) entry
    idx2[H(4)] = 6, 400, 400
    idx1.merge(idx2)
    assert idx1[H(1)] == (5, 100, 100)  # 1 + 4
    assert idx1[H(2)] == (7, 200, 200)  # 2 + 5
    assert idx1[H(3)] == (3, 300, 300)  # only in idx1, unchanged
    assert idx1[H(4)] == (6, 400, 400)  # copied from idx2

View file

@ -1,14 +1,13 @@
import hashlib
from time import mktime, strptime
from datetime import datetime, timezone, timedelta
import os
import tempfile
import unittest
import pytest
import msgpack
from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \
StableDict, int_to_bigint, bigint_to_int, parse_timestamp
from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
prune_within, prune_split, \
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec
from . import BaseTestCase
@ -96,7 +95,7 @@ class PatternTestCase(BaseTestCase):
['/etc/passwd', '/etc/hosts', '/home', '/var/log/messages', '/var/log/dmesg'])
self.assert_equal(self.evaluate(['/home/u'], []), [])
self.assert_equal(self.evaluate(['/', '/home', '/etc/hosts'], ['/']), [])
self.assert_equal(self.evaluate(['/home/'], ['/home/user2']),
self.assert_equal(self.evaluate(['/home/'], ['/home/user2']),
['/home', '/home/user/.profile', '/home/user/.bashrc'])
self.assert_equal(self.evaluate(['/'], ['*.profile', '/var/log']),
['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc', '/home/user2/public_html/index.html'])
@ -106,6 +105,30 @@ class PatternTestCase(BaseTestCase):
['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
def test_compression_specs():
    """CompressionSpec parses 'name[,level]' strings into config dicts."""
    with pytest.raises(ValueError):
        CompressionSpec('')
    # bare digits 0..9 are shorthand for zlib at that level
    for level in (0, 1, 9):
        assert CompressionSpec(str(level)) == dict(name='zlib', level=level)
    with pytest.raises(ValueError):
        CompressionSpec('10')
    # plain algorithm names; zlib/lzma default to level 6
    assert CompressionSpec('none') == dict(name='none')
    assert CompressionSpec('lz4') == dict(name='lz4')
    assert CompressionSpec('zlib') == dict(name='zlib', level=6)
    assert CompressionSpec('zlib,0') == dict(name='zlib', level=0)
    assert CompressionSpec('zlib,9') == dict(name='zlib', level=9)
    with pytest.raises(ValueError):
        CompressionSpec('zlib,9,invalid')
    assert CompressionSpec('lzma') == dict(name='lzma', level=6)
    assert CompressionSpec('lzma,0') == dict(name='lzma', level=0)
    assert CompressionSpec('lzma,9') == dict(name='lzma', level=9)
    with pytest.raises(ValueError):
        CompressionSpec('lzma,9,invalid')
    with pytest.raises(ValueError):
        CompressionSpec('invalid')
class MakePathSafeTestCase(BaseTestCase):
def test(self):
@ -118,23 +141,6 @@ class MakePathSafeTestCase(BaseTestCase):
self.assert_equal(make_path_safe('/'), '.')
self.assert_equal(make_path_safe('/'), '.')
class UpgradableLockTestCase(BaseTestCase):
def test(self):
file = tempfile.NamedTemporaryFile()
lock = UpgradableLock(file.name)
lock.upgrade()
lock.upgrade()
lock.release()
@unittest.skipIf(os.getuid() == 0, 'Root can always open files for writing')
def test_read_only_lock_file(self):
file = tempfile.NamedTemporaryFile()
os.chmod(file.name, 0o444)
lock = UpgradableLock(file.name)
self.assert_raises(UpgradableLock.WriteLockFailed, lock.upgrade)
lock.release()
class MockArchive:
@ -161,7 +167,7 @@ class PruneSplitTestCase(BaseTestCase):
for ta in test_archives, reversed(test_archives):
self.assert_equal(set(prune_split(ta, '%Y-%m', n, skip)),
subset(test_archives, indices))
test_pairs = [(1, 1), (2, 1), (2, 28), (3, 1), (3, 2), (3, 31), (5, 1)]
test_dates = [local_to_UTC(month, day) for month, day in test_pairs]
test_archives = [MockArchive(date) for date in test_dates]
@ -185,24 +191,24 @@ class PruneWithinTestCase(BaseTestCase):
for ta in test_archives, reversed(test_archives):
self.assert_equal(set(prune_within(ta, within)),
subset(test_archives, indices))
# 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours
test_offsets = [60, 90*60, 150*60, 210*60, 25*60*60, 49*60*60]
now = datetime.now(timezone.utc)
test_dates = [now - timedelta(seconds=s) for s in test_offsets]
test_archives = [MockArchive(date) for date in test_dates]
dotest(test_archives, '1H', [0])
dotest(test_archives, '2H', [0, 1])
dotest(test_archives, '3H', [0, 1, 2])
dotest(test_archives, '1H', [0])
dotest(test_archives, '2H', [0, 1])
dotest(test_archives, '3H', [0, 1, 2])
dotest(test_archives, '24H', [0, 1, 2, 3])
dotest(test_archives, '26H', [0, 1, 2, 3, 4])
dotest(test_archives, '2d', [0, 1, 2, 3, 4])
dotest(test_archives, '2d', [0, 1, 2, 3, 4])
dotest(test_archives, '50H', [0, 1, 2, 3, 4, 5])
dotest(test_archives, '3d', [0, 1, 2, 3, 4, 5])
dotest(test_archives, '1w', [0, 1, 2, 3, 4, 5])
dotest(test_archives, '1m', [0, 1, 2, 3, 4, 5])
dotest(test_archives, '1y', [0, 1, 2, 3, 4, 5])
dotest(test_archives, '3d', [0, 1, 2, 3, 4, 5])
dotest(test_archives, '1w', [0, 1, 2, 3, 4, 5])
dotest(test_archives, '1m', [0, 1, 2, 3, 4, 5])
dotest(test_archives, '1y', [0, 1, 2, 3, 4, 5])
class StableDictTestCase(BaseTestCase):

121
borg/testsuite/locking.py Normal file
View file

@ -0,0 +1,121 @@
import time
import pytest
from ..locking import get_id, TimeoutTimer, ExclusiveLock , UpgradableLock, LockRoster, ADD, REMOVE, SHARED, EXCLUSIVE
ID1 = "foo", 1, 1
ID2 = "bar", 2, 2
def test_id():
    """get_id() identifies the current context as (hostname, pid, tid)."""
    host, pid, tid = get_id()
    assert isinstance(host, str)
    assert isinstance(pid, int)
    assert isinstance(tid, int)
    assert len(host) > 0
    assert pid > 0
class TestTimeoutTimer:
    def test_timeout(self):
        # a started timer reports timed_out() only after the limit elapsed
        limit = 0.5
        timer = TimeoutTimer(limit).start()
        assert not timer.timed_out()
        time.sleep(limit * 1.5)
        assert timer.timed_out()

    def test_notimeout_sleep(self):
        # with timeout=None the timer never times out, but each call to
        # timed_out_or_sleep() blocks for (at least) the sleep interval
        limit, pause = None, 0.5
        timer = TimeoutTimer(limit, pause).start()
        assert not timer.timed_out_or_sleep()
        assert time.time() >= timer.start_time + 1 * pause
        assert not timer.timed_out_or_sleep()
        assert time.time() >= timer.start_time + 2 * pause
@pytest.fixture()
def lockpath(tmpdir):
    # per-test path for a (not yet existing) lock inside pytest's tmpdir
    return str(tmpdir.join('lock'))
class TestExclusiveLock:
    def test_checks(self, lockpath):
        # entering the context manager yields a lock that is held by us
        with ExclusiveLock(lockpath, timeout=1) as held:
            assert held.is_locked() and held.by_me()

    def test_acquire_break_reacquire(self, lockpath):
        # after break_lock(), a different id can acquire the same path
        stale = ExclusiveLock(lockpath, id=ID1).acquire()
        stale.break_lock()
        with ExclusiveLock(lockpath, id=ID2):
            pass

    def test_timeout(self, lockpath):
        # a second locker times out while the first still holds the lock
        with ExclusiveLock(lockpath, id=ID1):
            with pytest.raises(ExclusiveLock.LockTimeout):
                ExclusiveLock(lockpath, id=ID2, timeout=0.1).acquire()
class TestUpgradableLock:
    def test_shared(self, lockpath):
        # two different ids may hold the shared lock at the same time
        first = UpgradableLock(lockpath, exclusive=False, id=ID1).acquire()
        second = UpgradableLock(lockpath, exclusive=False, id=ID2).acquire()
        assert len(first._roster.get(SHARED)) == 2
        assert len(first._roster.get(EXCLUSIVE)) == 0
        first.release()
        second.release()

    def test_exclusive(self, lockpath):
        # an exclusive holder is the only roster entry
        with UpgradableLock(lockpath, exclusive=True, id=ID1) as held:
            assert len(held._roster.get(SHARED)) == 0
            assert len(held._roster.get(EXCLUSIVE)) == 1

    def test_upgrade(self, lockpath):
        # upgrade() moves us from the SHARED to the EXCLUSIVE set;
        # upgrading an already-exclusive lock is a no-op
        with UpgradableLock(lockpath, exclusive=False) as held:
            held.upgrade()
            held.upgrade()  # NOP
            assert len(held._roster.get(SHARED)) == 0
            assert len(held._roster.get(EXCLUSIVE)) == 1

    def test_downgrade(self, lockpath):
        # downgrade() moves us from EXCLUSIVE back to SHARED;
        # downgrading an already-shared lock is a no-op
        with UpgradableLock(lockpath, exclusive=True) as held:
            held.downgrade()
            held.downgrade()  # NOP
            assert len(held._roster.get(SHARED)) == 1
            assert len(held._roster.get(EXCLUSIVE)) == 0

    def test_break(self, lockpath):
        # break_lock() empties the roster so another id can lock again
        stale = UpgradableLock(lockpath, exclusive=True, id=ID1).acquire()
        stale.break_lock()
        assert len(stale._roster.get(SHARED)) == 0
        assert len(stale._roster.get(EXCLUSIVE)) == 0
        with UpgradableLock(lockpath, exclusive=True, id=ID2):
            pass
@pytest.fixture()
def rosterpath(tmpdir):
    # per-test path for a (not yet existing) roster file inside pytest's tmpdir
    return str(tmpdir.join('roster'))
class TestLockRoster:
    def test_empty(self, rosterpath):
        # a fresh roster loads as an empty mapping and can be saved back
        roster = LockRoster(rosterpath)
        state = roster.load()
        roster.save(state)
        assert state == {}

    def test_modify_get(self, rosterpath):
        # ADD / REMOVE keep the SHARED set consistent across separate
        # LockRoster instances pointing at the same path
        first = LockRoster(rosterpath, id=ID1)
        assert first.get(SHARED) == set()
        first.modify(SHARED, ADD)
        assert first.get(SHARED) == {ID1, }
        second = LockRoster(rosterpath, id=ID2)
        second.modify(SHARED, ADD)
        assert second.get(SHARED) == {ID1, ID2, }
        first = LockRoster(rosterpath, id=ID1)
        first.modify(SHARED, REMOVE)
        assert first.get(SHARED) == {ID2, }
        second = LockRoster(rosterpath, id=ID2)
        second.modify(SHARED, REMOVE)
        assert second.get(SHARED) == set()

View file

@ -1,40 +1,52 @@
from ..lrucache import LRUCache
from . import BaseTestCase
import pytest
from tempfile import TemporaryFile
class LRUCacheTestCase(BaseTestCase):
class TestLRUCache:
def test(self):
c = LRUCache(2)
self.assert_equal(len(c), 0)
def test_lrucache(self):
c = LRUCache(2, dispose=lambda _: None)
assert len(c) == 0
assert c.items() == set()
for i, x in enumerate('abc'):
c[x] = i
self.assert_equal(len(c), 2)
self.assert_equal(set(c), set(['b', 'c']))
self.assert_equal(set(c.items()), set([('b', 1), ('c', 2)]))
self.assert_equal(False, 'a' in c)
self.assert_equal(True, 'b' in c)
self.assert_raises(KeyError, lambda: c['a'])
self.assert_equal(c['b'], 1)
self.assert_equal(c['c'], 2)
assert len(c) == 2
assert c.items() == set([('b', 1), ('c', 2)])
assert 'a' not in c
assert 'b' in c
with pytest.raises(KeyError):
c['a']
assert c['b'] == 1
assert c['c'] == 2
c['d'] = 3
self.assert_equal(len(c), 2)
self.assert_equal(c['c'], 2)
self.assert_equal(c['d'], 3)
c['c'] = 22
c['e'] = 4
self.assert_equal(len(c), 2)
self.assert_raises(KeyError, lambda: c['d'])
self.assert_equal(c['c'], 22)
self.assert_equal(c['e'], 4)
assert len(c) == 2
assert c['c'] == 2
assert c['d'] == 3
del c['c']
self.assert_equal(len(c), 1)
self.assert_raises(KeyError, lambda: c['c'])
self.assert_equal(c['e'], 4)
assert len(c) == 1
with pytest.raises(KeyError):
c['c']
assert c['d'] == 3
c.clear()
assert c.items() == set()
def test_pop(self):
c = LRUCache(2)
c[1] = 1
c[2] = 2
c.pop(1)
c[3] = 3
def test_dispose(self):
c = LRUCache(2, dispose=lambda f: f.close())
f1 = TemporaryFile()
f2 = TemporaryFile()
f3 = TemporaryFile()
c[1] = f1
c[2] = f2
assert not f2.closed
c[3] = f3
assert 1 not in c
assert f1.closed
assert 2 in c
assert not f2.closed
del c[2]
assert 2 not in c
assert f2.closed
c.clear()
assert c.items() == set()
assert f3.closed

View file

@ -1,5 +0,0 @@
try:
# Only available in python 3.3+
from unittest.mock import *
except ImportError:
from mock import *

View file

@ -102,4 +102,3 @@ class PlatformDarwinTestCase(BaseTestCase):
self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True)
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended'])
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended'])

View file

@ -2,12 +2,14 @@ import os
import shutil
import tempfile
from mock import patch
from ..hashindex import NSIndex
from ..helpers import Location, IntegrityError, UpgradableLock
from ..helpers import Location, IntegrityError
from ..locking import UpgradableLock
from ..remote import RemoteRepository, InvalidRPCMethod
from ..repository import Repository
from . import BaseTestCase
from .mock import patch
class RepositoryTestCaseBase(BaseTestCase):
@ -156,10 +158,10 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
for name in os.listdir(self.repository.path):
if name.startswith('index.'):
os.unlink(os.path.join(self.repository.path, name))
with patch.object(UpgradableLock, 'upgrade', side_effect=UpgradableLock.WriteLockFailed) as upgrade:
with patch.object(UpgradableLock, 'upgrade', side_effect=UpgradableLock.ExclusiveLockFailed) as upgrade:
self.reopen()
self.assert_raises(UpgradableLock.WriteLockFailed, lambda: len(self.repository))
upgrade.assert_called_once()
self.assert_raises(UpgradableLock.ExclusiveLockFailed, lambda: len(self.repository))
upgrade.assert_called_once_with()
def test_crash_before_write_index(self):
self.add_keys()
@ -309,7 +311,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
# Simulate a crash before compact
with patch.object(Repository, 'compact_segments') as compact:
self.repository.commit()
compact.assert_called_once()
compact.assert_called_once_with()
self.reopen()
self.check(repair=True)
self.assert_equal(self.repository.get(bytes(32)), b'data2')
@ -328,3 +330,7 @@ class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
def open(self, create=False):
return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
def test_crash_before_compact(self):
# skip this test, we can't mock-patch a Repository class in another process!
pass

View file

@ -1,11 +0,0 @@
import unittest
from . import TestLoader
def main():
    # Run the whole test suite via unittest, using the package's custom
    # TestLoader; defaultTest='' selects all discoverable tests.
    unittest.main(testLoader=TestLoader(), defaultTest='')
if __name__ == '__main__':
main()

View file

@ -11,7 +11,7 @@ from ctypes.util import find_library
def is_enabled(path=None):
"""Determine if xattr is enabled on the filesystem
"""
with tempfile.NamedTemporaryFile(dir=path) as fd:
with tempfile.NamedTemporaryFile(dir=path, prefix='borg-tmp') as fd:
try:
setxattr(fd.fileno(), 'user.name', b'value')
except OSError:

View file

@ -3,9 +3,18 @@
<h3>Useful Links</h3>
<ul>
<li><a href="https://borgbackup.github.io/">Main Web Site</a></li>
<li><a href="https://borgbackup.github.io/borgbackup/">Main Web Site</a></li>
<li><a href="https://pypi.python.org/pypi/borgbackup">PyPI packages</a></li>
<li><a href="https://github.com/borgbackup/borg/issues/147">Binary Packages</a></li>
<li><a href="https://github.com/borgbackup/borg/blob/master/CHANGES.rst">Current ChangeLog</a></li>
<li><a href="https://github.com/borgbackup/borg">GitHub</a></li>
<li><a href="https://github.com/borgbackup/borg/issues">Issue Tracker</a></li>
<li><a href="https://www.bountysource.com/teams/borgbackup">Bounties &amp; Fundraisers</a></li>
<li><a href="http://librelist.com/browser/borgbackup/">Mailing List</a></li>
</ul>
<h3>Related Projects</h3>
<ul>
<li><a href="https://borgbackup.github.io/borgweb/">BorgWeb</a></li>
</ul>

4
docs/changes.rst Normal file
View file

@ -0,0 +1,4 @@
.. include:: global.rst.inc
.. _changelog:
.. include:: ../CHANGES.rst

View file

@ -11,13 +11,13 @@
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys, os
from borg import __version__ as sw_version
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
import sys, os
sys.path.insert(0, os.path.abspath('..'))
from borg import __version__ as sw_version
# -- General configuration -----------------------------------------------------
@ -42,7 +42,7 @@ master_doc = 'index'
# General information about the project.
project = 'Borg - Deduplicating Archiver'
copyright = '2010-2014, Jonas Borgström'
copyright = '2010-2014, Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@ -134,7 +134,7 @@ html_static_path = []
# Custom sidebar templates, maps document names to template names.
html_sidebars = {
'index': ['sidebarlogo.html', 'sidebarusefullinks.html', 'searchbox.html'],
'**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sidebarusefullinks.html', 'searchbox.html']
'**': ['sidebarlogo.html', 'relations.html', 'searchbox.html', 'localtoc.html', 'sidebarusefullinks.html']
}
# Additional templates that should be rendered to pages, maps page names to
# template names.

67
docs/development.rst Normal file
View file

@ -0,0 +1,67 @@
.. include:: global.rst.inc
.. _development:
Development
===========
This chapter will get you started with |project_name| development.
|project_name| is written in Python (with a little bit of Cython and C for
the performance critical parts).
Building a development environment
----------------------------------
First, just install borg into a virtual env as described before.
To install some additional packages needed for running the tests, activate your
virtual env and run::
pip install -r requirements.d/development.txt
Running the tests
-----------------
The tests are in the borg/testsuite package.
To run them, you need to have fakeroot, tox and pytest installed.
To run the test suite use the following command::
fakeroot -u tox # run all tests
Some more advanced examples::
# verify a changed tox.ini (run this after any change to tox.ini):
fakeroot -u tox --recreate
fakeroot -u tox -e py32 # run all tests, but only on python 3.2
fakeroot -u tox borg.testsuite.locking # only run 1 test module
fakeroot -u tox borg.testsuite.locking -- -k '"not Timer"' # exclude some tests
fakeroot -u tox borg.testsuite -- -v # verbose py.test
Important notes:
- Without fakeroot -u some tests will fail.
- When using -- to give options to py.test, you MUST also give borg.testsuite[.module].
Building the docs with Sphinx
-----------------------------
The documentation (in reStructuredText format, .rst) is in docs/.
To build the html version of it, you need to have sphinx installed::
pip3 install sphinx
Now run::
cd docs/
make html
Then point a web browser at docs/_build/html/index.html.

View file

@ -70,8 +70,9 @@ When backing up to remote encrypted repos, is encryption done locally?
When backing up to remote servers, do I have to trust the remote server?
Yes and No.
No, as far as data confidentiality is concerned - all your files/dirs data
and metadata are stored in their encrypted form into the repository.
No, as far as data confidentiality is concerned - if you use encryption,
all your files/dirs data and metadata are stored in their encrypted form
into the repository.
Yes, as an attacker with access to the remote server could delete (or
otherwise make unavailable) all your backups.
@ -90,6 +91,20 @@ If I want to run |project_name| on a ARM CPU older than ARM v6?
echo "2" > /proc/cpu/alignment
Can |project_name| add redundancy to the backup data to deal with hardware malfunction?
No, it can't. While that at first sounds like a good idea to defend against some
defect HDD sectors or SSD flash blocks, dealing with this in a reliable way needs a lot
of low-level storage layout information and control which we do not have (and also can't
get, even if we wanted).
So, if you need that, consider RAID1 or a filesystem that offers redundant storage.
Can |project_name| verify data integrity of a backup archive?
Yes, if you want to detect accidental data damage (like bit rot), use the ``check``
operation. It will notice corruption using CRCs and hashes.
If you want to be able to detect malicious tampering also, use an encrypted repo.
It will then be able to check using CRCs and HMACs.
Why was Borg forked from Attic?
Borg was created in May 2015 in response to the difficulty of
getting new code or larger changes incorporated into Attic and

View file

@ -1,62 +0,0 @@
.. include:: global.rst.inc
.. _foreword:
Foreword
========
|project_name| is a secure backup program for Linux, FreeBSD and Mac OS X.
|project_name| is designed for efficient data storage where only new or
modified data is stored.
Features
--------
Space efficient storage
Variable block size `deduplication`_ is used to reduce the number of bytes
stored by detecting redundant data. Each file is split into a number of
variable length chunks and only chunks that have never been seen before
are compressed and added to the repository.
Optional data encryption
All data can be protected using 256-bit AES_ encryption and data integrity
and authenticity is verified using `HMAC-SHA256`_.
Off-site backups
|project_name| can store data on any remote host accessible over SSH as
long as |project_name| is installed.
Backups mountable as filesystems
Backup archives are :ref:`mountable <borg_mount>` as
`userspace filesystems`_ for easy backup verification and restores.
Glossary
--------
.. _deduplication_def:
Deduplication
Deduplication is a technique for improving storage utilization by
eliminating redundant data.
.. _archive_def:
Archive
An archive is a collection of files along with metadata that include file
permissions, directory structure and various file attributes.
Since each archive in a repository must have a unique name a good naming
convention is ``hostname-YYYY-MM-DD``.
.. _repository_def:
Repository
A repository is a filesystem directory storing data from zero or more
archives. The data in a repository is both deduplicated and
optionally encrypted making it both efficient and safe. Repositories are
created using :ref:`borg_init` and the contents can be listed using
:ref:`borg_list`.
Key file
When a repository is initialized a key file containing a password
protected encryption key is created. It is vital to keep this file safe
since the repository data is totally inaccessible without it.

View file

@ -13,6 +13,7 @@
.. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2
.. _ACL: https://en.wikipedia.org/wiki/Access_control_list
.. _libacl: http://savannah.nongnu.org/projects/acl/
.. _liblz4: https://github.com/Cyan4973/lz4
.. _OpenSSL: https://www.openssl.org/
.. _Python: http://www.python.org/
.. _Buzhash: https://en.wikipedia.org/wiki/Buzhash

View file

@ -1,81 +1,18 @@
.. include:: global.rst.inc
Welcome to Borg
================
|project_name| is a deduplicating and compressing backup program.
Optionally, it also supports authenticated encryption.
The main goal of |project_name| is to provide an efficient and secure way
to backup data. The data deduplication technique used makes |project_name|
suitable for daily backups since only the changes are stored. The authenticated
encryption makes it suitable for backups to not fully trusted targets.
|project_name| is written in Python (with a little bit of Cython and C for
the performance critical parts).
Easy to use
-----------
Initialize a new backup :ref:`repository <repository_def>` and create your
first backup :ref:`archive <archive_def>` in two lines::
$ borg init /mnt/backup
$ borg create /mnt/backup::Monday ~/Documents
$ borg create --stats /mnt/backup::Tuesday ~/Documents
Archive name: Tuesday
Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a
Start time: Tue Mar 25 12:00:10 2014
End time: Tue Mar 25 12:00:10 2014
Duration: 0.08 seconds
Number of files: 358
Original size Compressed size Deduplicated size
This archive: 57.16 MB 46.78 MB 151.67 kB
All archives: 114.02 MB 93.46 MB 44.81 MB
See the :ref:`quickstart` chapter for a more detailed example.
Easy installation
-----------------
You can use pip to install |project_name| quickly and easily::
$ pip3 install borgbackup
Need more help with installing? See :ref:`installation`.
User's Guide
============
Borg Documentation
==================
.. toctree::
:maxdepth: 2
foreword
intro
installation
quickstart
usage
faq
support
changes
internals
Getting help
============
If you've found a bug or have a concrete feature request, please create a new
ticket on the project's `issue tracker`_ (after checking whether someone else
already has reported the same thing).
For more general questions or discussions, IRC or mailing list are preferred.
IRC
---
Join us on channel ##borgbackup on chat.freenode.net. As usual on IRC, just
ask or tell directly and then patiently wait for replies. Stay connected.
Mailing list
------------
There is a mailing list for Borg on librelist_ that you can use for feature
requests and general discussions about Borg. A mailing list archive is
available `here <http://librelist.com/browser/borgbackup/>`_.
To subscribe to the list, send an email to borgbackup@librelist.com and reply
to the confirmation mail. Likewise, to unsubscribe, send an email to
borgbackup-unsubscribe@librelist.com and reply to the confirmation mail.
development

View file

@ -9,6 +9,7 @@ Installation
* Python_ >= 3.2
* OpenSSL_ >= 1.0.0
* libacl_
* liblz4_
* some python dependencies, see install_requires in setup.py
General notes
@ -19,12 +20,18 @@ usually available as an optional install.
Virtualenv_ can be used to build and install |project_name| without affecting
the system Python or requiring root access.
Important:
if you install into a virtual environment, you need to activate
the virtual env first (``source borg-env/bin/activate``).
Alternatively, directly run ``borg-env/bin/borg`` (or symlink that into some
directory that is in your PATH so you can just run ``borg``).
The llfuse_ python package is also required if you wish to mount an
archive as a FUSE filesystem. Only FUSE >= 2.8.0 can support llfuse.
You only need Cython to compile the .pyx files to the respective .c files
when using |project_name| code from git. For |project_name| releases, the .c
files will be bundled.
files will be bundled, so you won't need Cython to install a release.
Platform notes
--------------
@ -32,7 +39,7 @@ FreeBSD: You may need to get a recent enough OpenSSL version from FreeBSD ports.
Mac OS X: You may need to get a recent enough OpenSSL version from homebrew_.
Mac OS X: A recent enough FUSE implementation might be unavailable.
Mac OS X: You need OS X FUSE >= 3.0.
Debian / Ubuntu installation (from git)
@ -53,11 +60,17 @@ Some of the steps detailled below might be useful also for non-git installs.
# ACL support Headers + Library
apt-get install libacl1-dev libacl1
# lz4 super fast compression support Headers + Library
apt-get install liblz4-dev liblz4-1
# if you do not have gcc / make / etc. yet
apt-get install build-essential
# optional: lowlevel FUSE py binding - to mount backup archives
apt-get install python3-llfuse fuse
# optional: FUSE support - to mount backup archives
# in case you get complaints about permission denied on /etc/fuse.conf:
# on ubuntu this means your user is not in the "fuse" group. just add
# yourself there, log out and log in again.
apt-get install libfuse-dev fuse
# optional: for unit testing
apt-get install fakeroot
@ -73,6 +86,7 @@ Some of the steps detailled below might be useful also for non-git installs.
pip install cython # compile .pyx -> .c
pip install tox pytest # optional, for running unit tests
pip install sphinx # optional, to build the docs
pip install llfuse # optional, for FUSE support
cd borg
pip install -e . # in-place editable mode
@ -96,13 +110,16 @@ Some of the steps detailed below might be useful also for non-git installs.
# ACL support Headers + Library
sudo dnf install libacl-devel libacl
# optional: lowlevel FUSE py binding - to mount backup archives
sudo dnf install python3-llfuse fuse
# lz4 super fast compression support Headers + Library
sudo dnf install lz4
# optional: FUSE support - to mount backup archives
sudo dnf install fuse-devel fuse
# optional: for unit testing
sudo dnf install fakeroot
# get |project_name| from github, install it
git clone |git_url|
@ -114,8 +131,47 @@ Some of the steps detailed below might be useful also for non-git installs.
pip install cython # compile .pyx -> .c
pip install tox pytest # optional, for running unit tests
pip install sphinx # optional, to build the docs
pip install llfuse # optional, for FUSE support
cd borg
pip install -e . # in-place editable mode
# optional: run all the tests, on all supported Python versions
fakeroot -u tox
Cygwin (from git)
-----------------
Please note that running under cygwin is rather experimental, stuff has been
tested with Cygwin (x86-64) v2.1.0.
You'll need at least (use the cygwin installer to fetch/install these):
::
python3
python3-setuptools
python3-cython
binutils
gcc-core
git
libopenssl
liblz4_1 liblz4-devel # from cygwinports.org
make
openssh
openssl-devel
You can then install ``pip`` and ``virtualenv``:
::
easy_install-3.4 pip
pip install virtualenv
And now continue as for Linux (see above).
If creation of the virtual env fails, try deleting this file:
::
/usr/lib/python3.4/__pycache__/platform.cpython-34.pyc

View file

@ -26,7 +26,7 @@ README
simple text file telling that this is a |project_name| repository
config
repository configuration and lock file
repository configuration
data/
directory where the actual data is stored
@ -37,6 +37,9 @@ hints.%d
index.%d
repository index
lock.roster and lock.exclusive/*
used by the locking system to manage shared and exclusive locks
Config file
-----------
@ -55,9 +58,6 @@ identifier for repositories. It will not change if you move the
repository around so you can make a local transfer then decide to move
the repository to another (even remote) location at a later time.
|project_name| will do a POSIX read lock on the config file when operating
on the repository.
Keys
----
@ -168,13 +168,27 @@ A chunk is stored as an object as well, of course.
Chunks
------
|project_name| uses a rolling hash computed by the Buzhash_ algorithm, with a
window size of 4095 bytes (`0xFFF`), with a minimum chunk size of 1024 bytes.
It triggers (chunks) when the last 16 bits of the hash are zero, producing
chunks of 64kiB on average.
The |project_name| chunker uses a rolling hash computed by the Buzhash_ algorithm.
It triggers (chunks) when the last HASH_MASK_BITS bits of the hash are zero,
producing chunks of 2^HASH_MASK_BITS Bytes on average.
create --chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE
can be used to tune the chunker parameters, the default is:
- CHUNK_MIN_EXP = 10 (minimum chunk size = 2^10 B = 1 kiB)
- CHUNK_MAX_EXP = 23 (maximum chunk size = 2^23 B = 8 MiB)
- HASH_MASK_BITS = 16 (statistical medium chunk size ~= 2^16 B = 64 kiB)
- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`)
The default parameters are OK for relatively small backup data volumes and
repository sizes and a lot of available memory (RAM) and disk space for the
chunk index. If that does not apply, you are advised to tune these parameters
to keep the chunk count lower than with the defaults.
The buzhash table is altered by XORing it with a seed randomly generated once
for the archive, and stored encrypted in the keyfile.
for the archive, and stored encrypted in the keyfile. This is to prevent chunk
size based fingerprinting attacks on your encrypted repo contents (to guess
what files you have based on a specific set of chunk sizes).
Indexes / Caches
@ -243,7 +257,7 @@ Indexes / Caches memory usage
Here is the estimated memory usage of |project_name|:
chunk_count ~= total_file_size / 65536
chunk_count ~= total_file_size / 2 ^ HASH_MASK_BITS
repo_index_usage = chunk_count * 40
@ -252,20 +266,32 @@ Here is the estimated memory usage of |project_name|:
files_cache_usage = total_file_count * 240 + chunk_count * 80
mem_usage ~= repo_index_usage + chunks_cache_usage + files_cache_usage
= total_file_count * 240 + total_file_size / 400
= chunk_count * 164 + total_file_count * 240
All units are Bytes.
It is assuming every chunk is referenced exactly once and that typical chunk size is 64kiB.
It is assuming every chunk is referenced exactly once (if you have a lot of
duplicate chunks, you will have less chunks than estimated above).
It is also assuming that typical chunk size is 2^HASH_MASK_BITS (if you have
a lot of files smaller than this statistical medium chunk size, you will have
more chunks than estimated above, because 1 file is at least 1 chunk).
If a remote repository is used the repo index will be allocated on the remote side.
E.g. backing up a total count of 1Mi files with a total size of 1TiB:
E.g. backing up a total count of 1Mi files with a total size of 1TiB.
mem_usage = 1 * 2**20 * 240 + 1 * 2**40 / 400 = 2.8GiB
a) with create --chunker-params 10,23,16,4095 (default):
Note: there is a commandline option to switch off the files cache. You'll save
some memory, but it will need to read / chunk all the files then.
mem_usage = 2.8GiB
b) with create --chunker-params 10,23,20,4095 (custom):
mem_usage = 0.4GiB
Note: there is also the --no-files-cache option to switch off the files cache.
You'll save some memory, but it will need to read / chunk all the files then as
it can not skip unmodified files then.
Encryption
@ -291,6 +317,7 @@ Encryption keys are either derived from a passphrase or kept in a key file.
The passphrase is passed through the ``BORG_PASSPHRASE`` environment variable
or prompted for interactive usage.
Key files
---------
@ -355,4 +382,35 @@ representation of the repository id.
Compression
-----------
Currently, zlib level 6 is used as compression.
|project_name| supports the following compression methods:
- none (no compression, pass through data 1:1)
- lz4 (low compression, but super fast)
- zlib (level 0-9, level 0 is no compression [but still adding zlib overhead],
level 1 is low, level 9 is high compression)
- lzma (level 0-9, level 0 is low, level 9 is high compression).
Speed: none > lz4 > zlib > lzma
Compression: lzma > zlib > lz4 > none
Be careful, higher zlib and especially lzma compression levels might take a
lot of resources (CPU and memory).
The overall speed of course also depends on the speed of your target storage.
If that is slow, using a higher compression level might yield better overall
performance. You need to experiment a bit. Maybe just watch your CPU load, if
that is relatively low, increase compression until 1 core is 70-100% loaded.
Even if your target storage is rather fast, you might see interesting effects:
while doing no compression at all (none) is an operation that takes no time, it
likely will need to store more data to the storage compared to using lz4.
The time needed to transfer and store the additional data might be much more
than if you had used lz4 (which is super fast, but still might compress your
data about 2:1). This is assuming your data is compressible (if you back up
already compressed data, trying to compress it at backup time is usually
pointless).
Compression is applied after deduplication, thus using different compression
methods in one repo does not influence deduplication.
See ``borg create --help`` about how to specify the compression level and its default.

7
docs/intro.rst Normal file
View file

@ -0,0 +1,7 @@
.. include:: global.rst.inc
.. _foreword:
Introduction
============
.. include:: ../README.rst

View file

@ -89,6 +89,31 @@ certain number of old archives::
# and 6 monthly archives.
borg prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6
.. _backup_compression:
Backup compression
------------------
Default is no compression, but we support different methods with high speed
or high compression:
If you have a quick repo storage and you want a little compression:
$ borg create --compression lz4 /mnt/backup::repo ~
If you have a medium fast repo storage and you want a bit more compression (N=0..9,
0 means no compression, 9 means high compression):
$ borg create --compression zlib,N /mnt/backup::repo ~
If you have a very slow repo storage and you want high compression (N=0..9, 0 means
low compression, 9 means high compression):
$ borg create --compression lzma,N /mnt/backup::repo ~
You'll need to experiment a bit to find the best compression for your use case.
Keep an eye on CPU load and throughput.
.. _encrypted_repos:
Repository encryption
@ -159,6 +184,3 @@ mounting the remote filesystem, for example, using sshfs::
$ borg init /mnt/backup
$ fusermount -u /mnt
However, be aware that sshfs doesn't fully implement POSIX locks, so
you must be sure to not have two processes trying to access the same
repository at the same time.

37
docs/support.rst Normal file
View file

@ -0,0 +1,37 @@
.. include:: global.rst.inc
.. _support:
Support
=======
Please first read the docs and the FAQ section in the docs, a lot of stuff is
documented / explained there.
Issue Tracker
-------------
If you've found a bug or have a concrete feature request, please create a new
ticket on the project's `issue tracker`_ (after checking whether someone else
already has reported the same thing).
For more general questions or discussions, IRC or mailing list are preferred.
IRC
---
Join us on channel #borgbackup on chat.freenode.net.
As usual on IRC, just ask or tell directly and then patiently wait for replies.
Stay connected.
Mailing list
------------
There is a mailing list for Borg on librelist_ that you can use for feature
requests and general discussions about Borg. A mailing list archive is
available `here <http://librelist.com/browser/borgbackup/>`_.
To subscribe to the list, send an email to borgbackup@librelist.com and reply
to the confirmation mail.
To unsubscribe, send an email to borgbackup-unsubscribe@librelist.com and reply
to the confirmation mail.

View file

@ -15,6 +15,93 @@ Like most UNIX commands |project_name| is quiet by default but the ``-v`` or
``--verbose`` option can be used to get the program to output more status
messages as it is processing.
Return codes
------------
|project_name| can exit with the following return codes (rc):
::
0 no error, normal termination
1 some error occurred (this can be a complete or a partial failure)
128+N killed by signal N (e.g. 137 == kill -9)
Note: we are aware that more distinct return codes might be useful, but it is
not clear yet which return codes should be used for which precise conditions.
See issue #61 for a discussion about that. Depending on the outcome of the
discussion there, return codes may change in future (the only thing rather sure
is that 0 will always mean some sort of success and "not 0" will always mean
some sort of warning / error / failure - but the definition of success might
change).
Environment Variables
---------------------
|project_name| uses some environment variables for automation:
::
Specifying a passphrase:
BORG_PASSPHRASE : When set, use the value to answer the passphrase question for encrypted repositories.
Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning):
BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK : For "Warning: Attempting to access a previously unknown unencrypted repository"
BORG_RELOCATED_REPO_ACCESS_IS_OK : For "Warning: The repository at location ... was previously located at ..."
BORG_CHECK_I_KNOW_WHAT_I_AM_DOING : For "Warning: 'check --repair' is an experimental feature that might result in data loss."
Directories:
BORG_KEYS_DIR : Defaults to '~/.borg/keys'. This directory contains keys for encrypted repositories.
BORG_CACHE_DIR : Defaults to '~/.cache/borg'. This directory contains the local cache and might need a lot
of space for dealing with big repositories.
Building:
BORG_OPENSSL_PREFIX : Adds given OpenSSL header file directory to the default locations (setup.py).
General:
TMPDIR : where temporary files are stored (might need a lot of temporary space for some operations)
Please note:
- be very careful when using the "yes" sayers, the warnings with prompt exist for your / your data's security/safety
- also be very careful when putting your passphrase into a script, make sure it has appropriate file permissions
(e.g. mode 600, root:root).
Resource Usage
--------------
|project_name| might use a lot of resources depending on the size of the data set it is dealing with.
CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded.
Especially higher zlib and lzma compression levels use significant amounts of CPU cycles.
Memory (RAM): the chunks index and the files index are read into memory for performance reasons.
compression, esp. lzma compression with high levels might need substantial amounts
of memory.
Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the
deduplicated chunks used to represent them in the repository.
Cache files: chunks index and files index (plus a compressed collection of single-archive chunk indexes).
Chunks index: proportional to the amount of data chunks in your repo. lots of small chunks in your repo implies a big
chunks index. you may need to tweak the chunker params (see create options) if you have a lot of data and
you want to keep the chunks index at some reasonable size.
Files index: proportional to the amount of files in your last backup. can be switched off (see create options), but
next backup will be much slower if you do.
Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) data of course has to go
over the connection (ssh: repo url). if you use a locally mounted network filesystem, additionally some copy
operations used for transaction support also go over the connection. if you backup multiple sources to one
target repository, additional traffic happens for cache resynchronization.
In case you are interested in more details, please read the internals documentation.
.. include:: usage/init.rst.inc
Examples
@ -27,8 +114,43 @@ Examples
# Remote repository (accesses a remote borg via ssh)
$ borg init user@hostname:backup
# Encrypted remote repository
$ borg init --encryption=passphrase user@hostname:backup
# Encrypted remote repository, store the key in the repo
$ borg init --encryption=repokey user@hostname:backup
# Encrypted remote repository, store the key in your home dir
$ borg init --encryption=keyfile user@hostname:backup
Important notes about encryption:
Use encryption! Repository encryption protects you e.g. against the case that
an attacker has access to your backup repository.
But be careful with the key / the passphrase:
``--encryption=passphrase`` is DEPRECATED and will be removed in next major release.
This mode has very fundamental, unfixable problems (like you can never change
your passphrase or the pbkdf2 iteration count for an existing repository, because
the encryption / decryption key is directly derived from the passphrase).
If you want "passphrase-only" security, just use the ``repokey`` mode. The key will
be stored inside the repository (in its "config" file). In above mentioned
attack scenario, the attacker will have the key (but not the passphrase).
If you want "passphrase and having-the-key" security, use the ``keyfile`` mode.
The key will be stored in your home directory (in ``.borg/keys``). In the attack
scenario, the attacker who has just access to your repo won't have the key (and
also not the passphrase).
Make a backup copy of the key file (``keyfile`` mode) or repo config file
(``repokey`` mode) and keep it at a safe place, so you still have the key in
case it gets corrupted or lost.
The backup that is encrypted with that key won't help you with that, of course.
Make sure you use a good passphrase. Not too short, not too simple. The real
encryption / decryption key is encrypted with / locked by your passphrase.
If an attacker gets your key, he can't unlock and use it without knowing the
passphrase. In ``repokey`` and ``keyfile`` modes, you can change your passphrase
for existing repos.
.. include:: usage/create.rst.inc
@ -53,6 +175,21 @@ Examples
# Backup huge files with little chunk management overhead
$ borg create --chunker-params 19,23,21,4095 /mnt/backup::VMs /srv/VMs
# Backup a raw device (must not be active/in use/mounted at that time)
$ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda -
# No compression (default)
$ borg create /mnt/backup::repo ~
# Super fast, low compression
$ borg create --compression lz4 /mnt/backup::repo ~
# Less fast, higher compression (N = 0..9)
$ borg create --compression zlib,N /mnt/backup::repo ~
# Even slower, even higher compression (N = 0..9)
$ borg create --compression lzma,N /mnt/backup::repo ~
.. include:: usage/extract.rst.inc
@ -72,6 +209,9 @@ Examples
# Extract the "src" directory but exclude object files
$ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o'
Note: currently, extract always writes into the current working directory ("."),
so make sure you ``cd`` to the right place before calling ``borg extract``.
.. include:: usage/check.rst.inc
.. include:: usage/delete.rst.inc

View file

@ -0,0 +1,5 @@
tox
mock
pytest
pytest-cov<2.0.0
Cython

View file

@ -2,7 +2,7 @@
python_files = testsuite/*.py
[flake8]
ignore = E123,E126,E127,E129,E203,E221,E226,E231,E241,E265,E301,E302,E303,E713,F401,F403,W291,W293,W391
ignore = E226,F403
max-line-length = 250
exclude = versioneer.py,docs/conf.py,borg/_version.py
exclude = versioneer.py,docs/conf.py,borg/_version.py,build,dist,.git,.idea,.cache
max-complexity = 100

View file

@ -16,11 +16,10 @@ if sys.version_info < min_python:
print("Borg requires Python %d.%d or later" % min_python)
sys.exit(1)
try:
from setuptools import setup, Extension
except ImportError:
from distutils.core import setup, Extension
from setuptools import setup, Extension
compress_source = 'borg/compress.pyx'
crypto_source = 'borg/crypto.pyx'
chunker_source = 'borg/chunker.pyx'
hashindex_source = 'borg/hashindex.pyx'
@ -40,6 +39,7 @@ try:
def make_distribution(self):
self.filelist.extend([
'borg/compress.c',
'borg/crypto.c',
'borg/chunker.c', 'borg/_chunker.c',
'borg/hashindex.c', 'borg/_hashindex.c',
@ -47,13 +47,14 @@ try:
'borg/platform_freebsd.c',
'borg/platform_darwin.c',
])
super(Sdist, self).make_distribution()
super().make_distribution()
except ImportError:
class Sdist(versioneer.cmd_sdist):
def __init__(self, *args, **kwargs):
raise Exception('Cython is required to run sdist')
compress_source = compress_source.replace('.pyx', '.c')
crypto_source = crypto_source.replace('.pyx', '.c')
chunker_source = chunker_source.replace('.pyx', '.c')
hashindex_source = hashindex_source.replace('.pyx', '.c')
@ -61,7 +62,9 @@ except ImportError:
platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
from distutils.command.build_ext import build_ext
if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]):
if not all(os.path.exists(path) for path in [
compress_source, crypto_source, chunker_source, hashindex_source,
platform_linux_source, platform_freebsd_source]):
raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version')
@ -91,6 +94,7 @@ cmdclass = versioneer.get_cmdclass()
cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
ext_modules = [
Extension('borg.compress', [compress_source], libraries=['lz4']),
Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
Extension('borg.chunker', [chunker_source]),
Extension('borg.hashindex', [hashindex_source])
@ -129,7 +133,11 @@ setup(
'Topic :: System :: Archiving :: Backup',
],
packages=['borg', 'borg.testsuite'],
scripts=['scripts/borg'],
entry_points={
'console_scripts': [
'borg = borg.archiver:main',
]
},
cmdclass=cmdclass,
ext_modules=ext_modules,
# msgpack pure python data corruption was fixed in 0.4.6.

21
tox.ini
View file

@ -1,15 +1,14 @@
# tox configuration - if you change anything here, run this to verify:
# fakeroot -u tox --recreate
[tox]
envlist = py32, py33, py34
[testenv]
# Change dir to avoid import problem
changedir = {envdir}
deps =
pytest
commands = py.test
passenv = * # fakeroot -u needs some env vars
[testenv:py32]
deps =
pytest
mock
# Change dir to avoid import problem for cython code. The directory does
# not really matter, should be just different from the toplevel dir.
changedir = {toxworkdir}
deps = -rrequirements.d/development.txt
commands = py.test --cov=borg --pyargs {posargs:borg.testsuite}
# fakeroot -u needs some env vars:
passenv = *