Merge pull request #1 from borgbackup/master

Pull latest upstream master
2026-06-12 02:17:54 -04:00 · 2015-09-12 17:11:13 -04:00 · 2015-09-12 17:11:13 -04:00 · 31fdba089f
commit 31fdba089f
parent e17ca5123e 6c619000e3
58 changed files with 2250 additions and 2246 deletions
--- a/.coveragerc
+++ b/.coveragerc
@ -0,0 +1,17 @@
+[run]
+branch = True
+source = borg
+omit =
+    borg/__init__.py
+    borg/__main__.py
+    borg/_version.py
+
+[report]
+exclude_lines =
+    pragma: no cover
+    def __repr__
+    raise AssertionError
+    raise NotImplementedError
+    if 0:
+    if __name__ == .__main__.:
+ignore_errors = True
--- a/.gitignore
+++ b/.gitignore
@ -6,6 +6,7 @@ env
 .tox
 hashindex.c
 chunker.c
+compress.c
 crypto.c
 platform_darwin.c
 platform_freebsd.c
@ -16,3 +17,9 @@ platform_linux.c
 *.so
 docs/usage/*.inc
 .idea/
+.cache/
+borg/_version.py
+borg.build/
+borg.dist/
+borg.exe
+.coverage
--- a/.travis.yml
+++ b/.travis.yml
@ -1,12 +1,47 @@
+sudo: required
+
 language: python
-python:
-  - "3.2"
-  - "3.3"
-  - "3.4"
-# command to install dependencies
+
+cache:
+    directories:
+        - $HOME/.cache/pip
+
+matrix:
+    include:
+        - python: 3.2
+          os: linux
+          env: TOXENV=py32
+        - python: 3.3
+          os: linux
+          env: TOXENV=py33
+        - python: 3.4
+          os: linux
+          env: TOXENV=py34
+        - language: generic
+          os: osx
+          osx_image: xcode6.4
+          env: TOXENV=py32
+        - language: generic
+          os: osx
+          osx_image: xcode6.4
+          env: TOXENV=py33
+        - language: generic
+          os: osx
+          osx_image: xcode6.4
+          env: TOXENV=py34
+
 install:
-  - "sudo apt-get install -y libacl1-dev"
-  - "pip install --use-mirrors Cython"
-  - "pip install -e ."
-# command to run tests
-script: fakeroot -u py.test
+    - ./.travis/install.sh
+
+script:
+    - ./.travis/run.sh
+
+after_success:
+    - ./.travis/upload_coverage.sh
+
+notifications:
+    irc:
+        channels:
+            - "irc.freenode.org#borgbackup"
+        use_notice: true
+        skip_join: true
--- a/.travis/install.sh
+++ b/.travis/install.sh
@ -0,0 +1,48 @@
+#!/bin/bash
+# Prepare a travis worker: system packages, python, virtualenv and borg.
+set -e
+set -x
+
+if [[ "$(uname -s)" == 'Darwin' ]]; then
+    # a failed "brew update" is simply tried a second time
+    brew update || brew update
+
+    if [[ "${OPENSSL}" != "0.9.8" ]]; then
+        brew outdated openssl || brew upgrade openssl
+    fi
+
+    if which pyenv > /dev/null; then
+        eval "$(pyenv init -)"
+    fi
+
+    brew install lz4
+    brew outdated pyenv || brew upgrade pyenv
+
+    # map the tox environment to the python release pyenv has to provide
+    pyenv_version=""
+    case "${TOXENV}" in
+        py32) pyenv_version=3.2.6 ;;
+        py33) pyenv_version=3.3.6 ;;
+        py34) pyenv_version=3.4.3 ;;
+    esac
+    if [[ -n "${pyenv_version}" ]]; then
+        pyenv install "${pyenv_version}"
+        pyenv global "${pyenv_version}"
+    fi
+    pyenv rehash
+    python -m pip install --user virtualenv
+else
+    pip install virtualenv
+    # add the lz4 ppa before installing the -dev packages
+    sudo add-apt-repository -y ppa:gezakovacs/lz4
+    sudo apt-get update
+    sudo apt-get install -y liblz4-dev
+    sudo apt-get install -y libacl1-dev
+fi
+
+# common part: virtualenv, development requirements, codecov, this project
+python -m virtualenv ~/.venv
+source ~/.venv/bin/activate
+pip install -r requirements.d/development.txt
+pip install codecov
+pip install -e .
--- a/.travis/run.sh
+++ b/.travis/run.sh
@ -0,0 +1,23 @@
+#!/bin/bash
+# Run the test suite through tox, inside the ~/.venv virtualenv.
+set -e
+set -x
+
+os_name="$(uname -s)"
+
+if [[ "${os_name}" == "Darwin" ]]; then
+    eval "$(pyenv init -)"
+    if [[ "${OPENSSL}" != "0.9.8" ]]; then
+        # point compiler and linker at the homebrew openssl
+        export ARCHFLAGS="-arch x86_64"
+        export LDFLAGS="-L/usr/local/opt/openssl/lib"
+        export CFLAGS="-I/usr/local/opt/openssl/include"
+    fi
+fi
+
+source ~/.venv/bin/activate
+
+case "${os_name}" in
+    Darwin) sudo tox -e $TOXENV ;;  # no fakeroot on OS X
+    *)      fakeroot -u tox ;;
+esac
--- a/.travis/upload_coverage.sh
+++ b/.travis/upload_coverage.sh
@ -0,0 +1,13 @@
+#!/bin/bash
+# Upload the coverage data of the tox run with codecov (not for pep8 runs).
+set -ex
+
+NO_COVERAGE_TOXENVS=(pep8)
+# whole-word match: an empty or partial TOXENV must not suppress the upload
+if [[ " ${NO_COVERAGE_TOXENVS[*]} " != *" ${TOXENV} "* ]]; then
+    source ~/.venv/bin/activate
+    ln .tox/.coverage .coverage
+    # on osx, tests run as root, need access to .coverage
+    sudo chmod 666 .coverage
+    codecov -e TRAVIS_OS_NAME TOXENV
+fi
--- a/CHANGES.rst
+++ b/CHANGES.rst
@ -2,9 +2,113 @@ Borg Changelog
 ==============


+Version 0.26.0 (not released yet)
+---------------------------------
+
+New features:
+
+- BORG_REPO env var to specify the default repo, #168
+- read special files as if they were regular files, #79
+
+Bug fixes:
+
+- borg mount repo: use absolute path, attic #200, attic #137
+- chunker: use off_t to get 64bit on 32bit platform, #178
+- initialize chunker fd to -1, so it's not equal to STDIN_FILENO (0)
+- fix reaction to "no" answer at delete repo prompt, #182
+
+Other changes:
+
+- detect inconsistency / corruption / hash collision, #170
+- replace versioneer with setuptools_scm, #106
+
+
+Version 0.25.0
+--------------
+
+Compatibility notes:
+
+- lz4 compression library (liblz4) is a new requirement (#156)
+- the new compression code is very compatible: as long as you stay with zlib
+  compression, older borg releases will still be able to read data from a
+  repo/archive made with the new code (note: this is not the case for the
+  default "none" compression, use "zlib,0" if you want a "no compression" mode
+  that can be read by older borg). Also the new code is able to read repos and
+  archives made with older borg versions (for all zlib levels 0..9).
+
+Deprecations:
+
+- --compression N (with N being a number, as in 0.24) is deprecated.
+  We keep the --compression 0..9 for now to not break scripts, but it is
+  deprecated and will be removed later, so better fix your scripts now:
+  --compression 0 (as in 0.24) is the same as --compression zlib,0 (now).
+  BUT: if you do not want compression, you should rather use --compression none
+  (which is the default).
+  --compression 1 (in 0.24) is the same as --compression zlib,1 (now)
+  --compression 9 (in 0.24) is the same as --compression zlib,9 (now)
+
+New features:
+
+- create --compression none (default, means: do not compress, just pass through
+  data "as is". this is more efficient than zlib level 0 as used in borg 0.24)
+- create --compression lz4 (super-fast, but not very high compression)
+- create --compression zlib,N (slower, higher compression, default for N is 6)
+- create --compression lzma,N (slowest, highest compression, default N is 6)
+- honor the nodump flag (UF_NODUMP) and do not backup such items
+- list --short just outputs a simple list of the files/directories in an archive
+
+Bug fixes:
+
+- fixed --chunker-params parameter order confusion / malfunction, fixes #154
+- close fds of segments we delete (during compaction)
+- close files which fell out of the lrucache
+- fadvise DONTNEED is now only called for the byte range actually read, not for
+  the whole file, fixes #158.
+- fix issue with negative "all archives" size, fixes #165
+- restore_xattrs: ignore if setxattr fails with EACCES, fixes #162
+
+Other changes:
+
+- remove fakeroot requirement for tests, tests run faster without fakeroot
+  (test setup does not fail any more without fakeroot, so you can run with or
+  without fakeroot), fixes #151 and #91.
+- more tests for archiver
+- recover_segment(): don't assume we have an fd for segment
+- lrucache refactoring / cleanup, add dispose function, py.test tests
+- generalize hashindex code for any key length (less hardcoding)
+- lock roster: catch file not found in remove() method and ignore it
+- travis CI: use requirements file
+- improved docs:
+
+  - replace hack for llfuse with proper solution (install libfuse-dev)
+  - update docs about compression
+  - update development docs about fakeroot
+  - internals: add some words about lock files / locking system
+  - support: mention BountySource and for what it can be used
+  - theme: use a lighter green
+  - add pypi, wheel, dist package based install docs
+  - split install docs into system-specific preparations and generic instructions
+
+
 Version 0.24.0
 --------------

+Incompatible changes (compared to 0.23):
+
+- borg now always passes the --umask NNN option when invoking another borg via
+  ssh on the repository server. This ensures that it uses the same umask for
+  remote repos as for local ones. Because of this, you must upgrade both
+  server and client(s) to 0.24.
+- the default umask is 077 now (if you do not specify one via --umask), which
+  might differ from the one you used previously. The default umask prevents
+  you from accidentally giving access permissions for group and/or others to
+  files created by borg (e.g. the repository).
+
+Deprecations:
+
+- "--encryption passphrase" mode is deprecated, see #85 and #97.
+  See the new "--encryption repokey" mode for a replacement.
+
 New features:

 - borg create --chunker-params ... to configure the chunker, fixes #16
@ -17,12 +121,21 @@ New features:
 - borg create --compression 0..9 to select zlib compression level, fixes #66
  (attic #295).
 - borg init --encryption repokey (to store the encryption key into the repo),
-  deprecate --encryption passphrase, fixes #85
+  fixes #85
 - improve at-end error logging, always log exceptions and set exit_code=1
 - LoggedIO: better error checks / exceptions / exception handling
+- implement --remote-path to allow non-default-path borg locations, #125
+- implement --umask M and use 077 as default umask for better security, #117
+- borg check: give a named single archive to it, fixes #139
+- cache sync: show progress indication
+- cache sync: reimplement the chunk index merging in C

 Bug fixes:

+- fix segfault that happened for unreadable files (chunker: n needs to be a
+  signed size_t), #116
+- fix the repair mode, #144
+- repo delete: add destroy to allowed rpc methods, fixes issue #114
 - more compatible repository locking code (based on mkdir), maybe fixes #92
  (attic #317, attic #201).
 - better Exception msg if no Borg is installed on the remote repo server, #56
@ -30,10 +143,12 @@ Bug fixes:
  fixes attic #326.
 - fix Traceback when running check --repair, attic #232
 - clarify help text, fixes #73.
+- add help string for --no-files-cache, fixes #140

 Other changes:

 - improved docs:
+
  - added docs/misc directory for misc. writeups that won't be included
    "as is" into the html docs.
  - document environment variables and return codes (attic #324, attic #52)
@ -44,14 +159,25 @@ Other changes:
  - add FAQ entries about redundancy / integrity
  - clarify that borg extract uses the cwd as extraction target
  - update internals doc about chunker params, memory usage and compression
+  - added docs about development
+  - add some words about resource usage in general
+  - document how to backup a raw disk
+  - add note about how to run borg from virtual env
+  - add solutions for (ll)fuse installation problems
+  - document what borg check does, fixes #138
+  - reorganize borgbackup.github.io sidebar, prev/next at top
+  - deduplicate and refactor the docs / README.rst

 - use borg-tmp as prefix for temporary files / directories
 - short prune options without "keep-" are deprecated, do not suggest them
- improved tox configuration, documented there how to invoke it
+- improved tox configuration
 - remove usage of unittest.mock, always use mock from pypi
 - use entrypoints instead of scripts, for better use of the wheel format and
  modern installs
-    
+- add requirements.d/development.txt and modify tox.ini
+- use travis-ci for testing based on Linux and (new) OS X
+- use coverage.py, pytest-cov and codecov.io for test coverage support
+
 I forgot to list some stuff already implemented in 0.23.0, here they are:

 New features:
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1,4 +1,4 @@
-include README.rst AUTHORS LICENSE CHANGES MANIFEST.in versioneer.py
+include README.rst AUTHORS LICENSE CHANGES.rst MANIFEST.in versioneer.py
 recursive-include borg *.pyx
 recursive-include docs *
 recursive-exclude docs *.pyc
--- a/README.rst
+++ b/README.rst
@ -1,13 +1,112 @@
-|build|
+What is BorgBackup?
+-------------------
+BorgBackup (short: Borg) is a deduplicating backup program.
+Optionally, it supports compression and authenticated encryption.

-What is Borg?
-------------
-Borg is a deduplicating backup program. The main goal of Borg is to provide
-an efficient and secure way to backup data. The data deduplication
-technique used makes Borg suitable for daily backups since only changes
-are stored.
+The main goal of Borg is to provide an efficient and secure way to back up data.
+The data deduplication technique used makes Borg suitable for daily backups
+since only changes are stored.
+The authenticated encryption technique makes it suitable for backups to not
+fully trusted targets.

-Borg is a fork of Attic and maintained by "The Borg Collective" (see AUTHORS file).
+`Borg Installation docs <http://borgbackup.github.io/borgbackup/installation.html>`_
+
+
+Main features
+~~~~~~~~~~~~~
+**Space efficient storage**
+  Deduplication based on content-defined chunking is used to reduce the number
+  of bytes stored: each file is split into a number of variable length chunks
+  and only chunks that have never been seen before are added to the repository.
+
+  To deduplicate, all the chunks in the same repository are considered, no
+  matter whether they come from different machines, from previous backups,
+  from the same backup or even from the same single file.
+
+  Compared to other deduplication approaches, this method does NOT depend on:
+
+  * file/directory names staying the same
+
+    So you can move your stuff around without killing the deduplication,
+    even between machines sharing a repo.
+
+  * complete files or time stamps staying the same
+
+    If a big file changes a little, only a few new chunks will be stored -
+    this is great for VMs or raw disks.
+
+  * the absolute position of a data chunk inside a file
+
+    Stuff may get shifted and will still be found by the deduplication
+    algorithm.
+
+**Speed**
+  * performance critical code (chunking, compression, encryption) is
+    implemented in C/Cython
+  * local caching of files/chunks index data
+  * quick detection of unmodified files
+
+**Data encryption**
+    All data can be protected using 256-bit AES encryption; data integrity and
+    authenticity are verified using HMAC-SHA256.
+
+**Compression**
+    All data can be compressed by lz4 (super fast, low compression), zlib
+    (medium speed and compression) or lzma (low speed, high compression).
+
+**Off-site backups**
+    Borg can store data on any remote host accessible over SSH.  If Borg is
+    installed on the remote host, big performance gains can be achieved
+    compared to using a network filesystem (sshfs, nfs, ...).
+
+**Backups mountable as filesystems**
+    Backup archives are mountable as userspace filesystems for easy interactive
+    backup examination and restores (e.g. by using a regular file manager).
+
+**Platforms Borg works on**
+  * Linux
+  * FreeBSD
+  * Mac OS X
+  * Cygwin (unsupported)
+
+**Free and Open Source Software**
+  * security and functionality can be audited independently
+  * licensed under the BSD (3-clause) license
+
+
+Easy to use
+~~~~~~~~~~~
+Initialize a new backup repository and create a backup archive::
+
+    $ borg init /mnt/backup
+    $ borg create /mnt/backup::Monday ~/Documents
+
+Now doing another backup, just to show off the great deduplication::
+
+    $ borg create --stats /mnt/backup::Tuesday ~/Documents
+
+    Archive name: Tuesday
+    Archive fingerprint: 387a5e3f9b0e792e91c...
+    Start time: Tue Mar 25 12:00:10 2014
+    End time:   Tue Mar 25 12:00:10 2014
+    Duration: 0.08 seconds
+    Number of files: 358
+                      Original size    Compressed size    Deduplicated size
+    This archive:          57.16 MB           46.78 MB            151.67 kB  <--- !
+    All archives:         114.02 MB           93.46 MB             44.81 MB
+
+For a graphical frontend refer to our complementary project
+`BorgWeb <https://github.com/borgbackup/borgweb>`_.
+
+
+Notes
+-----
+
+Borg is a fork of `Attic <https://github.com/jborg/attic>`_ and maintained by
+"`The Borg Collective <https://github.com/borgbackup/borg/blob/master/AUTHORS>`_".
+
+Read `issue #1 <https://github.com/borgbackup/borg/issues/1>`_ about the initial
+considerations regarding project goals and policy of the Borg project.

 BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC.
 EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER
@ -17,61 +116,15 @@ NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES.

 THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS.

-Read issue #1 on the issue tracker, goals are being defined there.
+For more information, please also see the
+`LICENSE <https://github.com/borgbackup/borg/blob/master/LICENSE>`_.

-Please also see the LICENSE for more informations.
-
-Easy to use
-~~~~~~~~~~~
-Initialize backup repository and create a backup archive::
-
-    $ borg init /mnt/backup
-    $ borg create -v /mnt/backup::documents ~/Documents
-
-Main features
-~~~~~~~~~~~~~
-Space efficient storage
-  Variable block size deduplication is used to reduce the number of bytes 
-  stored by detecting redundant data. Each file is split into a number of
-  variable length chunks and only chunks that have never been seen before are
-  compressed and added to the repository.
-
-Optional data encryption
-    All data can be protected using 256-bit AES encryption and data integrity
-    and authenticity is verified using HMAC-SHA256.
-
-Off-site backups
-    Borg can store data on any remote host accessible over SSH.  This is
-    most efficient if Borg is also installed on the remote host.
-
-Backups mountable as filesystems
-    Backup archives are mountable as userspace filesystems for easy backup
-    verification and restores.
-
-What do I need?
---------------
-Borg requires Python 3.2 or above to work.
-Borg also requires a sufficiently recent OpenSSL (>= 1.0.0).
-In order to mount archives as filesystems, llfuse is required.
-
-How do I install it?
--------------------
-::
-
-  $ pip3 install borgbackup
-
-Where are the docs?
-------------------
-Go to https://borgbackup.github.io/ for a prebuilt version of the documentation.
-You can also build it yourself from the docs folder.
-
-Where are the tests?
--------------------
-The tests are in the borg/testsuite package. To run the test suite use the
-following command::
-
-  $ fakeroot -u tox  # you need to have tox and pytest installed
+|build| |coverage|

 .. |build| image:: https://travis-ci.org/borgbackup/borg.svg
        :alt: Build Status
        :target: https://travis-ci.org/borgbackup/borg
+
+.. |coverage| image:: http://codecov.io/github/borgbackup/borg/coverage.svg?branch=master
+        :alt: Test Coverage
+        :target: http://codecov.io/github/borgbackup/borg?branch=master
--- a/borg/init.py
+++ b/borg/init.py
@ -1,5 +1,3 @@
 # This is a python package

-from ._version import get_versions
-__version__ = get_versions()['version']
-del get_versions
+from ._version import version as __version__
--- a/borg/main.py
+++ b/borg/main.py
@ -0,0 +1,3 @@
+from borg.archiver import main
+main()
+
--- a/borg/_chunker.c
+++ b/borg/_chunker.c
@ -83,7 +83,8 @@ typedef struct {
    PyObject *fd;
    int fh;
    int done, eof;
-    size_t remaining, bytes_read, bytes_yielded, position, last;
+    size_t remaining, position, last;
+    off_t bytes_read, bytes_yielded;
 } Chunker;

 static Chunker *
@ -96,6 +97,7 @@ chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32
    c->table = buzhash_init_table(seed);
    c->buf_size = max_size;
    c->data = malloc(c->buf_size);
+    c->fh = -1;
    return c;
 }

@ -127,7 +129,8 @@ chunker_free(Chunker *c)
 static int
 chunker_fill(Chunker *c)
 {
-    size_t n;
+    ssize_t n;
+    off_t offset, length;
    PyObject *data;
    memmove(c->data, c->data + c->last, c->position + c->remaining - c->last);
    c->position -= c->last;
@ -137,6 +140,7 @@ chunker_fill(Chunker *c)
        return 1;
    }
    if(c->fh >= 0) {
+        offset = c->bytes_read;
        // if we have a os-level file descriptor, use os-level API
        n = read(c->fh, c->data + c->position + c->remaining, n);
        if(n > 0) {
@ -151,13 +155,16 @@ chunker_fill(Chunker *c)
            // some error happened
            return 0;
        }
+        length = c->bytes_read - offset;
        #if ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L )
-        // We tell the OS that we do not need the data of this file any more
-        // that it maybe has in the cache. This avoids that we spoil the
+        // We tell the OS that we do not need the data that we just have read any
+        // more (that it maybe has in the cache). This avoids that we spoil the
        // complete cache with data that we only read once and (due to cache
        // size limit) kick out data from the cache that might be still useful
        // for the OS or other processes.
-        posix_fadvise(c->fh, (off_t) 0, (off_t) 0, POSIX_FADV_DONTNEED);
+        if (length > 0) {
+            posix_fadvise(c->fh, offset, length, POSIX_FADV_DONTNEED);
+        }
        #endif
    }
    else {
--- a/borg/_hashindex.c
+++ b/borg/_hashindex.c
@ -145,10 +145,12 @@ hashindex_read(const char *path)
    bytes_read = fread(&header, 1, sizeof(HashHeader), fd);
    if(bytes_read != sizeof(HashHeader)) {
        if(ferror(fd)) {
-            EPRINTF_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read);
+            EPRINTF_PATH(path, "fread header failed (expected %ju, got %ju)",
+                         (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read);
        }
        else {
-            EPRINTF_MSG_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read);
+            EPRINTF_MSG_PATH(path, "fread header failed (expected %ju, got %ju)",
+                             (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read);
        }
        goto fail;
    }
@ -170,7 +172,8 @@ hashindex_read(const char *path)
    }
    buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size);
    if(length != sizeof(HashHeader) + buckets_length) {
-        EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ld, got %ld)", sizeof(HashHeader) + buckets_length, length);
+        EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ju, got %ju)",
+                         (uintmax_t) sizeof(HashHeader) + buckets_length, (uintmax_t) length);
        goto fail;
    }
    if(!(index = malloc(sizeof(HashIndex)))) {
@ -186,10 +189,12 @@ hashindex_read(const char *path)
    bytes_read = fread(index->buckets, 1, buckets_length, fd);
    if(bytes_read != buckets_length) {
        if(ferror(fd)) {
-            EPRINTF_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read);
+            EPRINTF_PATH(path, "fread buckets failed (expected %ju, got %ju)",
+                         (uintmax_t) buckets_length, (uintmax_t) bytes_read);
        }
        else {
-            EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read);
+            EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ju, got %ju)",
+                             (uintmax_t) buckets_length, (uintmax_t) bytes_read);
        }
        free(index->buckets);
        free(index);
@ -375,8 +380,8 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
        chunks += values[0];
        unique_size += values[1];
        unique_csize += values[2];
-        size += values[0] * values[1];
-        csize += values[0] * values[2];
+        size += (int64_t) values[0] * values[1];
+        csize += (int64_t) values[0] * values[2];
    }
    *total_size = size;
    *total_csize = csize;
@ -385,3 +390,22 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
    *total_unique_chunks = unique_chunks;
    *total_chunks = chunks;
 }
+
+/*
+ * Merge all entries of `other` into `index`.
+ *
+ * Keys missing from `index` are inserted together with the value stored in
+ * `other`. For keys present in both, the first int32 of the value in `other`
+ * is added to the first int32 of the value in `index`; the rest of that value
+ * is left untouched.
+ *
+ * NOTE(review): keys of `other` are read with index->key_size, so both
+ * indexes are assumed to use the same key size - confirm with the callers.
+ * NOTE(review): the result of hashindex_set() is not checked here - confirm
+ * whether it can fail and whether that needs to be reported.
+ */
+static void
+hashindex_merge(HashIndex *index, HashIndex *other)
+{
+    int32_t key_size = index->key_size;
+    const int32_t *other_values;
+    int32_t *my_values;
+    void *key = NULL;
+
+    while((key = hashindex_next_key(other, key))) {
+        /* The value follows the key. Step via char * because arithmetic on
+         * void * is a GNU extension and not valid ISO C. */
+        other_values = (const int32_t *)((const char *)key + key_size);
+        my_values = (int32_t *)hashindex_get(index, key);
+        if(my_values == NULL) {
+            hashindex_set(index, key, other_values);
+        } else {
+            *my_values += *other_values;
+        }
+    }
+}
--- a/borg/_version.py
+++ b/borg/_version.py
@ -1,239 +0,0 @@
-
-# This file helps to compute a version number in source trees obtained from
-# git-archive tarball (such as those provided by githubs download-from-tag
-# feature). Distribution tarballs (built by setup.py sdist) and build
-# directories (produced by setup.py build) will contain a much shorter file
-# that just contains the computed version number.
-
-# This file is released into the public domain. Generated by
-# versioneer-0.14 (https://github.com/warner/python-versioneer)
-
-import errno
-import os
-import re
-import subprocess
-import sys
-
-# these strings will be replaced by git during git-archive
-git_refnames = "$Format:%d$"
-git_full = "$Format:%H$"
-
-# these strings are filled in when 'setup.py versioneer' creates _version.py
-tag_prefix = ""
-parentdir_prefix = "borgbackup-"
-versionfile_source = "borg/_version.py"
-
-
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
-    assert isinstance(commands, list)
-    p = None
-    for c in commands:
-        try:
-            # remember shell=False, so use git.cmd on windows, not just git
-            p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
-                                 stderr=(subprocess.PIPE if hide_stderr
-                                         else None))
-            break
-        except EnvironmentError:
-            e = sys.exc_info()[1]
-            if e.errno == errno.ENOENT:
-                continue
-            if verbose:
-                print("unable to run %s" % args[0])
-                print(e)
-            return None
-    else:
-        if verbose:
-            print("unable to find command, tried %s" % (commands,))
-        return None
-    stdout = p.communicate()[0].strip()
-    if sys.version_info[0] >= 3:
-        stdout = stdout.decode()
-    if p.returncode != 0:
-        if verbose:
-            print("unable to run %s (error)" % args[0])
-        return None
-    return stdout
-
-
-def versions_from_parentdir(parentdir_prefix, root, verbose=False):
-    # Source tarballs conventionally unpack into a directory that includes
-    # both the project name and a version string.
-    dirname = os.path.basename(root)
-    if not dirname.startswith(parentdir_prefix):
-        if verbose:
-            print("guessing rootdir is '%s', but '%s' doesn't start with "
-                  "prefix '%s'" % (root, dirname, parentdir_prefix))
-        return None
-    return {"version": dirname[len(parentdir_prefix):], "full": ""}
-
-
-def git_get_keywords(versionfile_abs):
-    # the code embedded in _version.py can just fetch the value of these
-    # keywords. When used from setup.py, we don't want to import _version.py,
-    # so we do it with a regexp instead. This function is not used from
-    # _version.py.
-    keywords = {}
-    try:
-        f = open(versionfile_abs, "r")
-        for line in f.readlines():
-            if line.strip().startswith("git_refnames ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["refnames"] = mo.group(1)
-            if line.strip().startswith("git_full ="):
-                mo = re.search(r'=\s*"(.*)"', line)
-                if mo:
-                    keywords["full"] = mo.group(1)
-        f.close()
-    except EnvironmentError:
-        pass
-    return keywords
-
-
-def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
-    if not keywords:
-        return {}  # keyword-finding function failed to find keywords
-    refnames = keywords["refnames"].strip()
-    if refnames.startswith("$Format"):
-        if verbose:
-            print("keywords are unexpanded, not using")
-        return {}  # unexpanded, so not in an unpacked git-archive tarball
-    refs = set([r.strip() for r in refnames.strip("()").split(",")])
-    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
-    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
-    TAG = "tag: "
-    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
-    if not tags:
-        # Either we're using git < 1.8.3, or there really are no tags. We use
-        # a heuristic: assume all version tags have a digit. The old git %d
-        # expansion behaves like git log --decorate=short and strips out the
-        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
-        # between branches and tags. By ignoring refnames without digits, we
-        # filter out many common branch names like "release" and
-        # "stabilization", as well as "HEAD" and "master".
-        tags = set([r for r in refs if re.search(r'\d', r)])
-        if verbose:
-            print("discarding '%s', no digits" % ",".join(refs-tags))
-    if verbose:
-        print("likely tags: %s" % ",".join(sorted(tags)))
-    for ref in sorted(tags):
-        # sorting will prefer e.g. "2.0" over "2.0rc1"
-        if ref.startswith(tag_prefix):
-            r = ref[len(tag_prefix):]
-            if verbose:
-                print("picking %s" % r)
-            return {"version": r,
-                    "full": keywords["full"].strip()}
-    # no suitable tags, so version is "0+unknown", but full hex is still there
-    if verbose:
-        print("no suitable tags, using unknown + full revision id")
-    return {"version": "0+unknown",
-            "full": keywords["full"].strip()}
-
-
-def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
-    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
-
-    # dirty
-    dirty = git_describe.endswith("-dirty")
-    if dirty:
-        git_describe = git_describe[:git_describe.rindex("-dirty")]
-    dirty_suffix = ".dirty" if dirty else ""
-
-    # now we have TAG-NUM-gHEX or HEX
-
-    if "-" not in git_describe:  # just HEX
-        return "0+untagged.g"+git_describe+dirty_suffix, dirty
-
-    # just TAG-NUM-gHEX
-    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
-    if not mo:
-        # unparseable. Maybe git-describe is misbehaving?
-        return "0+unparseable"+dirty_suffix, dirty
-
-    # tag
-    full_tag = mo.group(1)
-    if not full_tag.startswith(tag_prefix):
-        if verbose:
-            fmt = "tag '%s' doesn't start with prefix '%s'"
-            print(fmt % (full_tag, tag_prefix))
-        return None, dirty
-    tag = full_tag[len(tag_prefix):]
-
-    # distance: number of commits since tag
-    distance = int(mo.group(2))
-
-    # commit: short hex revision ID
-    commit = mo.group(3)
-
-    # now build up version string, with post-release "local version
-    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
-    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
-    # can always test version.endswith(".dirty").
-    version = tag
-    if distance or dirty:
-        version += "+%d.g%s" % (distance, commit) + dirty_suffix
-
-    return version, dirty
-
-
-def git_versions_from_vcs(tag_prefix, root, verbose=False):
-    # this runs 'git' from the root of the source tree. This only gets called
-    # if the git-archive 'subst' keywords were *not* expanded, and
-    # _version.py hasn't already been rewritten with a short version string,
-    # meaning we're inside a checked out source tree.
-
-    if not os.path.exists(os.path.join(root, ".git")):
-        if verbose:
-            print("no .git in %s" % root)
-        return {}  # get_versions() will try next method
-
-    GITS = ["git"]
-    if sys.platform == "win32":
-        GITS = ["git.cmd", "git.exe"]
-    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
-    # if there are no tags, this yields HEX[-dirty] (no NUM)
-    stdout = run_command(GITS, ["describe", "--tags", "--dirty",
-                                "--always", "--long"],
-                         cwd=root)
-    # --long was added in git-1.5.5
-    if stdout is None:
-        return {}  # try next method
-    version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose)
-
-    # build "full", which is FULLHEX[.dirty]
-    stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
-    if stdout is None:
-        return {}
-    full = stdout.strip()
-    if dirty:
-        full += ".dirty"
-
-    return {"version": version, "full": full}
-
-
-def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False):
-    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
-    # __file__, we can work backwards from there to the root. Some
-    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
-    # case we can only use expanded keywords.
-
-    keywords = {"refnames": git_refnames, "full": git_full}
-    ver = git_versions_from_keywords(keywords, tag_prefix, verbose)
-    if ver:
-        return ver
-
-    try:
-        root = os.path.realpath(__file__)
-        # versionfile_source is the relative path from the top of the source
-        # tree (where the .git directory might live) to this file. Invert
-        # this to find the root from __file__.
-        for i in versionfile_source.split('/'):
-            root = os.path.dirname(root)
-    except NameError:
-        return default
-
-    return (git_versions_from_vcs(tag_prefix, root, verbose)
-            or versions_from_parentdir(parentdir_prefix, root, verbose)
-            or default)
--- a/borg/archive.py
+++ b/borg/archive.py
@ -323,14 +323,17 @@ class Archive:
            raise Exception('Unknown archive item type %r' % item[b'mode'])

    def restore_attrs(self, path, item, symlink=False, fd=None):
-        xattrs = item.get(b'xattrs')
-        if xattrs:
-                for k, v in xattrs.items():
-                    try:
-                        xattr.setxattr(fd or path, k, v, follow_symlinks=False)
-                    except OSError as e:
-                        if e.errno != errno.ENOTSUP:
-                            raise
+        xattrs = item.get(b'xattrs', {})
+        for k, v in xattrs.items():
+            try:
+                xattr.setxattr(fd or path, k, v, follow_symlinks=False)
+            except OSError as e:
+                if e.errno not in (errno.ENOTSUP, errno.EACCES, ):
+                    # only raise if the errno is not on our ignore list:
+                    # ENOTSUP == xattrs not supported here
+                    # EACCES == permission denied to set this specific xattr
+                    #           (this may happen related to security.* keys)
+                    raise
        uid = gid = None
        if not self.numeric_owner:
            uid = user2uid(item[b'user'])
@ -452,6 +455,7 @@ class Archive:
            b'mtime': int_to_bigint(int(time.time()) * 1000000000)
        }
        self.add_item(item)
+        return 'i'  # stdin

    def process_file(self, path, st, cache):
        status = None
@ -609,8 +613,9 @@ class ArchiveChecker:
        self.error_found = False
        self.possibly_superseded = set()

-    def check(self, repository, repair=False, last=None):
+    def check(self, repository, repair=False, archive=None, last=None):
        self.report_progress('Starting archive consistency check...')
+        self.check_all = archive is None and last is None
        self.repair = repair
        self.repository = repository
        self.init_chunks()
@ -619,11 +624,9 @@ class ArchiveChecker:
            self.manifest = self.rebuild_manifest()
        else:
            self.manifest, _ = Manifest.load(repository, key=self.key)
-        self.rebuild_refcounts(last=last)
-        if last is None:
-            self.verify_chunks()
-        else:
-            self.report_progress('Orphaned objects check skipped (needs all archives checked)')
+        self.rebuild_refcounts(archive=archive, last=last)
+        self.orphan_chunks_check()
+        self.finish()
        if not self.error_found:
            self.report_progress('Archive consistency check complete, no problems found.')
        return self.repair or not self.error_found
@ -631,7 +634,7 @@ class ArchiveChecker:
    def init_chunks(self):
        """Fetch a list of all object keys from repository
        """
-        # Explicity set the initial hash table capacity to avoid performance issues
+        # Explicitly set the initial hash table capacity to avoid performance issues
        # due to hash table "resonance"
        capacity = int(len(self.repository) * 1.2)
        self.chunks = ChunkIndex(capacity)
@ -680,7 +683,7 @@ class ArchiveChecker:
        self.report_progress('Manifest rebuild complete', error=True)
        return manifest

-    def rebuild_refcounts(self, last=None):
+    def rebuild_refcounts(self, archive=None, last=None):
        """Rebuild object reference counts by walking the metadata

        Missing and/or incorrect data is repaired when detected
@ -762,10 +765,17 @@ class ArchiveChecker:
                        yield item

        repository = cache_if_remote(self.repository)
-        num_archives = len(self.manifest.archives)
-        archive_items = sorted(self.manifest.archives.items(), reverse=True,
-                               key=lambda name_info: name_info[1][b'time'])
-        end = None if last is None else min(num_archives, last)
+        if archive is None:
+            # we need last N or all archives
+            archive_items = sorted(self.manifest.archives.items(), reverse=True,
+                                   key=lambda name_info: name_info[1][b'time'])
+            num_archives = len(self.manifest.archives)
+            end = None if last is None else min(num_archives, last)
+        else:  # we only want one specific archive
+            archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
+            if not archive_items:  # unknown name: report it instead of silently checking nothing
+                self.report_progress('Archive {} not found'.format(archive), error=True)
+            num_archives = end = 1
        for i, (name, info) in enumerate(archive_items[:end]):
            self.report_progress('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives))
            archive_id = info[b'id']
@ -796,16 +806,22 @@ class ArchiveChecker:
            add_reference(new_archive_id, len(data), len(cdata), cdata)
            info[b'id'] = new_archive_id

-    def verify_chunks(self):
-        unused = set()
-        for id_, (count, size, csize) in self.chunks.iteritems():
-            if count == 0:
-                unused.add(id_)
-        orphaned = unused - self.possibly_superseded
-        if orphaned:
-            self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True)
+    def orphan_chunks_check(self):
+        if self.check_all:
+            unused = set()
+            for id_, (count, size, csize) in self.chunks.iteritems():
+                if count == 0:
+                    unused.add(id_)
+            orphaned = unused - self.possibly_superseded
+            if orphaned:
+                self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True)
+            if self.repair:
+                for id_ in unused:
+                    self.repository.delete(id_)
+        else:
+            self.report_progress('Orphaned objects check skipped (needs all archives checked)')
+
+    def finish(self):
        if self.repair:
-            for id_ in unused:
-                self.repository.delete(id_)
            self.manifest.write()
            self.repository.commit()
--- a/borg/archiver.py
+++ b/borg/archiver.py
@ -14,6 +14,7 @@ import traceback

 from . import __version__
 from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS
+from .compress import Compressor, COMPR_BUFFER
 from .repository import Repository
 from .cache import Cache
 from .key import key_creator
@ -21,9 +22,11 @@ from .helpers import Error, location_validator, format_time, format_file_size, \
    format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
    get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
    Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
-    is_cachedir, bigint_to_int, ChunkerParams
+    is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec
 from .remote import RepositoryServer, RemoteRepository

+has_lchflags = hasattr(os, 'lchflags')
+

 class Archiver:

@ -85,8 +88,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                print('Repository check complete, no problems found.')
            else:
                return 1
-        if not args.repo_only and not ArchiveChecker().check(repository, repair=args.repair, last=args.last):
-                return 1
+        if not args.repo_only and not ArchiveChecker().check(
+                repository, repair=args.repair, archive=args.repository.archive, last=args.last):
+            return 1
        return 0

    def do_change_passphrase(self, args):
@ -98,15 +102,21 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")

    def do_create(self, args):
        """Create new archive"""
+        dry_run = args.dry_run
        t0 = datetime.now()
-        repository = self.open_repository(args.archive, exclusive=True)
-        manifest, key = Manifest.load(repository)
-        key.compression_level = args.compression
-        cache = Cache(repository, key, manifest, do_files=args.cache_files)
-        archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
-                          create=True, checkpoint_interval=args.checkpoint_interval,
-                          numeric_owner=args.numeric_owner, progress=args.progress,
-                          chunker_params=args.chunker_params)
+        if not dry_run:
+            repository = self.open_repository(args.archive, exclusive=True)
+            manifest, key = Manifest.load(repository)
+            compr_args = dict(buffer=COMPR_BUFFER)
+            compr_args.update(args.compression)
+            key.compressor = Compressor(**compr_args)
+            cache = Cache(repository, key, manifest, do_files=args.cache_files)
+            archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
+                              create=True, checkpoint_interval=args.checkpoint_interval,
+                              numeric_owner=args.numeric_owner, progress=args.progress,
+                              chunker_params=args.chunker_params)
+        else:
+            archive = cache = None
        # Add cache dir to inode_skip list
        skip_inodes = set()
        try:
@ -124,11 +134,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
        for path in args.paths:
            if path == '-':  # stdin
                path = 'stdin'
-                self.print_verbose(path)
-                try:
-                    archive.process_stdin(path, cache)
-                except IOError as e:
-                    self.print_error('%s: %s', path, e)
+                status = '-'  # dry run default: stdin is neither read nor stored
+                if not dry_run:
+                    try:
+                        status = archive.process_stdin(path, cache)
+                    except IOError as e:
+                        status = 'E'  # error: keep status bound so the verbose line below cannot raise
+                        self.print_error('%s: %s', path, e)
+                self.print_verbose("%1s %s", status, path)
                continue
            path = os.path.normpath(path)
            if args.dontcross:
@ -139,25 +152,28 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                    continue
            else:
                restrict_dev = None
-            self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev)
-        archive.save(timestamp=args.timestamp)
-        if args.progress:
-            archive.stats.show_progress(final=True)
-        if args.stats:
-            t = datetime.now()
-            diff = t - t0
-            print('-' * 78)
-            print('Archive name: %s' % args.archive.archive)
-            print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
-            print('Start time: %s' % t0.strftime('%c'))
-            print('End time: %s' % t.strftime('%c'))
-            print('Duration: %s' % format_timedelta(diff))
-            print('Number of files: %d' % archive.stats.nfiles)
-            archive.stats.print_('This archive:', cache)
-            print('-' * 78)
+            self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev,
+                          read_special=args.read_special, dry_run=dry_run)
+        if not dry_run:
+            archive.save(timestamp=args.timestamp)
+            if args.progress:
+                archive.stats.show_progress(final=True)
+            if args.stats:
+                t = datetime.now()
+                diff = t - t0
+                print('-' * 78)
+                print('Archive name: %s' % args.archive.archive)
+                print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
+                print('Start time: %s' % t0.strftime('%c'))
+                print('End time: %s' % t.strftime('%c'))
+                print('Duration: %s' % format_timedelta(diff))
+                print('Number of files: %d' % archive.stats.nfiles)
+                archive.stats.print_('This archive:', cache)
+                print('-' * 78)
        return self.exit_code

-    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev):
+    def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev,
+                 read_special=False, dry_run=False):
        if exclude_path(path, excludes):
            return
        try:
@ -171,15 +187,21 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
        if restrict_dev and st.st_dev != restrict_dev:
            return
        status = None
-        if stat.S_ISREG(st.st_mode):
-            try:
-                status = archive.process_file(path, st, cache)
-            except IOError as e:
-                self.print_error('%s: %s', path, e)
+        # Ignore if nodump flag is set
+        if has_lchflags and (st.st_flags & stat.UF_NODUMP):
+            return
+        if (stat.S_ISREG(st.st_mode) or
+            read_special and not stat.S_ISDIR(st.st_mode)):
+            if not dry_run:
+                try:
+                    status = archive.process_file(path, st, cache)
+                except IOError as e:
+                    self.print_error('%s: %s', path, e)
        elif stat.S_ISDIR(st.st_mode):
            if exclude_caches and is_cachedir(path):
                return
-            status = archive.process_dir(path, st)
+            if not dry_run:
+                status = archive.process_dir(path, st)
            try:
                entries = os.listdir(path)
            except OSError as e:
@ -188,13 +210,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                for filename in sorted(entries):
                    entry_path = os.path.normpath(os.path.join(path, filename))
                    self._process(archive, cache, excludes, exclude_caches, skip_inodes,
-                                  entry_path, restrict_dev)
+                                  entry_path, restrict_dev, read_special=read_special,
+                                  dry_run=dry_run)
        elif stat.S_ISLNK(st.st_mode):
-            status = archive.process_symlink(path, st)
+            if not dry_run:
+                status = archive.process_symlink(path, st)
        elif stat.S_ISFIFO(st.st_mode):
-            status = archive.process_fifo(path, st)
+            if not dry_run:
+                status = archive.process_fifo(path, st)
        elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
-            status = archive.process_dev(path, st)
+            if not dry_run:
+                status = archive.process_dev(path, st)
        elif stat.S_ISSOCK(st.st_mode):
            # Ignore unix sockets
            return
@ -210,7 +236,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
        # Note: A/M/U is relative to the "files" cache, not to the repo.
        # This would be an issue if the files cache is not used.
        if status is None:
-            status = '?'  # need to add a status code somewhere
+            if not dry_run:
+                status = '?'  # need to add a status code somewhere
+            else:
+                status = '-'  # dry run, item was not backed up
        # output ALL the stuff - it can be easily filtered using grep.
        # even stuff considered unchanged might be interesting.
        self.print_verbose("%1s %s", status, remove_surrogates(path))
@ -220,7 +249,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
        # be restrictive when restoring files, restore permissions later
        if sys.getfilesystemencoding() == 'ascii':
            print('Warning: File system encoding is "ascii", extracting non-ascii filenames will not be supported.')
-        os.umask(0o077)
        repository = self.open_repository(args.archive)
        manifest, key = Manifest.load(repository)
        archive = Archive(repository, key, manifest, args.archive.archive,
@ -288,11 +316,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
            print("You requested to completely DELETE the repository *including* all archives it contains:")
            for archive_info in manifest.list_archive_infos(sort_by='ts'):
                print(format_archive(archive_info))
-            print("""Type "YES" if you understand this and want to continue.\n""")
-            if input('Do you want to continue? ') == 'YES':
-                repository.destroy()
-                cache.destroy()
-                print("Repository and corresponding cache were deleted.")
+            if not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
+                print("""Type "YES" if you understand this and want to continue.\n""")
+                if input('Do you want to continue? ') != 'YES':
+                    self.exit_code = 1
+                    return self.exit_code
+            repository.destroy()
+            cache.destroy()
+            print("Repository and corresponding cache were deleted.")
        return self.exit_code

    def do_mount(self, args):
@ -327,34 +358,38 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
        repository = self.open_repository(args.src)
        manifest, key = Manifest.load(repository)
        if args.src.archive:
-            tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'}
            archive = Archive(repository, key, manifest, args.src.archive)
-            for item in archive.iter_items():
-                type = tmap.get(item[b'mode'] // 4096, '?')
-                mode = format_file_mode(item[b'mode'])
-                size = 0
-                if type == '-':
+            if args.short:
+                for item in archive.iter_items():
+                    print(remove_surrogates(item[b'path']))
+            else:
+                tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'}
+                for item in archive.iter_items():
+                    type = tmap.get(item[b'mode'] // 4096, '?')
+                    mode = format_file_mode(item[b'mode'])
+                    size = 0
+                    if type == '-':
+                        try:
+                            size = sum(size for _, size, _ in item[b'chunks'])
+                        except KeyError:
+                            pass
                    try:
-                        size = sum(size for _, size, _ in item[b'chunks'])
-                    except KeyError:
-                        pass
-                try:
-                    mtime = datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9)
-                except ValueError:
-                    # likely a broken mtime and datetime did not want to go beyond year 9999
-                    mtime = datetime(9999, 12, 31, 23, 59, 59)
-                if b'source' in item:
-                    if type == 'l':
-                        extra = ' -> %s' % item[b'source']
+                        mtime = datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9)
+                    except ValueError:
+                        # likely a broken mtime and datetime did not want to go beyond year 9999
+                        mtime = datetime(9999, 12, 31, 23, 59, 59)
+                    if b'source' in item:
+                        if type == 'l':
+                            extra = ' -> %s' % item[b'source']
+                        else:
+                            type = 'h'
+                            extra = ' link to %s' % item[b'source']
                    else:
-                        type = 'h'
-                        extra = ' link to %s' % item[b'source']
-                else:
-                    extra = ''
-                print('%s%s %-6s %-6s %8d %s %s%s' % (
-                    type, mode, item[b'user'] or item[b'uid'],
-                    item[b'group'] or item[b'gid'], size, format_time(mtime),
-                    remove_surrogates(item[b'path']), extra))
+                        extra = ''
+                    print('%s%s %-6s %-6s %8d %s %s%s' % (
+                        type, mode, item[b'user'] or item[b'uid'],
+                        item[b'group'] or item[b'gid'], size, format_time(mtime),
+                        remove_surrogates(item[b'path']), extra))
        else:
            for archive_info in manifest.list_archive_infos(sort_by='ts'):
                print(format_archive(archive_info))
@ -508,7 +543,12 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
        common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                                   default=False,
                                   help='verbose output')
-        common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false')
+        common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false',
+                                   help='do not load/update the file metadata cache used to detect unchanged files')
+        common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M',
+                                   help='set umask to M (local and remote, default: 0o077)')
+        common_parser.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH',
+                                   help='set remote path to executable (default: "borg")')

        # We can't use argparse for "serve" since we don't want it to show up in "Available commands"
        if args:
@ -530,36 +570,64 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
        This command initializes an empty repository. A repository is a filesystem
        directory containing the deduplicated data from zero or more archives.
        Encryption can be enabled at repository init time.
+        Please note that the 'passphrase' encryption mode is DEPRECATED (instead of it,
+        consider using 'repokey').
        """)
        subparser = subparsers.add_parser('init', parents=[common_parser],
                                          description=self.do_init.__doc__, epilog=init_epilog,
                                          formatter_class=argparse.RawDescriptionHelpFormatter)
        subparser.set_defaults(func=self.do_init)
-        subparser.add_argument('repository', metavar='REPOSITORY',
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                               type=location_validator(archive=False),
                               help='repository to create')
        subparser.add_argument('-e', '--encryption', dest='encryption',
-                               choices=('none', 'passphrase', 'keyfile', 'repokey'), default='none',
-                               help='select encryption method')
+                               choices=('none', 'keyfile', 'repokey', 'passphrase'), default='none',
+                               help='select encryption key mode')

        check_epilog = textwrap.dedent("""
-        The check command verifies the consistency of a repository and the corresponding
-        archives. The underlying repository data files are first checked to detect bit rot
-        and other types of damage. After that the consistency and correctness of the archive
-        metadata is verified.
+        The check command verifies the consistency of a repository and the corresponding archives.

-        The archive metadata checks can be time consuming and requires access to the key
-        file and/or passphrase if encryption is enabled. These checks can be skipped using
-        the --repository-only option.
+        First, the underlying repository data files are checked:
+
+        - For all segments the segment magic (header) is checked
+        - For all objects stored in the segments, all metadata (e.g. crc and size) and
+          all data is read. The read data is checked by size and CRC. Bit rot and other
+          types of accidental damage can be detected this way.
+        - If we are in repair mode and a integrity error is detected for a segment,
+          we try to recover as many objects from the segment as possible.
+        - In repair mode, it makes sure that the index is consistent with the data
+          stored in the segments.
+        - If you use a remote repo server via ssh:, the repo check is executed on the
+          repo server without causing significant network traffic.
+        - The repository check can be skipped using the --archives-only option.
+
+        Second, the consistency and correctness of the archive metadata is verified:
+
+        - Is the repo manifest present? If not, it is rebuilt from archive metadata
+          chunks (this requires reading and decrypting of all metadata and data).
+        - Check if archive metadata chunk is present. if not, remove archive from
+          manifest.
+        - For all files (items) in the archive, for all chunks referenced by these
+          files, check if chunk is present (if not and we are in repair mode, replace
+          it with a same-size chunk of zeros). This requires reading of archive and
+          file metadata, but not data.
+        - If we are in repair mode and we checked all the archives: delete orphaned
+          chunks from the repo.
+        - if you use a remote repo server via ssh:, the archive check is executed on
+          the client machine (because if encryption is enabled, the checks will require
+          decryption and this is always done client-side, because key access will be
+          required).
+        - The archive checks can be time consuming, they can be skipped using the
+          --repository-only option.
        """)
        subparser = subparsers.add_parser('check', parents=[common_parser],
                                          description=self.do_check.__doc__,
                                          epilog=check_epilog,
                                          formatter_class=argparse.RawDescriptionHelpFormatter)
        subparser.set_defaults(func=self.do_check)
-        subparser.add_argument('repository', metavar='REPOSITORY',
-                               type=location_validator(archive=False),
-                               help='repository to check consistency of')
+        subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
+                               type=location_validator(),
+                               help='repository or archive to check consistency of')
        subparser.add_argument('--repository-only', dest='repo_only', action='store_true',
                               default=False,
                               help='only perform repository checks')
@ -582,7 +650,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                          epilog=change_passphrase_epilog,
                                          formatter_class=argparse.RawDescriptionHelpFormatter)
        subparser.set_defaults(func=self.do_change_passphrase)
-        subparser.add_argument('repository', metavar='REPOSITORY',
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                               type=location_validator(archive=False))

        create_epilog = textwrap.dedent("""
@ -632,9 +700,20 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                               metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
                               help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS)
        subparser.add_argument('-C', '--compression', dest='compression',
-                               type=int, default=0, metavar='N',
-                               help='select compression algorithm and level. 0..9 is supported and means zlib '
-                                    'level 0 (no compression, fast, default) .. zlib level 9 (high compression, slow).')
+                               type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION',
+                               help='select compression algorithm (and level): '
+                                    'none == no compression (default), '
+                                    'lz4 == lz4, '
+                                    'zlib == zlib (default level 6), '
+                                    'zlib,0 .. zlib,9 == zlib (with level 0..9), '
+                                    'lzma == lzma (default level 6), '
+                                    'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        subparser.add_argument('--read-special', dest='read_special',
+                               action='store_true', default=False,
+                               help='open and read special files as if they were regular files')
+        subparser.add_argument('-n', '--dry-run', dest='dry_run',
+                               action='store_true', default=False,
+                               help='do not create a backup archive')
        subparser.add_argument('archive', metavar='ARCHIVE',
                               type=location_validator(archive=True),
                               help='archive to create')
@ -708,7 +787,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
        subparser.add_argument('-s', '--stats', dest='stats',
                               action='store_true', default=False,
                               help='print statistics for the deleted archive')
-        subparser.add_argument('target', metavar='TARGET',
+        subparser.add_argument('target', metavar='TARGET', nargs='?', default='',
                               type=location_validator(),
                               help='archive or repository to delete')

@ -720,7 +799,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                          epilog=list_epilog,
                                          formatter_class=argparse.RawDescriptionHelpFormatter)
        subparser.set_defaults(func=self.do_list)
-        subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(),
+        subparser.add_argument('--short', dest='short',
+                               action='store_true', default=False,
+                               help='only print file/directory names, nothing else')
+        subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
+                               type=location_validator(),
                               help='repository/archive to list contents of')
        mount_epilog = textwrap.dedent("""
        This command mounts an archive as a FUSE filesystem. This can be useful for
@ -803,7 +886,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                               help='number of yearly archives to keep')
        subparser.add_argument('-p', '--prefix', dest='prefix', type=str,
                               help='only consider archive names starting with this prefix')
-        subparser.add_argument('repository', metavar='REPOSITORY',
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
                               type=location_validator(archive=False),
                               help='repository to prune')

@ -819,11 +902,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")

        args = parser.parse_args(args or ['-h'])
        self.verbose = args.verbose
+        os.umask(args.umask)
+        RemoteRepository.remote_path = args.remote_path
+        RemoteRepository.umask = args.umask
        update_excludes(args)
        return args.func(args)


-def sig_info_handler(signum, stack):
+def sig_info_handler(signum, stack):  # pragma: no cover
    """search the stack for infos about the currently processed file and print them"""
    for frame in inspect.getouterframes(stack):
        func, loc = frame[3], frame[0].f_locals
@ -846,7 +932,7 @@ def sig_info_handler(signum, stack):
            break


-def setup_signal_handlers():
+def setup_signal_handlers():  # pragma: no cover
    sigs = []
    if hasattr(signal, 'SIGUSR1'):
        sigs.append(signal.SIGUSR1)  # kill -USR1 pid
@ -856,7 +942,7 @@ def setup_signal_handlers():
        signal.signal(sig, sig_info_handler)


-def main():
+def main():  # pragma: no cover
    # Make sure stdout and stderr have errors='replace') to avoid unicode
    # issues when print()-ing unicode file names
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True)
--- a/borg/cache.py
+++ b/borg/cache.py
@ -3,6 +3,7 @@ from .remote import cache_if_remote
 import errno
 import msgpack
 import os
+import stat
 import sys
 from binascii import hexlify
 import shutil
@ -96,8 +97,7 @@ class Cache:
        with open(os.path.join(self.path, 'config'), 'w') as fd:
            config.write(fd)
        ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
-        with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd:
-            pass  # empty file
+        os.makedirs(os.path.join(self.path, 'chunks.archive.d'))
        with open(os.path.join(self.path, 'files'), 'wb') as fd:
            pass  # empty file

@ -153,7 +153,6 @@ class Cache:
        os.mkdir(txn_dir)
        shutil.copy(os.path.join(self.path, 'config'), txn_dir)
        shutil.copy(os.path.join(self.path, 'chunks'), txn_dir)
-        shutil.copy(os.path.join(self.path, 'chunks.archive'), txn_dir)
        shutil.copy(os.path.join(self.path, 'files'), txn_dir)
        os.rename(os.path.join(self.path, 'txn.tmp'),
                  os.path.join(self.path, 'txn.active'))
@ -195,7 +194,6 @@ class Cache:
        if os.path.exists(txn_dir):
            shutil.copy(os.path.join(txn_dir, 'config'), self.path)
            shutil.copy(os.path.join(txn_dir, 'chunks'), self.path)
-            shutil.copy(os.path.join(txn_dir, 'chunks.archive'), self.path)
            shutil.copy(os.path.join(txn_dir, 'files'), self.path)
            os.rename(txn_dir, os.path.join(self.path, 'txn.tmp'))
            if os.path.exists(os.path.join(self.path, 'txn.tmp')):
@ -206,54 +204,31 @@ class Cache:
    def sync(self):
        """Re-synchronize chunks cache with repository.

-        If present, uses a compressed tar archive of known backup archive
-        indices, so it only needs to fetch infos from repo and build a chunk
-        index once per backup archive.
-        If out of sync, the tar gets rebuilt from known + fetched chunk infos,
-        so it has complete and current information about all backup archives.
-        Finally, it builds the master chunks index by merging all indices from
-        the tar.
-
-        Note: compression (esp. xz) is very effective in keeping the tar
-              relatively small compared to the files it contains.
+        Maintains a directory with known backup archive indexes, so it only
+        needs to fetch infos from repo and build a chunk index once per backup
+        archive.
+        If out of sync, missing archive indexes get added, outdated indexes
+        get removed and a new master chunks index is built by merging all
+        archive indexes.
        """
-        in_archive_path = os.path.join(self.path, 'chunks.archive')
-        out_archive_path = os.path.join(self.path, 'chunks.archive.tmp')
+        archive_path = os.path.join(self.path, 'chunks.archive.d')

-        def open_in_archive():
-            try:
-                tf = tarfile.open(in_archive_path, 'r')
-            except OSError as e:
-                if e.errno != errno.ENOENT:
-                    raise
-                # file not found
-                tf = None
-            except tarfile.ReadError:
-                # empty file?
-                tf = None
-            return tf
+        def mkpath(id, suffix=''):
+            id_hex = hexlify(id).decode('ascii')
+            path = os.path.join(archive_path, id_hex + suffix)
+            return path.encode('utf-8')

-        def open_out_archive():
-            for compression in ('xz', 'bz2', 'gz'):
-                # xz needs py 3.3, bz2 and gz also work on 3.2
-                try:
-                    tf = tarfile.open(out_archive_path, 'w:'+compression, format=tarfile.PAX_FORMAT)
-                    break
-                except tarfile.CompressionError:
-                    continue
-            else:  # shouldn't happen
-                tf = None
-            return tf
+        def cached_archives():
+            fns = os.listdir(archive_path)
+            # filenames with 64 hex digits == 256bit
+            return set(unhexlify(fn) for fn in fns if len(fn) == 64)

-        def close_archive(tf):
-            if tf:
-                tf.close()
+        def repo_archives():
+            return set(info[b'id'] for info in self.manifest.archives.values())

-        def delete_in_archive():
-            os.unlink(in_archive_path)
-
-        def rename_out_archive():
-            os.rename(out_archive_path, in_archive_path)
+        def cleanup_outdated(ids):
+            for id in ids:
+                os.unlink(mkpath(id))

        def add(chunk_idx, id, size, csize, incr=1):
            try:
@ -262,16 +237,7 @@ class Cache:
            except KeyError:
                chunk_idx[id] = incr, size, csize

-        def transfer_known_idx(archive_id, tf_in, tf_out):
-            archive_id_hex = hexlify(archive_id).decode('ascii')
-            tarinfo = tf_in.getmember(archive_id_hex)
-            archive_name = tarinfo.pax_headers['archive_name']
-            print('Already known archive:', archive_name)
-            f_in = tf_in.extractfile(archive_id_hex)
-            tf_out.addfile(tarinfo, f_in)
-            return archive_name
-
-        def fetch_and_build_idx(archive_id, repository, key, tmp_dir, tf_out):
+        def fetch_and_build_idx(archive_id, repository, key):
            chunk_idx = ChunkIndex()
            cdata = repository.get(archive_id)
            data = key.decrypt(archive_id, cdata)
@ -280,7 +246,6 @@ class Cache:
            if archive[b'version'] != 1:
                raise Exception('Unknown archive metadata version')
            decode_dict(archive, (b'name',))
-            print('Analyzing new archive:', archive[b'name'])
            unpacker = msgpack.Unpacker()
            for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
                data = key.decrypt(item_id, chunk)
@ -293,60 +258,79 @@ class Cache:
                    if b'chunks' in item:
                        for chunk_id, size, csize in item[b'chunks']:
                            add(chunk_idx, chunk_id, size, csize)
-            archive_id_hex = hexlify(archive_id).decode('ascii')
-            file_tmp = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
-            chunk_idx.write(file_tmp)
-            tarinfo = tf_out.gettarinfo(file_tmp, archive_id_hex)
-            tarinfo.pax_headers['archive_name'] = archive[b'name']
-            with open(file_tmp, 'rb') as f:
-                tf_out.addfile(tarinfo, f)
-            os.unlink(file_tmp)
+            fn = mkpath(archive_id)
+            fn_tmp = mkpath(archive_id, suffix='.tmp')
+            try:
+                chunk_idx.write(fn_tmp)
+            except Exception:
+                os.unlink(fn_tmp)
+            else:
+                os.rename(fn_tmp, fn)
+            return chunk_idx

-        def create_master_idx(chunk_idx, tf_in, tmp_dir):
+        def lookup_name(archive_id):
+            for name, info in self.manifest.archives.items():
+                if info[b'id'] == archive_id:
+                    return name
+
+        def create_master_idx(chunk_idx):
+            print('Synchronizing chunks cache...')
+            cached_ids = cached_archives()
+            archive_ids = repo_archives()
+            print('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' % (
+                len(archive_ids), len(cached_ids),
+                len(cached_ids - archive_ids), len(archive_ids - cached_ids), ))
+            # deallocates old hashindex, creates empty hashindex:
            chunk_idx.clear()
-            for tarinfo in tf_in:
-                archive_id_hex = tarinfo.name
-                tf_in.extract(archive_id_hex, tmp_dir)
-                chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
-                archive_chunk_idx = ChunkIndex.read(chunk_idx_path)
-                for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems():
-                    add(chunk_idx, chunk_id, size, csize, incr=count)
-                os.unlink(chunk_idx_path)
+            cleanup_outdated(cached_ids - archive_ids)
+            if archive_ids:
+                chunk_idx = None
+                for archive_id in archive_ids:
+                    archive_name = lookup_name(archive_id)
+                    if archive_id in cached_ids:
+                        archive_chunk_idx_path = mkpath(archive_id)
+                        print("Reading cached archive chunk index for %s ..." % archive_name)
+                        archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path)
+                    else:
+                        print('Fetching and building archive index for %s ...' % archive_name)
+                        archive_chunk_idx = fetch_and_build_idx(archive_id, repository, self.key)
+                    print("Merging into master chunks index ...")
+                    if chunk_idx is None:
+                        # we just use the first archive's idx as starting point,
+                        # to avoid growing the hash table from 0 size and also
+                        # to save 1 merge call.
+                        chunk_idx = archive_chunk_idx
+                    else:
+                        chunk_idx.merge(archive_chunk_idx)
+            print('Done.')
+            return chunk_idx
+
+        def legacy_cleanup():
+            """bring old cache dirs into the desired state (cleanup and adapt)
+
+            Best effort: removes the obsolete 'chunks.archive' and
+            'chunks.archive.tmp' files and creates the archive index directory.
+            OS-level failures (file missing, directory already there, ...) are
+            ignored on purpose.
+            """
+            # catch OSError instead of using a bare except, so KeyboardInterrupt
+            # and SystemExit still propagate. the finer grained subclasses
+            # (FileNotFoundError, FileExistsError) do not exist on python 3.2.
+            for fn in ('chunks.archive', 'chunks.archive.tmp'):
+                try:
+                    os.unlink(os.path.join(self.path, fn))
+                except OSError:
+                    pass
+            try:
+                os.mkdir(archive_path)
+            except OSError:
+                pass

        self.begin_txn()
-        print('Synchronizing chunks cache...')
-        # XXX we have to do stuff on disk due to lacking ChunkIndex api
-        with tempfile.TemporaryDirectory(prefix='borg-tmp') as tmp_dir:
-            repository = cache_if_remote(self.repository)
-            out_archive = open_out_archive()
-            in_archive = open_in_archive()
-            if in_archive:
-                known_ids = set(unhexlify(hexid) for hexid in in_archive.getnames())
-            else:
-                known_ids = set()
-            archive_ids = set(info[b'id'] for info in self.manifest.archives.values())
-            print('Rebuilding archive collection. Known: %d Repo: %d Unknown: %d' % (
-                len(known_ids), len(archive_ids), len(archive_ids - known_ids), ))
-            for archive_id in archive_ids & known_ids:
-                transfer_known_idx(archive_id, in_archive, out_archive)
-            close_archive(in_archive)
-            delete_in_archive()  # free disk space
-            for archive_id in archive_ids - known_ids:
-                fetch_and_build_idx(archive_id, repository, self.key, tmp_dir, out_archive)
-            close_archive(out_archive)
-            rename_out_archive()
-            print('Merging collection into master chunks cache...')
-            in_archive = open_in_archive()
-            create_master_idx(self.chunks, in_archive, tmp_dir)
-            close_archive(in_archive)
-            print('Done.')
+        repository = cache_if_remote(self.repository)
+        legacy_cleanup()
+        self.chunks = create_master_idx(self.chunks)

    def add_chunk(self, id, data, stats):
        if not self.txn_active:
            self.begin_txn()
-        if self.seen_chunk(id):
-            return self.chunk_incref(id, stats)
        size = len(data)
+        if self.seen_chunk(id, size):
+            return self.chunk_incref(id, stats)
        data = self.key.encrypt(data)
        csize = len(data)
        self.repository.put(id, data, wait=False)
@ -354,8 +338,14 @@ class Cache:
        stats.update(size, csize, True)
        return id, size, csize

-    def seen_chunk(self, id):
-        return self.chunks.get(id, (0, 0, 0))[0]
+    def seen_chunk(self, id, size=None):
+        """Return the reference count stored in self.chunks for chunk *id* (0 if unknown).
+
+        If *size* is given and a size is stored for this id, both must be equal,
+        otherwise an Exception is raised.
+        """
+        # unknown id: refcount 0 and no stored size, so the size check below is skipped
+        refcount, stored_size, _ = self.chunks.get(id, (0, None, None))
+        if size is not None and stored_size is not None and size != stored_size:
+            # we already have a chunk with that id, but different size.
+            # this is either a hash collision (unlikely) or corruption or a bug.
+            raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" % (
+                            id, stored_size, size))
+        return refcount

    def chunk_incref(self, id, stats):
        if not self.txn_active:
@ -378,7 +368,7 @@ class Cache:
            stats.update(-size, -csize, False)

    def file_known_and_unchanged(self, path_hash, st):
-        if not self.do_files:
+        if not (self.do_files and stat.S_ISREG(st.st_mode)):
            return None
        if self.files is None:
            self._read_files()
@ -395,7 +385,7 @@ class Cache:
            return None

    def memorize_file(self, path_hash, st, ids):
-        if not self.do_files:
+        if not (self.do_files and stat.S_ISREG(st.st_mode)):
            return
        # Entry: Age, inode, size, mtime, chunk ids
        mtime_ns = st_mtime_ns(st)
--- a/borg/chunker.pyx
+++ b/borg/chunker.pyx
@ -20,7 +20,7 @@ cdef extern from "_chunker.c":
 cdef class Chunker:
    cdef _Chunker *chunker

-    def __cinit__(self, seed, chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size):
+    def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
        min_size = 1 << chunk_min_exp
        max_size = 1 << chunk_max_exp
        hash_mask = (1 << hash_mask_bits) - 1
--- a/borg/compress.pyx
+++ b/borg/compress.pyx
@ -0,0 +1,199 @@
+import zlib
+try:
+    import lzma
+except ImportError:
+    lzma = None
+
+cdef extern from "lz4.h":
+    int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
+    int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
+
+
+cdef class CompressorBase:
+    """
+    base class for all (de)compression classes,
+    also handles compression format auto detection and
+    adding/stripping the ID header (which enables auto detection).
+    """
+    ID = b'\xFF\xFF'  # reserved and not used
+                      # overwrite with a unique 2-bytes bytestring in child classes
+    name = 'baseclass'
+
+    @classmethod
+    def detect(cls, data):
+        # True if data begins with the ID bytes of this class
+        return data.startswith(cls.ID)
+
+    def __init__(self, **kwargs):
+        # accepts arbitrary keyword arguments and ignores them
+        pass
+
+    def compress(self, data):
+        # add ID bytes
+        return self.ID + data
+
+    def decompress(self, data):
+        # strip ID bytes
+        # (always drops the first 2 bytes, they are not compared to self.ID here)
+        return data[2:]
+
+
+class CNONE(CompressorBase):
+    """
+    none - data is stored as is, only the ID header gets added / removed
+    """
+    ID = b'\x00\x00'
+    name = 'none'
+
+    def compress(self, data):
+        # nothing to compress, the base class just prepends the ID bytes
+        return super().compress(data)
+
+    def decompress(self, data):
+        payload = super().decompress(data)
+        # always hand out real bytes, whatever sliceable type was given
+        return payload if isinstance(payload, bytes) else bytes(payload)
+
+
+cdef class LZ4(CompressorBase):
+    """
+    raw LZ4 compression / decompression (liblz4).
+
+    Features:
+        - lz4 is super fast
+        - wrapper releases CPython's GIL to support multithreaded code
+        - buffer given by caller, avoiding frequent reallocation and buffer duplication
+        - uses safe lz4 methods that never go beyond the end of the output buffer
+
+    But beware:
+        - this is not very generic, the given buffer MUST be large enough to
+          handle all compression or decompression output (or it will fail).
+        - you must not do method calls to the same LZ4 instance from different
+          threads at the same time - create one LZ4 instance per thread!
+    """
+    ID = b'\x01\x00'
+    name = 'lz4'
+
+    cdef char *buffer  # helper buffer for (de)compression output
+    cdef int bufsize  # size of this buffer
+
+    def __cinit__(self, **kwargs):
+        # the 'buffer' keyword argument is mandatory (KeyError if it is missing)
+        buffer = kwargs['buffer']
+        # only the raw char pointer and the length are stored here.
+        # NOTE(review): presumably the caller has to keep the buffer object
+        # alive as long as this instance is used - confirm.
+        self.buffer = buffer
+        self.bufsize = len(buffer)
+
+    def compress(self, idata):
+        # compress idata into the helper buffer, return ID bytes + compressed data
+        if not isinstance(idata, bytes):
+            idata = bytes(idata)  # code below does not work with memoryview
+        cdef int isize = len(idata)
+        cdef int osize = self.bufsize
+        cdef char *source = idata
+        cdef char *dest = self.buffer
+        with nogil:
+            osize = LZ4_compress_limitedOutput(source, dest, isize, osize)
+        # a result of 0 is treated as failure
+        if not osize:
+            raise Exception('lz4 compress failed')
+        return super().compress(dest[:osize])
+
+    def decompress(self, idata):
+        # strip the ID bytes, decompress the rest into the helper buffer
+        # and return the first osize bytes of it
+        if not isinstance(idata, bytes):
+            idata = bytes(idata)  # code below does not work with memoryview
+        idata = super().decompress(idata)
+        cdef int isize = len(idata)
+        cdef int osize = self.bufsize
+        cdef char *source = idata
+        cdef char *dest = self.buffer
+        with nogil:
+            osize = LZ4_decompress_safe(source, dest, isize, osize)
+        if osize < 0:
+            # malformed input data, buffer too small, ...
+            raise Exception('lz4 decompress failed')
+        return dest[:osize]
+
+
+class LZMA(CompressorBase):
+    """
+    lzma compression / decompression, needs the lzma module (python 3.3+ stdlib)
+    """
+    ID = b'\x02\x00'
+    name = 'lzma'
+
+    def __init__(self, level=6, **kwargs):
+        super().__init__(**kwargs)
+        self.level = level
+        # the module level import fell back to None if lzma is not available
+        if lzma is None:
+            raise ValueError('No lzma support found.')
+
+    def compress(self, data):
+        # lzma's own integrity check is switched off, integrity is checked elsewhere already
+        compressed = lzma.compress(data, check=lzma.CHECK_NONE, preset=self.level)
+        return super().compress(compressed)
+
+    def decompress(self, data):
+        # remove the ID bytes first, the rest is the lzma stream
+        return lzma.decompress(super().decompress(data))
+
+
+class ZLIB(CompressorBase):
+    """
+    zlib compression / decompression (python stdlib)
+
+    For compatibility, no ID bytes are added or stripped: the data is a plain
+    zlib stream and gets detected by looking at the zlib header itself.
+    """
+    ID = b'\x08\x00'  # not used here, see detect()
+                      # avoid all 0x.8.. IDs elsewhere!
+    name = 'zlib'
+
+    @classmethod
+    def detect(cls, data):
+        """Return True if the first 2 bytes of data look like a zlib header."""
+        # matches misc. patterns 0x.8.. used by zlib
+        if len(data) < 2:
+            # too short to have a zlib header - say "not ours" instead of
+            # failing with an unpacking error
+            return False
+        cmf, flg = data[0], data[1]
+        is_deflate = cmf & 0x0f == 8
+        check_ok = (cmf * 256 + flg) % 31 == 0
+        return check_ok and is_deflate
+
+    def __init__(self, level=6, **kwargs):
+        super().__init__(**kwargs)
+        self.level = level
+
+    def compress(self, data):
+        # note: for compatibility no super call, do not add ID bytes
+        return zlib.compress(data, self.level)
+
+    def decompress(self, data):
+        # note: for compatibility no super call, do not strip ID bytes
+        return zlib.decompress(data)
+
+
+# maps the compressor name to the class implementing it
+COMPRESSOR_TABLE = {
+    CNONE.name: CNONE,
+    LZ4.name: LZ4,
+    ZLIB.name: ZLIB,
+    LZMA.name: LZMA,
+}
+COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ]  # check fast stuff first
+
+def get_compressor(name, **kwargs):
+    """Return an instance of the compressor class registered under *name*.
+
+    **kwargs are passed on to the class; an unknown name raises KeyError.
+    """
+    cls = COMPRESSOR_TABLE[name]
+    return cls(**kwargs)
+
+
+class Compressor:
+    """
+    compresses using a compressor with given name and parameters
+    decompresses everything we can handle (autodetect)
+    """
+    def __init__(self, name='none', **kwargs):
+        # note: the default name must be a key of COMPRESSOR_TABLE
+        # **kwargs are given to the compressor and also to every decompressor
+        self.params = kwargs
+        self.compressor = get_compressor(name, **self.params)
+
+    def compress(self, data):
+        return self.compressor.compress(data)
+
+    def decompress(self, data):
+        """Decompress data with the first class of COMPRESSOR_LIST that detects its header.
+
+        Raises ValueError if no class recognizes the first 2 bytes.
+        """
+        hdr = bytes(data[:2])  # detect() does not work with memoryview
+        for cls in COMPRESSOR_LIST:
+            if cls.detect(hdr):
+                return cls(**self.params).decompress(data)
+        else:
+            raise ValueError('No decompressor for this data found: %r.' % (hdr, ))
+
+
+# a buffer used for (de)compression result, which can be slightly bigger
+# than the chunk buffer in the worst (incompressible data) case, add 10%:
+COMPR_BUFFER = bytes(int(1.1 * 2 ** 23))  # CHUNK_MAX_EXP == 23
--- a/borg/crypto.pyx
+++ b/borg/crypto.pyx
@ -52,7 +52,7 @@ bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0]
 long_to_bytes = lambda x: _long.pack(x)


-def num_aes_blocks(length):
+def num_aes_blocks(int length):
    """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data.
       Note: this is only correct for modes without padding, like AES-CTR.
    """
--- a/borg/hashindex.pyx
+++ b/borg/hashindex.pyx
@ -14,6 +14,7 @@ cdef extern from "_hashindex.c":
    void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
                             long long *unique_size, long long *unique_csize,
                             long long *total_unique_chunks, long long *total_chunks)
+    void hashindex_merge(HashIndex *index, HashIndex *other)
    int hashindex_get_size(HashIndex *index)
    int hashindex_write(HashIndex *index, char *path)
    void *hashindex_get(HashIndex *index, void *key)
@ -24,15 +25,19 @@ cdef extern from "_hashindex.c":
    int _le32toh(int v)


-_NoDefault = object()
+cdef _NoDefault = object()

+cimport cython
+
+@cython.internal
 cdef class IndexBase:
    cdef HashIndex *index
-    key_size = 32
+    cdef int key_size

-    def __cinit__(self, capacity=0, path=None):
+    def __cinit__(self, capacity=0, path=None, key_size=32):
+        self.key_size = key_size
        if path:
-            self.index = hashindex_read(<bytes>os.fsencode(path))
+            self.index = hashindex_read(os.fsencode(path))
            if not self.index:
                raise Exception('hashindex_read failed')
        else:
@ -49,7 +54,7 @@ cdef class IndexBase:
        return cls(path=path)

    def write(self, path):
-        if not hashindex_write(self.index, <bytes>os.fsencode(path)):
+        if not hashindex_write(self.index, os.fsencode(path)):
            raise Exception('hashindex_write failed')

    def clear(self):
@ -63,7 +68,7 @@ cdef class IndexBase:
            self[key] = value

    def __delitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
        if not hashindex_delete(self.index, <char *>key):
            raise Exception('hashindex_delete failed')

@ -92,14 +97,14 @@ cdef class NSIndex(IndexBase):
    value_size = 8

    def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
        data = <int *>hashindex_get(self.index, <char *>key)
        if not data:
            raise KeyError
        return _le32toh(data[0]), _le32toh(data[1])

    def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
        cdef int[2] data
        data[0] = _htole32(value[0])
        data[1] = _htole32(value[1])
@ -107,20 +112,20 @@ cdef class NSIndex(IndexBase):
            raise Exception('hashindex_set failed')

    def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
        data = <int *>hashindex_get(self.index, <char *>key)
        return data != NULL

    def iteritems(self, marker=None):
        cdef const void *key
-        iter = NSKeyIterator()
+        iter = NSKeyIterator(self.key_size)
        iter.idx = self
        iter.index = self.index
        if marker:
            key = hashindex_get(self.index, <char *>marker)
            if marker is None:
                raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
        return iter


@ -128,9 +133,11 @@ cdef class NSKeyIterator:
    cdef NSIndex idx
    cdef HashIndex *index
    cdef const void *key
+    cdef int key_size

-    def __cinit__(self):
+    def __cinit__(self, key_size):
        self.key = NULL
+        self.key_size = key_size

    def __iter__(self):
        return self
@ -139,8 +146,8 @@ cdef class NSKeyIterator:
        self.key = hashindex_next_key(self.index, <char *>self.key)
        if not self.key:
            raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))


 cdef class ChunkIndex(IndexBase):
@ -148,14 +155,14 @@ cdef class ChunkIndex(IndexBase):
    value_size = 12

    def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
        data = <int *>hashindex_get(self.index, <char *>key)
        if not data:
            raise KeyError
        return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])

    def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
        cdef int[3] data
        data[0] = _htole32(value[0])
        data[1] = _htole32(value[1])
@ -164,20 +171,20 @@ cdef class ChunkIndex(IndexBase):
            raise Exception('hashindex_set failed')

    def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
        data = <int *>hashindex_get(self.index, <char *>key)
        return data != NULL

    def iteritems(self, marker=None):
        cdef const void *key
-        iter = ChunkKeyIterator()
+        iter = ChunkKeyIterator(self.key_size)
        iter.idx = self
        iter.index = self.index
        if marker:
            key = hashindex_get(self.index, <char *>marker)
            if marker is None:
                raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
        return iter

    def summarize(self):
@ -187,14 +194,19 @@ cdef class ChunkIndex(IndexBase):
                            &total_unique_chunks, &total_chunks)
        return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks

+    def merge(self, ChunkIndex other):
+        # hands the underlying HashIndex of self and of other to the C function hashindex_merge()
+        hashindex_merge(self.index, other.index)
+

 cdef class ChunkKeyIterator:
    cdef ChunkIndex idx
    cdef HashIndex *index
    cdef const void *key
+    cdef int key_size

-    def __cinit__(self):
+    def __cinit__(self, key_size):
        self.key = NULL
+        self.key_size = key_size

    def __iter__(self):
        return self
@ -203,5 +215,5 @@ cdef class ChunkKeyIterator:
        self.key = hashindex_next_key(self.index, <char *>self.key)
        if not self.key:
            raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
--- a/borg/helpers.py
+++ b/borg/helpers.py
@ -1,12 +1,15 @@
 import argparse
 import binascii
 from collections import namedtuple
+from functools import wraps
 import grp
 import os
 import pwd
 import re
 import sys
 import time
+import unicodedata
+
 from datetime import datetime, timezone, timedelta
 from fnmatch import translate
 from operator import attrgetter
@ -220,6 +223,23 @@ def exclude_path(path, patterns):
 # unify the two cases, we add a path separator to the end of
 # the path before matching.

+def normalized(func):
+    """ Decorator for the Pattern match methods.
+
+    On OSX, the decorated method gets its path argument NFD-normalized, so it
+    can be compared to the (also normalized) pattern. On all other platforms
+    the original method is returned unchanged."""
+    if sys.platform not in ('darwin',):
+        # Windows and Unix filesystems allow different forms, so users
+        # always have to enter an exact match
+        return func
+
+    # HFS+ converts paths to a canonical form, so users shouldn't be
+    # required to enter an exact match
+    @wraps(func)
+    def normalize_wrapper(self, path):
+        nfd_path = unicodedata.normalize("NFD", path)
+        return func(self, nfd_path)
+
+    return normalize_wrapper
+
 class IncludePattern:
    """Literal files or directories listed on the command line
    for some operations (e.g. extract, but not create).
@ -227,8 +247,12 @@ class IncludePattern:
    path match as well.  A trailing slash makes no difference.
    """
    def __init__(self, pattern):
+        if sys.platform in ('darwin',):
+            pattern = unicodedata.normalize("NFD", pattern)
+
        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep

+    @normalized
    def match(self, path):
        return (path+os.path.sep).startswith(self.pattern)

@ -245,10 +269,15 @@ class ExcludePattern(IncludePattern):
            self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep
        else:
            self.pattern = os.path.normpath(pattern)+os.path.sep+'*'
+
+        if sys.platform in ('darwin',):
+            self.pattern = unicodedata.normalize("NFD", self.pattern)
+
        # fnmatch and re.match both cache compiled regular expressions.
        # Nevertheless, this is about 10 times faster.
        self.regex = re.compile(translate(self.pattern))

+    @normalized
    def match(self, path):
        return self.regex.match(path+os.path.sep) is not None

@ -277,8 +306,44 @@ def timestamp(s):


 def ChunkerParams(s):
-    window_size, chunk_mask, chunk_min, chunk_max = s.split(',')
-    return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max)
+    """Parse 'CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE' into a tuple of 4 ints.
+
+    Raises ValueError if there are not exactly 4 comma separated values, if a
+    value is not an integer or if the max. chunk size exponent is above 23.
+    """
+    chunk_min, chunk_max, chunk_mask, window_size = s.split(',')
+    max_exp = int(chunk_max)
+    if max_exp > 23:
+        # do not go beyond 2**23 (8MB) chunk size now,
+        # COMPR_BUFFER can only cope with up to this size
+        raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)')
+    return int(chunk_min), max_exp, int(chunk_mask), int(window_size)
+
+
+def CompressionSpec(s):
+    """Parse a --compression argument into a dict of compressor parameters.
+
+    Accepted forms:
+        N              (DEPRECATED) same as zlib,N - N must be 0..9
+        none | lz4     no level
+        zlib | lzma    default level 6
+        zlib,N | lzma,N    level N must be 0..9
+
+    Returns dict(name=...) or dict(name=..., level=...).
+    Raises ValueError for everything else.
+    """
+    values = s.split(',')  # note: split() always returns at least 1 element
+    count = len(values)
+    name = values[0]
+    try:
+        level = int(name)
+    except ValueError:
+        pass  # not a number: --compression algo[,...], handled below
+    else:
+        # DEPRECATED: it is just --compression N
+        if count == 1 and 0 <= level <= 9:
+            return dict(name='zlib', level=level)
+        raise ValueError('compression level must be a single number from 0 to 9')
+    if name in ('none', 'lz4', ):
+        return dict(name=name)
+    if name in ('zlib', 'lzma', ):
+        if count < 2:
+            level = 6  # default compression level in py stdlib
+        elif count == 2:
+            level = int(values[1])
+            if not 0 <= level <= 9:
+                raise ValueError('compression level must be from 0 to 9')
+        else:
+            raise ValueError('too many values for compression spec')
+        return dict(name=name, level=level)
+    raise ValueError('unknown compression: %r' % (name, ))


 def is_cachedir(path):
@ -430,13 +495,34 @@ class Location:
                         r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')
    scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?'
                        r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$')
+    # get the repo from BORG_REPO env and the optional archive from param.
+    # if the syntax requires giving REPOSITORY (see "borg mount"),
+    # use "::" to let it use the env var.
+    # if REPOSITORY argument is optional, it'll automatically use the env.
+    env_re = re.compile(r'(?:::(?P<archive>.+)?)?$')

-    def __init__(self, text):
+    def __init__(self, text=''):
        self.orig = text
-        if not self.parse(text):
+        if not self.parse(self.orig):
            raise ValueError

    def parse(self, text):
+        """Parse a location string, falling back to the BORG_REPO env var.
+
+        First tries to parse `text` directly via _parse(). If that fails and
+        `text` has the form matched by env_re (empty, "::" or "::archive"),
+        the repository part is taken from the BORG_REPO environment variable
+        and the archive name (if any) from `text`.
+
+        Returns True if a location could be parsed, False otherwise.
+        """
+        valid = self._parse(text)
+        if valid:
+            return True
+        # text is not a full location; see whether it only names an archive
+        m = self.env_re.match(text)
+        if not m:
+            return False
+        repo = os.environ.get('BORG_REPO')
+        if repo is None:
+            return False
+        valid = self._parse(repo)
+        if not valid:
+            return False
+        # the archive comes from the given text, not from the env var
+        self.archive = m.group('archive')
+        return True
+
+    def _parse(self, text):
        m = self.ssh_re.match(text)
        if m:
            self.proto = m.group('proto')
--- a/borg/key.py
+++ b/borg/key.py
@ -6,9 +6,9 @@ import msgpack
 import textwrap
 import hmac
 from hashlib import sha256
-import zlib

 from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
+from .compress import Compressor, COMPR_BUFFER
 from .helpers import IntegrityError, get_keys_dir, Error

 PREFIX = b'\0' * 8
@ -68,7 +68,7 @@ class KeyBase:
        self.TYPE_STR = bytes([self.TYPE])
        self.repository = repository
        self.target = None  # key location file path / repo obj
-        self.compression_level = 0
+        self.compressor = Compressor('none', buffer=COMPR_BUFFER)

    def id_hash(self, data):
        """Return HMAC hash using the "id" HMAC key
@ -99,12 +99,12 @@ class PlaintextKey(KeyBase):
        return sha256(data).digest()

    def encrypt(self, data):
-        return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)])
+        return b''.join([self.TYPE_STR, self.compressor.compress(data)])

    def decrypt(self, id, data):
        if data[0] != self.TYPE:
            raise IntegrityError('Invalid encryption envelope')
-        data = zlib.decompress(memoryview(data)[1:])
+        data = self.compressor.decompress(memoryview(data)[1:])
        if id and sha256(data).digest() != id:
            raise IntegrityError('Chunk id verification failed')
        return data
@ -131,7 +131,7 @@ class AESKeyBase(KeyBase):
        return HMAC(self.id_key, data, sha256).digest()

    def encrypt(self, data):
-        data = zlib.compress(data, self.compression_level)
+        data = self.compressor.compress(data)
        self.enc_cipher.reset()
        data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
        hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
@ -144,7 +144,7 @@ class AESKeyBase(KeyBase):
        if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac:
            raise IntegrityError('Encryption envelope checksum mismatch')
        self.dec_cipher.reset(iv=PREFIX + data[33:41])
-        data = zlib.decompress(self.dec_cipher.decrypt(data[41:]))  # should use memoryview
+        data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:]))
        if id and HMAC(self.id_key, data, sha256).digest() != id:
            raise IntegrityError('Chunk id verification failed')
        return data
--- a/borg/locking.py
+++ b/borg/locking.py
@ -176,7 +176,11 @@ class LockRoster:
            json.dump(data, f)

    def remove(self):
-        os.unlink(self.path)
+        try:
+            os.unlink(self.path)
+        except OSError as e:
+            if e.errno != errno.ENOENT:
+                raise

    def get(self, key):
        roster = self.load()
--- a/borg/lrucache.py
+++ b/borg/lrucache.py
@ -1,42 +1,41 @@
-class LRUCache(dict):
-
-    def __init__(self, capacity):
-        super().__init__()
+class LRUCache:
+    def __init__(self, capacity, dispose):
+        self._cache = {}
        self._lru = []
        self._capacity = capacity
+        self._dispose = dispose

    def __setitem__(self, key, value):
-        try:
-            self._lru.remove(key)
-        except ValueError:
-            pass
+        assert key not in self._cache, (
+            "Unexpected attempt to replace a cached item,"
+            " without first deleting the old item.")
        self._lru.append(key)
        while len(self._lru) > self._capacity:
            del self[self._lru[0]]
-        return super().__setitem__(key, value)
+        self._cache[key] = value

    def __getitem__(self, key):
-        try:
-            self._lru.remove(key)
-            self._lru.append(key)
-        except ValueError:
-            pass
-        return super().__getitem__(key)
+        value = self._cache[key]  # raise KeyError if not found
+        self._lru.remove(key)
+        self._lru.append(key)
+        return value

    def __delitem__(self, key):
-        try:
-            self._lru.remove(key)
-        except ValueError:
-            pass
-        return super().__delitem__(key)
+        value = self._cache.pop(key)  # raise KeyError if not found
+        self._dispose(value)
+        self._lru.remove(key)

-    def pop(self, key, default=None):
-        try:
-            self._lru.remove(key)
-        except ValueError:
-            pass
-        return super().pop(key, default)
+    def __contains__(self, key):
+        return key in self._cache

-    def _not_implemented(self, *args, **kw):
-        raise NotImplementedError
-    popitem = setdefault = update = _not_implemented
+    def clear(self):
+        """Dispose of every cached value and empty the cache.
+
+        The dispose callback given to the constructor is called once for each
+        value currently stored. Afterwards the cache holds no items and no
+        usage history.
+        """
+        for value in self._cache.values():
+            self._dispose(value)
+        self._cache.clear()
+        # Also forget the usage order. Otherwise stale keys stay in _lru and a
+        # later __setitem__ would try to evict a key that is no longer in
+        # _cache, raising KeyError. Slice deletion is used instead of
+        # list.clear() because the latter does not exist on Python 3.2.
+        del self._lru[:]
+
+    # useful for testing
+    def items(self):
+        return self._cache.items()
+
+    def __len__(self):
+        return len(self._cache)
--- a/borg/remote.py
+++ b/borg/remote.py
@ -28,12 +28,13 @@ class InvalidRPCMethod(Error):
    """RPC method is not valid"""


-class RepositoryServer:
+class RepositoryServer:  # pragma: no cover
    rpc_methods = (
        '__len__',
        'check',
        'commit',
        'delete',
+        'destroy',
        'get',
        'list',
        'negotiate',
@ -107,9 +108,10 @@ class RepositoryServer:

 class RemoteRepository:
    extra_test_args = []
+    remote_path = None
+    umask = None

    class RPCError(Exception):
-
        def __init__(self, name):
            self.name = name

@ -123,9 +125,11 @@ class RemoteRepository:
        self.responses = {}
        self.unpacker = msgpack.Unpacker(use_list=False)
        self.p = None
+        # use local umask also for the remote process
+        umask = ['--umask', '%03o' % self.umask]
        if location.host == '__testsuite__':
-            args = [sys.executable, '-m', 'borg.archiver', 'serve'] + self.extra_test_args
-        else:
+            args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args
+        else:  # pragma: no cover
            args = ['ssh']
            if location.port:
                args += ['-p', str(location.port)]
@ -133,7 +137,7 @@ class RemoteRepository:
                args.append('%s@%s' % (location.user, location.host))
            else:
                args.append('%s' % location.host)
-            args += ['borg', 'serve']
+            args += [self.remote_path, 'serve'] + umask
        self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE)
        self.stdin_fd = self.p.stdin.fileno()
        self.stdout_fd = self.p.stdout.fileno()
--- a/borg/repository.py
+++ b/borg/repository.py
@ -50,14 +50,14 @@ class Repository:
        """Object with key {} not found in repository {}."""

    def __init__(self, path, create=False, exclusive=False):
-        self.path = path
+        self.path = os.path.abspath(path)
        self.io = None
        self.lock = None
        self.index = None
        self._active_txn = False
        if create:
-            self.create(path)
-        self.open(path, exclusive)
+            self.create(self.path)
+        self.open(self.path, exclusive)

    def __del__(self):
        self.close()
@ -334,7 +334,6 @@ class Repository:
            report_error('Adding commit tag to segment {}'.format(transaction_id))
            self.io.segment = transaction_id + 1
            self.io.write_commit()
-            self.io.close_segment()
        if current_index and not repair:
            if len(current_index) != len(self.index):
                report_error('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index)))
@ -433,7 +432,8 @@ class LoggedIO:

    def __init__(self, path, limit, segments_per_dir, capacity=90):
        self.path = path
-        self.fds = LRUCache(capacity)
+        self.fds = LRUCache(capacity,
+                            dispose=lambda fd: fd.close())
        self.segment = 0
        self.limit = limit
        self.segments_per_dir = segments_per_dir
@ -441,9 +441,8 @@ class LoggedIO:
        self._write_fd = None

    def close(self):
-        for segment in list(self.fds.keys()):
-            self.fds.pop(segment).close()
        self.close_segment()
+        self.fds.clear()
        self.fds = None  # Just to make sure we're disabled

    def segment_iterator(self, reverse=False):
@ -517,6 +516,8 @@ class LoggedIO:
            return fd

    def delete_segment(self, segment):
+        if segment in self.fds:
+            del self.fds[segment]
        try:
            os.unlink(self.segment_filename(segment))
        except OSError:
@ -559,7 +560,8 @@ class LoggedIO:
            header = fd.read(self.header_fmt.size)

    def recover_segment(self, segment, filename):
-        self.fds.pop(segment).close()
+        if segment in self.fds:
+            del self.fds[segment]
        # FIXME: save a copy of the original file
        with open(filename, 'rb') as fd:
            data = memoryview(fd.read())
--- a/borg/testsuite/init.py
+++ b/borg/testsuite/init.py
@ -73,7 +73,7 @@ class BaseTestCase(unittest.TestCase):
            d1 = [filename] + [getattr(s1, a) for a in attrs]
            d2 = [filename] + [getattr(s2, a) for a in attrs]
            if not os.path.islink(path1) or utime_supports_fd:
-                # Older versions of llfuse does not support ns precision properly
+                # Older versions of llfuse do not support ns precision properly
                if fuse and not have_fuse_mtime_ns:
                    d1.append(round(st_mtime_ns(s1), -4))
                    d2.append(round(st_mtime_ns(s2), -4))
@ -94,28 +94,3 @@ class BaseTestCase(unittest.TestCase):
                return
            time.sleep(.1)
        raise Exception('wait_for_mount(%s) timeout' % path)
-
-
-def get_tests(suite):
-    """Generates a sequence of tests from a test suite
-    """
-    for item in suite:
-        try:
-            # TODO: This could be "yield from..." with Python 3.3+
-            for i in get_tests(item):
-                yield i
-        except TypeError:
-            yield item
-
-
-class TestLoader(unittest.TestLoader):
-    """A customized test loader that properly detects and filters our test cases
-    """
-
-    def loadTestsFromName(self, pattern, module=None):
-        suite = self.discover('borg.testsuite', '*.py')
-        tests = unittest.TestSuite()
-        for test in get_tests(suite):
-            if pattern.lower() in test.id().lower():
-                tests.addTest(test)
-        return tests
--- a/borg/testsuite/archive.py
+++ b/borg/testsuite/archive.py
@ -1,12 +1,12 @@
 from datetime import datetime, timezone

 import msgpack
+from mock import Mock

 from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
 from ..key import PlaintextKey
 from ..helpers import Manifest
 from . import BaseTestCase
-from .mock import Mock


 class MockCache:
--- a/borg/testsuite/archiver.py
+++ b/borg/testsuite/archiver.py
@ -11,6 +11,9 @@ import time
 import unittest
 from hashlib import sha256

+from mock import patch
+import pytest
+
 from .. import xattr
 from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP
 from ..archiver import Archiver
@ -20,7 +23,6 @@ from ..helpers import Manifest
 from ..remote import RemoteRepository, PathNotAllowed
 from ..repository import Repository
 from . import BaseTestCase
-from .mock import patch

 try:
    import llfuse
@ -32,6 +34,12 @@ has_lchflags = hasattr(os, 'lchflags')

 src_dir = os.path.join(os.getcwd(), os.path.dirname(__file__), '..')

+# Python <= 3.2 raises OSError instead of PermissionError (See #164)
+try:
+    PermissionError = PermissionError
+except NameError:
+    PermissionError = OSError
+

 class changedir:
    def __init__(self, dir):
@ -57,7 +65,9 @@ class environment_variable:

    def __exit__(self, *args, **kw):
        for k, v in self.old_values.items():
-            if v is not None:
+            if v is None:
+                del os.environ[k]
+            else:
                os.environ[k] = v


@ -88,8 +98,8 @@ class ArchiverTestCaseBase(BaseTestCase):
        os.chdir(self.tmpdir)

    def tearDown(self):
-        shutil.rmtree(self.tmpdir)
        os.chdir(self._old_wd)
+        shutil.rmtree(self.tmpdir)

    def cmd(self, *args, **kw):
        exit_code = kw.get('exit_code', 0)
@ -151,15 +161,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
        self.create_regular_file('flagfile', size=1024)
        # Directory
        self.create_regular_file('dir2/file2', size=1024 * 80)
-        # File owner
-        os.chown('input/file1', 100, 200)
        # File mode
        os.chmod('input/file1', 0o7755)
-        os.chmod('input/dir2', 0o555)
-        # Block device
-        os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20))
-        # Char device
-        os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40))
        # Hard link
        os.link(os.path.join(self.input_path, 'file1'),
                os.path.join(self.input_path, 'hardlink'))
@ -177,19 +180,54 @@ class ArchiverTestCase(ArchiverTestCaseBase):
        os.mkfifo(os.path.join(self.input_path, 'fifo1'))
        if has_lchflags:
            os.lchflags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP)
+        try:
+            # Block device
+            os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20))
+            # Char device
+            os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40))
+            # File mode
+            os.chmod('input/dir2', 0o555)  # if we take away write perms, we need root to remove contents
+            # File owner
+            os.chown('input/file1', 100, 200)
+            have_root = True  # we have (fake)root
+        except PermissionError:
+            have_root = False
+        return have_root

    def test_basic_functionality(self):
-        self.create_test_files()
+        have_root = self.create_test_files()
        self.cmd('init', self.repository_location)
        self.cmd('create', self.repository_location + '::test', 'input')
-        self.cmd('create', self.repository_location + '::test.2', 'input')
+        self.cmd('create', '--stats', self.repository_location + '::test.2', 'input')
        with changedir('output'):
            self.cmd('extract', self.repository_location + '::test')
        self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2)
-        self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), 11)
+        expected =  [
+            'input',
+            'input/bdev',
+            'input/cdev',
+            'input/dir2',
+            'input/dir2/file2',
+            'input/empty',
+            'input/fifo1',
+            'input/file1',
+            'input/flagfile',
+            'input/hardlink',
+            'input/link1',
+        ]
+        if not have_root:
+            # we could not create these device files without (fake)root
+            expected.remove('input/bdev')
+            expected.remove('input/cdev')
+        if has_lchflags:
+            # remove the file we did not backup, so input and output become equal
+            expected.remove('input/flagfile') # this file is UF_NODUMP
+            os.remove(os.path.join('input', 'flagfile'))
+        self.assert_equal(self.cmd('list', '--short', self.repository_location + '::test').splitlines(), expected)
        self.assert_dirs_equal('input', 'output/input')
        info_output = self.cmd('info', self.repository_location + '::test')
-        self.assert_in('Number of files: 4', info_output)
+        item_count = 3 if has_lchflags else 4  # one file is UF_NODUMP
+        self.assert_in('Number of files: %d' % item_count, info_output)
        shutil.rmtree(self.cache_path)
        with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'):
            info_output2 = self.cmd('info', self.repository_location + '::test')
@ -243,6 +281,19 @@ class ArchiverTestCase(ArchiverTestCaseBase):
        if sparse_support and hasattr(st, 'st_blocks'):
            self.assert_true(st.st_blocks * 512 < total_len / 10)  # is output sparse?

+    def test_unusual_filenames(self):
+        filenames = ['normal', 'with some blanks', '(with_parens)', ]
+        for filename in filenames:
+            filename = os.path.join(self.input_path, filename)
+            with open(filename, 'wb') as fd:
+                pass
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        for filename in filenames:
+            with changedir('output'):
+                self.cmd('extract', self.repository_location + '::test', os.path.join('input', filename))
+            assert os.path.exists(os.path.join('output', 'input', filename))
+
    def test_repository_swap_detection(self):
        self.create_test_files()
        os.environ['BORG_PASSPHRASE'] = 'passphrase'
@ -389,11 +440,21 @@ class ArchiverTestCase(ArchiverTestCaseBase):
        self.cmd('extract', '--dry-run', self.repository_location + '::test.2')
        self.cmd('delete', self.repository_location + '::test')
        self.cmd('extract', '--dry-run', self.repository_location + '::test.2')
-        self.cmd('delete', self.repository_location + '::test.2')
+        self.cmd('delete', '--stats', self.repository_location + '::test.2')
        # Make sure all data except the manifest has been deleted
        repository = Repository(self.repository_path)
        self.assert_equal(len(repository), 1)

+    def test_delete_repo(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('dir2/file2', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        self.cmd('create', self.repository_location + '::test.2', 'input')
+        self.cmd('delete', self.repository_location)
+        # Make sure the repo is gone
+        self.assertFalse(os.path.exists(self.repository_path))
+
    def test_corrupted_repository(self):
        self.cmd('init', self.repository_location)
        self.create_src_archive('test')
@ -405,6 +466,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
            fd.write(b'XXXX')
        self.cmd('check', self.repository_location, exit_code=1)

+    # we currently need to be able to create a lock directory inside the repo:
+    @pytest.mark.xfail(reason="we need to be able to create the lock directory inside the repo")
    def test_readonly_repository(self):
        self.cmd('init', self.repository_location)
        self.create_src_archive('test')
@ -415,6 +478,21 @@ class ArchiverTestCase(ArchiverTestCaseBase):
            # Restore permissions so shutil.rmtree is able to delete it
            os.system('chmod -R u+w ' + self.repository_path)

+    def test_umask(self):
+        self.create_regular_file('file1', size=1024 * 80)
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        mode = os.stat(self.repository_path).st_mode
+        self.assertEqual(stat.S_IMODE(mode), 0o700)
+
+    def test_create_dry_run(self):
+        self.cmd('init', self.repository_location)
+        self.cmd('create', '--dry-run', self.repository_location + '::test', 'input')
+        # Make sure no archive has been created
+        repository = Repository(self.repository_path)
+        manifest, key = Manifest.load(repository)
+        self.assert_equal(len(manifest.archives), 0)
+
    def test_cmdline_compatibility(self):
        self.create_regular_file('file1', size=1024 * 80)
        self.cmd('init', self.repository_location)
@ -439,10 +517,38 @@ class ArchiverTestCase(ArchiverTestCaseBase):
        self.assert_not_in('test1', output)
        self.assert_in('test2', output)

+    def test_prune_repository_prefix(self):
+        self.cmd('init', self.repository_location)
+        self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir)
+        self.cmd('create', self.repository_location + '::foo-2015-08-12-20:00', src_dir)
+        self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir)
+        self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir)
+        output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-')
+        self.assert_in('Keeping archive: foo-2015-08-12-20:00', output)
+        self.assert_in('Would prune:     foo-2015-08-12-10:00', output)
+        output = self.cmd('list', self.repository_location)
+        self.assert_in('foo-2015-08-12-10:00', output)
+        self.assert_in('foo-2015-08-12-20:00', output)
+        self.assert_in('bar-2015-08-12-10:00', output)
+        self.assert_in('bar-2015-08-12-20:00', output)
+        self.cmd('prune', self.repository_location, '--keep-daily=2', '--prefix=foo-')
+        output = self.cmd('list', self.repository_location)
+        self.assert_not_in('foo-2015-08-12-10:00', output)
+        self.assert_in('foo-2015-08-12-20:00', output)
+        self.assert_in('bar-2015-08-12-10:00', output)
+        self.assert_in('bar-2015-08-12-20:00', output)
+
    def test_usage(self):
        self.assert_raises(SystemExit, lambda: self.cmd())
        self.assert_raises(SystemExit, lambda: self.cmd('-h'))

+    def test_help(self):
+        assert 'Borg' in self.cmd('help')
+        assert 'patterns' in self.cmd('help', 'patterns')
+        assert 'Initialize' in self.cmd('help', 'init')
+        assert 'positional arguments' not in self.cmd('help', 'init', '--epilog-only')
+        assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only')
+
    @unittest.skipUnless(has_llfuse, 'llfuse not installed')
    def test_fuse_mount_repository(self):
        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
--- a/borg/testsuite/compress.py
+++ b/borg/testsuite/compress.py
@ -0,0 +1,102 @@
+import zlib
+try:
+    import lzma
+except ImportError:
+    lzma = None
+
+import pytest
+
+from ..compress import get_compressor, Compressor, CNONE, ZLIB, LZ4
+
+
+buffer = bytes(2**16)
+data = b'fooooooooobaaaaaaaar' * 10
+params = dict(name='zlib', level=6, buffer=buffer)
+
+
+def test_get_compressor():
+    c = get_compressor(name='none')
+    assert isinstance(c, CNONE)
+    c = get_compressor(name='lz4', buffer=buffer)
+    assert isinstance(c, LZ4)
+    c = get_compressor(name='zlib')
+    assert isinstance(c, ZLIB)
+    with pytest.raises(KeyError):
+        get_compressor(name='foobar')
+
+
+def test_cnull():
+    c = get_compressor(name='none')
+    cdata = c.compress(data)
+    assert len(cdata) > len(data)
+    assert data in cdata  # it's not compressed and just in there 1:1
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_lz4():
+    c = get_compressor(name='lz4', buffer=buffer)
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_zlib():
+    c = get_compressor(name='zlib')
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_lzma():
+    if lzma is None:
+        pytest.skip("No lzma support found.")
+    c = get_compressor(name='lzma')
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_autodetect_invalid():
+    with pytest.raises(ValueError):
+        Compressor(**params).decompress(b'\xff\xfftotalcrap')
+    with pytest.raises(ValueError):
+        Compressor(**params).decompress(b'\x08\x00notreallyzlib')
+
+
+def test_zlib_compat():
+    # for compatibility reasons, we do not add an extra header for zlib,
+    # nor do we expect one when decompressing / autodetecting
+    for level in range(10):
+        c = get_compressor(name='zlib', level=level)
+        cdata1 = c.compress(data)
+        cdata2 = zlib.compress(data, level)
+        assert cdata1 == cdata2
+        data2 = c.decompress(cdata2)
+        assert data == data2
+        data2 = Compressor(**params).decompress(cdata2)
+        assert data == data2
+
+
+def test_compressor():
+    params_list = [
+        dict(name='none', buffer=buffer),
+        dict(name='lz4', buffer=buffer),
+        dict(name='zlib', level=0, buffer=buffer),
+        dict(name='zlib', level=6, buffer=buffer),
+        dict(name='zlib', level=9, buffer=buffer),
+    ]
+    if lzma:
+        params_list += [
+            dict(name='lzma', level=0, buffer=buffer),
+            dict(name='lzma', level=6, buffer=buffer),
+            # we do not test lzma on level 9 because of the huge memory needs
+        ]
+    for params in params_list:
+        c = Compressor(**params)
+        assert data == c.decompress(c.compress(data))
+
+
--- a/borg/testsuite/hashindex.py
+++ b/borg/testsuite/hashindex.py
@ -6,6 +6,11 @@ from ..hashindex import NSIndex, ChunkIndex
 from . import BaseTestCase


+def H(x):
+    # make some 32byte long thing that depends on x
+    # The precision (.32) pads the decimal digits of x with leading zeros
+    # to 32 digits; no field width is given, so the '-' and '0' flags add
+    # no further padding. A negative x would get an extra sign character.
+    return bytes('%-0.32d' % x, 'ascii')
+
+
 class HashIndexTestCase(BaseTestCase):

    def _generic_test(self, cls, make_value, sha):
@ -78,3 +83,20 @@ class HashIndexTestCase(BaseTestCase):
        second_half = list(idx.iteritems(marker=all[49][0]))
        self.assert_equal(len(second_half), 50)
        self.assert_equal(second_half, all[50:])
+
+    def test_chunkindex_merge(self):
+        idx1 = ChunkIndex()
+        idx1[H(1)] = 1, 100, 100
+        idx1[H(2)] = 2, 200, 200
+        idx1[H(3)] = 3, 300, 300
+        # no H(4) entry
+        idx2 = ChunkIndex()
+        idx2[H(1)] = 4, 100, 100
+        idx2[H(2)] = 5, 200, 200
+        # no H(3) entry
+        idx2[H(4)] = 6, 400, 400
+        idx1.merge(idx2)
+        assert idx1[H(1)] == (5, 100, 100)
+        assert idx1[H(2)] == (7, 200, 200)
+        assert idx1[H(3)] == (3, 300, 300)
+        assert idx1[H(4)] == (6, 400, 400)
--- a/borg/testsuite/helpers.py
+++ b/borg/testsuite/helpers.py
@ -2,11 +2,13 @@ import hashlib
 from time import mktime, strptime
 from datetime import datetime, timezone, timedelta

+import pytest
+import sys
 import msgpack

-from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
+from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \
    prune_within, prune_split, \
-    StableDict, int_to_bigint, bigint_to_int, parse_timestamp
+    StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams
 from . import BaseTestCase


@ -22,42 +24,115 @@ class BigIntTestCase(BaseTestCase):
        self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70)


-class LocationTestCase(BaseTestCase):
+class TestLocationWithoutEnv:
+    def test_ssh(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('ssh://user@host:1234/some/path::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
+        assert repr(Location('ssh://user@host:1234/some/path')) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)"

-    def test(self):
-        self.assert_equal(
-            repr(Location('ssh://user@host:1234/some/path::archive')),
-            "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('file:///some/path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('user@host:/some/path::archive')),
-            "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('/some/absolute/path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
-        )
-        self.assert_equal(
-            repr(Location('some/relative/path::archive')),
-            "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
-        )
-        self.assert_raises(ValueError, lambda: Location('ssh://localhost:22/path:archive'))
+    def test_file(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('file:///some/path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
+        assert repr(Location('file:///some/path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)"

-    def test_canonical_path(self):
+    def test_scp(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('user@host:/some/path::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
+        assert repr(Location('user@host:/some/path')) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)"
+
+    def test_folder(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
+        assert repr(Location('path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)"
+
+    def test_abspath(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('/some/absolute/path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
+        assert repr(Location('/some/absolute/path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)"
+
+    def test_relpath(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        assert repr(Location('some/relative/path::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
+        assert repr(Location('some/relative/path')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)"
+
+    def test_underspecified(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        with pytest.raises(ValueError):
+            Location('::archive')
+        with pytest.raises(ValueError):
+            Location('::')
+        with pytest.raises(ValueError):
+            Location()
+
+    def test_no_double_colon(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
+        with pytest.raises(ValueError):
+            Location('ssh://localhost:22/path:archive')
+
+    def test_canonical_path(self, monkeypatch):
+        monkeypatch.delenv('BORG_REPO', raising=False)
        locations = ['some/path::archive', 'file://some/path::archive', 'host:some/path::archive',
                     'host:~user/some/path::archive', 'ssh://host/some/path::archive',
                     'ssh://user@host:1234/some/path::archive']
        for location in locations:
-            self.assert_equal(Location(location).canonical_path(),
-                              Location(Location(location).canonical_path()).canonical_path())
+            assert Location(location).canonical_path() == \
+                   Location(Location(location).canonical_path()).canonical_path()
+
+
+class TestLocationWithEnv:
+    def test_ssh(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'ssh://user@host:1234/some/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)"
+
+    def test_file(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'file:///some/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)"
+
+    def test_scp(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'user@host:/some/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)"
+
+    def test_folder(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)"
+
+    def test_abspath(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', '/some/absolute/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)"
+
+    def test_relpath(self, monkeypatch):
+        monkeypatch.setenv('BORG_REPO', 'some/relative/path')
+        assert repr(Location('::archive')) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')"
+        assert repr(Location()) == \
+               "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)"


 class FormatTimedeltaTestCase(BaseTestCase):
@ -104,6 +179,103 @@ class PatternTestCase(BaseTestCase):
                          ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])


+@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test')
+class PatternNonAsciiTestCase(BaseTestCase):
+    def testComposedUnicode(self):
+        pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+
+    def testDecomposedUnicode(self):
+        pattern = 'ba\N{COMBINING ACUTE ACCENT}'
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+    
+    def testInvalidUnicode(self):
+        pattern = str(b'ba\x80', 'latin1')
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert not i.match("ba/foo")
+        assert i.match(str(b"ba\x80/foo", 'latin1'))
+        assert not e.match("ba/foo")
+        assert e.match(str(b"ba\x80/foo", 'latin1'))
+
+
+@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test')
+class OSXPatternNormalizationTestCase(BaseTestCase):
+    def testComposedUnicode(self):
+        pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+    
+    def testDecomposedUnicode(self):
+        pattern = 'ba\N{COMBINING ACUTE ACCENT}'
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+    
+    def testInvalidUnicode(self):
+        pattern = str(b'ba\x80', 'latin1')
+        i = IncludePattern(pattern)
+        e = ExcludePattern(pattern)
+
+        assert not i.match("ba/foo")
+        assert i.match(str(b"ba\x80/foo", 'latin1'))
+        assert not e.match("ba/foo")
+        assert e.match(str(b"ba\x80/foo", 'latin1'))
+
+
+def test_compression_specs():
+    with pytest.raises(ValueError):
+        CompressionSpec('')
+    assert CompressionSpec('0') == dict(name='zlib', level=0)
+    assert CompressionSpec('1') == dict(name='zlib', level=1)
+    assert CompressionSpec('9') == dict(name='zlib', level=9)
+    with pytest.raises(ValueError):
+        CompressionSpec('10')
+    assert CompressionSpec('none') == dict(name='none')
+    assert CompressionSpec('lz4') == dict(name='lz4')
+    assert CompressionSpec('zlib') == dict(name='zlib', level=6)
+    assert CompressionSpec('zlib,0') == dict(name='zlib', level=0)
+    assert CompressionSpec('zlib,9') == dict(name='zlib', level=9)
+    with pytest.raises(ValueError):
+        CompressionSpec('zlib,9,invalid')
+    assert CompressionSpec('lzma') == dict(name='lzma', level=6)
+    assert CompressionSpec('lzma,0') == dict(name='lzma', level=0)
+    assert CompressionSpec('lzma,9') == dict(name='lzma', level=9)
+    with pytest.raises(ValueError):
+        CompressionSpec('lzma,9,invalid')
+    with pytest.raises(ValueError):
+        CompressionSpec('invalid')
+
+
+def test_chunkerparams():
+    assert ChunkerParams('19,23,21,4095') == (19, 23, 21, 4095)
+    assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095)
+    with pytest.raises(ValueError):
+        ChunkerParams('19,24,21,4095')
+
+
 class MakePathSafeTestCase(BaseTestCase):

    def test(self):
--- a/borg/testsuite/lrucache.py
+++ b/borg/testsuite/lrucache.py
@ -1,40 +1,52 @@
 from ..lrucache import LRUCache
-from . import BaseTestCase
+import pytest
+from tempfile import TemporaryFile


-class LRUCacheTestCase(BaseTestCase):
+class TestLRUCache:

-    def test(self):
-        c = LRUCache(2)
-        self.assert_equal(len(c), 0)
+    def test_lrucache(self):
+        c = LRUCache(2, dispose=lambda _: None)
+        assert len(c) == 0
+        assert c.items() == set()
        for i, x in enumerate('abc'):
            c[x] = i
-        self.assert_equal(len(c), 2)
-        self.assert_equal(set(c), set(['b', 'c']))
-        self.assert_equal(set(c.items()), set([('b', 1), ('c', 2)]))
-        self.assert_equal(False, 'a' in c)
-        self.assert_equal(True, 'b' in c)
-        self.assert_raises(KeyError, lambda: c['a'])
-        self.assert_equal(c['b'], 1)
-        self.assert_equal(c['c'], 2)
+        assert len(c) == 2
+        assert c.items() == set([('b', 1), ('c', 2)])
+        assert 'a' not in c
+        assert 'b' in c
+        with pytest.raises(KeyError):
+            c['a']
+        assert c['b'] == 1
+        assert c['c'] == 2
        c['d'] = 3
-        self.assert_equal(len(c), 2)
-        self.assert_equal(c['c'], 2)
-        self.assert_equal(c['d'], 3)
-        c['c'] = 22
-        c['e'] = 4
-        self.assert_equal(len(c), 2)
-        self.assert_raises(KeyError, lambda: c['d'])
-        self.assert_equal(c['c'], 22)
-        self.assert_equal(c['e'], 4)
+        assert len(c) == 2
+        assert c['c'] == 2
+        assert c['d'] == 3
        del c['c']
-        self.assert_equal(len(c), 1)
-        self.assert_raises(KeyError, lambda: c['c'])
-        self.assert_equal(c['e'], 4)
+        assert len(c) == 1
+        with pytest.raises(KeyError):
+            c['c']
+        assert c['d'] == 3
+        c.clear()
+        assert c.items() == set()

-    def test_pop(self):
-        c = LRUCache(2)
-        c[1] = 1
-        c[2] = 2
-        c.pop(1)
-        c[3] = 3
+    def test_dispose(self):
+        c = LRUCache(2, dispose=lambda f: f.close())
+        f1 = TemporaryFile()
+        f2 = TemporaryFile()
+        f3 = TemporaryFile()
+        c[1] = f1
+        c[2] = f2
+        assert not f2.closed
+        c[3] = f3
+        assert 1 not in c
+        assert f1.closed
+        assert 2 in c
+        assert not f2.closed
+        del c[2]
+        assert 2 not in c
+        assert f2.closed
+        c.clear()
+        assert c.items() == set()
+        assert f3.closed
--- a/borg/testsuite/mock.py
+++ b/borg/testsuite/mock.py
@ -1,14 +0,0 @@
-"""
-Mocking
-
-Note: unittest.mock is broken on at least python 3.3.6 and 3.4.0.
-      it silently ignores mistyped method names starting with assert_...,
-      does nothing and just succeeds.
-      The issue was fixed in the separately distributed "mock" lib, you
-      get an AttributeError there. So, always use that one!
-
-Details:
-
-http://engineeringblog.yelp.com/2015/02/assert_called_once-threat-or-menace.html
-"""
-from mock import *
--- a/borg/testsuite/repository.py
+++ b/borg/testsuite/repository.py
@ -2,13 +2,14 @@ import os
 import shutil
 import tempfile

+from mock import patch
+
 from ..hashindex import NSIndex
 from ..helpers import Location, IntegrityError
 from ..locking import UpgradableLock
 from ..remote import RemoteRepository, InvalidRPCMethod
 from ..repository import Repository
 from . import BaseTestCase
-from .mock import patch


 class RepositoryTestCaseBase(BaseTestCase):
--- a/borg/testsuite/run.py
+++ b/borg/testsuite/run.py
@ -1,11 +0,0 @@
-import unittest
-
-from . import TestLoader
-
-
-def main():
-    unittest.main(testLoader=TestLoader(), defaultTest='')
-
-
-if __name__ == '__main__':
-    main()
--- a/docs/_themes/local/sidebarusefullinks.html
+++ b/docs/_themes/local/sidebarusefullinks.html
@ -5,8 +5,11 @@
 <ul>
  <li><a href="https://borgbackup.github.io/borgbackup/">Main Web Site</a></li>
  <li><a href="https://pypi.python.org/pypi/borgbackup">PyPI packages</a></li>
+  <li><a href="https://github.com/borgbackup/borg/issues/147">Binary Packages</a></li>
+  <li><a href="https://github.com/borgbackup/borg/blob/master/CHANGES.rst">Current ChangeLog</a></li>
  <li><a href="https://github.com/borgbackup/borg">GitHub</a></li>
  <li><a href="https://github.com/borgbackup/borg/issues">Issue Tracker</a></li>
+  <li><a href="https://www.bountysource.com/teams/borgbackup">Bounties &amp; Fundraisers</a></li>
  <li><a href="http://librelist.com/browser/borgbackup/">Mailing List</a></li>
 </ul>

--- a/docs/_themes/local/static/favicon.ico
+++ b/docs/_themes/local/static/favicon.ico
--- a/docs/_themes/local/static/local.css_t
+++ b/docs/_themes/local/static/local.css_t
@ -31,7 +31,7 @@ div.documentwrapper {
  float: right;
  width: 760px;
  padding: 0 20px 20px 20px;
-  color: #00aa00;
+  color: #00cc00;
  background-color: #000000;
  margin-bottom: 2em;
 }
@ -48,7 +48,7 @@ div.sphinxsidebar {

 h1, h2, h3 {
  font-weight: normal;
-  color: #33dd33;
+  color: #33ff33;
 }

 h1 {
@ -99,12 +99,12 @@ div.sphinxsidebar a:link, div.sphinxsidebar a:visited {
 }

 div.sphinxsidebar {
-  color: #00aa00;
+  color: #00cc00;
  background: 0000000;
 }

 div.sphinxsidebar input {
-  color: #00cc00;
+  color: #00ff00;
  background: 0000000;
  border: 1px solid #444444;
 }
@ -171,4 +171,3 @@ div.seealso {
  border-radius: .4em;
  box-shadow: 2px 2px #dd6;
 }
-
--- a/docs/changes.rst
+++ b/docs/changes.rst
@ -0,0 +1,4 @@
+.. include:: global.rst.inc
+.. _changelog:
+
+.. include:: ../CHANGES.rst
--- a/docs/conf.py
+++ b/docs/conf.py
@ -11,13 +11,13 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.

-from borg import __version__ as sw_version
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#import sys, os
-#sys.path.insert(0, os.path.abspath('.'))
+import sys, os
+sys.path.insert(0, os.path.abspath('..'))
+
+from borg import __version__ as sw_version

 # -- General configuration -----------------------------------------------------

@ -42,7 +42,7 @@ master_doc = 'index'

 # General information about the project.
 project = 'Borg - Deduplicating Archiver'
-copyright = '2010-2014, Jonas Borgström'
+copyright = '2010-2014 Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)'

 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@ -116,7 +116,7 @@ html_theme_path = ['_themes']
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-#html_favicon = None
+html_favicon = 'favicon.ico'

 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
@ -134,7 +134,7 @@ html_static_path = []
 # Custom sidebar templates, maps document names to template names.
 html_sidebars = {
    'index': ['sidebarlogo.html', 'sidebarusefullinks.html', 'searchbox.html'],
-    '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sidebarusefullinks.html', 'searchbox.html']
+    '**': ['sidebarlogo.html', 'relations.html', 'searchbox.html', 'localtoc.html', 'sidebarusefullinks.html']
 }
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
--- a/docs/development.rst
+++ b/docs/development.rst
@ -0,0 +1,68 @@
+.. include:: global.rst.inc
+.. _development:
+
+Development
+===========
+
+This chapter will get you started with |project_name| development.
+
+|project_name| is written in Python (with a little bit of Cython and C for
+the performance critical parts).
+
+
+Building a development environment
+----------------------------------
+
+First, just install borg into a virtual env as described in :ref:`installation`.
+
+To install some additional packages needed for running the tests, activate your
+virtual env and run::
+
+  pip install -r requirements.d/development.txt
+
+
+Running the tests
+-----------------
+
+The tests are in the borg/testsuite package.
+
+To run all the tests, you need to have fakeroot installed. If you do not have
+fakeroot, you will still be able to run most tests; just omit the
+`fakeroot -u` prefix from the given command lines.
+
+To run the test suite use the following command::
+
+  fakeroot -u tox  # run all tests
+
+Some more advanced examples::
+
+  # verify a changed tox.ini (run this after any change to tox.ini):
+  fakeroot -u tox --recreate
+
+  fakeroot -u tox -e py32  # run all tests, but only on python 3.2
+
+  fakeroot -u tox borg.testsuite.locking  # only run 1 test module
+
+  fakeroot -u tox borg.testsuite.locking -- -k '"not Timer"'  # exclude some tests
+
+  fakeroot -u tox borg.testsuite -- -v  # verbose py.test
+
+Important notes:
+
+- When using ``--`` to give options to py.test, you MUST also give ``borg.testsuite[.module]``.
+
+Building the docs with Sphinx
+-----------------------------
+
+The documentation (in reStructuredText format, .rst) is in docs/.
+
+To build the html version of it, you need to have sphinx installed::
+
+  pip3 install sphinx
+
+Now run::
+
+  cd docs/
+  make html
+
+Then point a web browser at docs/_build/html/index.html.
--- a/docs/faq.rst
+++ b/docs/faq.rst
@ -86,6 +86,18 @@ If it crashes with a UnicodeError, what can I do?

        export LANG=en_US.UTF-8  # or similar, important is correct charset

+I can't extract non-ascii filenames by giving them on the commandline on OS X!?
+    This is due to different ways to represent some characters in unicode.
+    HFS+ stores filenames in the decomposed form, while text entered on the
+    commandline is usually in the composed form. If you run into that, for now try:
+
+    - avoid the non-ascii characters on the commandline, e.g. by extracting
+      the parent directory (or even everything)
+    - try to enter the composed form on the commandline
+    - mount the repo using FUSE and use some file manager
+
+    See issue #143 on the issue tracker for more about this.
+
 If I want to run |project_name| on a ARM CPU older than ARM v6?
    You need to enable the alignment trap handler to fixup misaligned accesses::
    
@ -97,7 +109,8 @@ Can |project_name| add redundancy to the backup data to deal with hardware malfu
    of low-level storage layout information and control which we do not have (and also can't
    get, even if we wanted).

-    So, if you need that, consider RAID1 or a filesystems that offers redundant storage.
+    So, if you need that, consider RAID1 or a filesystem that offers redundant storage
+    or just make 2 backups to different locations / different hardware.

 Can |project_name| verify data integrity of a backup archive?
    Yes, if you want to detect accidental data damage (like bit rot), use the ``check``
--- a/docs/foreword.rst
+++ b/docs/foreword.rst
@ -1,65 +0,0 @@
-.. include:: global.rst.inc
-.. _foreword:
-
-Foreword
-========
-
-|project_name| is a secure backup program for Linux, FreeBSD and Mac OS X. 
-|project_name| is designed for efficient data storage where only new or
-modified data is stored.
-
-Features
--------
-
-Space efficient storage
-    Variable block size `deduplication`_ is used to reduce the number of bytes 
-    stored by detecting redundant data. Each file is split into a number of
-    variable length chunks and only chunks that have never been seen before
-    are added to the repository (and optionally compressed).
-
-Optional data encryption
-    All data can be protected using 256-bit AES_ encryption and data integrity
-    and authenticity is verified using `HMAC-SHA256`_.
-
-Off-site backups
-    |project_name| can store data on any remote host accessible over SSH as
-    long as |project_name| is installed. If you don't have |project_name|
-    installed there, you can use some network filesytem (sshfs, nfs, ...)
-    to mount a filesystem located on your remote host and use it like it was
-    local (but that will be slower).
-
-Backups mountable as filesystems
-    Backup archives are :ref:`mountable <borg_mount>` as
-    `userspace filesystems`_ for easy backup verification and restores.
-
-
-Glossary
--------
-
-.. _deduplication_def:
-
-Deduplication
-    Deduplication is a technique for improving storage utilization by
-    eliminating redundant data. 
-
-.. _archive_def:
-
-Archive
-    An archive is a collection of files along with metadata that include file
-    permissions, directory structure and various file attributes.
-    Since each archive in a repository must have a unique name a good naming
-    convention is ``hostname-YYYY-MM-DD``.
-
-.. _repository_def:
-
-Repository
-    A repository is a filesystem directory storing data from zero or more
-    archives. The data in a repository is both deduplicated and 
-    optionally encrypted making it both efficient and safe. Repositories are
-    created using :ref:`borg_init` and the contents can be listed using
-    :ref:`borg_list`.
-
-Key file
-    When a repository is initialized a key file containing a password
-    protected encryption key is created. It is vital to keep this file safe
-    since the repository data is totally inaccessible without it.
--- a/docs/global.rst.inc
+++ b/docs/global.rst.inc
@ -13,6 +13,7 @@
 .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2
 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list
 .. _libacl: http://savannah.nongnu.org/projects/acl/
+.. _liblz4: https://github.com/Cyan4973/lz4
 .. _OpenSSL: https://www.openssl.org/
 .. _Python: http://www.python.org/
 .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash
--- a/docs/index.rst
+++ b/docs/index.rst
@ -1,81 +1,18 @@
 .. include:: global.rst.inc

-Welcome to Borg
-================
-|project_name| is a deduplicating backup program.
-Optionally, it also supports compression and authenticated encryption.

-The main goal of |project_name| is to provide an efficient and secure way
-to backup data. The data deduplication technique used makes |project_name|
-suitable for daily backups since only the changes are stored. The authenticated
-encryption makes it suitable for backups to not fully trusted targets.
-
-|project_name| is written in Python (with a little bit of Cython and C for
-the performance critical parts).
-
-
-Easy to use
-----------
-Initialize a new backup :ref:`repository <repository_def>` and create your
-first backup :ref:`archive <archive_def>` in two lines::
-
-    $ borg init /mnt/backup
-    $ borg create /mnt/backup::Monday ~/Documents
-    $ borg create --stats /mnt/backup::Tuesday ~/Documents
-    Archive name: Tuesday
-    Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a
-    Start time: Tue Mar 25 12:00:10 2014
-    End time:   Tue Mar 25 12:00:10 2014
-    Duration: 0.08 seconds
-    Number of files: 358
-                           Original size      Compressed size    Deduplicated size
-    This archive:               57.16 MB             46.78 MB            151.67 kB
-    All archives:              114.02 MB             93.46 MB             44.81 MB
-
-See the :ref:`quickstart` chapter for a more detailed example.
-
-Easy installation
-----------------
-You can use pip to install |project_name| quickly and easily::
-
-    $ pip3 install borgbackup
-
-Need more help with installing? See :ref:`installation`.
-
-User's Guide
-============
+Borg Documentation
+==================

 .. toctree::
   :maxdepth: 2

-   foreword
+   intro
   installation
   quickstart
   usage
   faq
+   support
+   changes
   internals
-
-Getting help
-============
-
-If you've found a bug or have a concrete feature request, please create a new
-ticket on the project's `issue tracker`_ (after checking whether someone else
-already has reported the same thing).
-
-For more general questions or discussions, IRC or mailing list are preferred.
-
-IRC
---
-Join us on channel #borgbackup on chat.freenode.net. As usual on IRC, just
-ask or tell directly and then patiently wait for replies. Stay connected.
-
-Mailing list
------------
-
-There is a mailing list for Borg on librelist_ that you can use for feature
-requests and general discussions about Borg. A mailing list archive is
-available `here <http://librelist.com/browser/borgbackup/>`_.
-
-To subscribe to the list, send an email to borgbackup@librelist.com and reply
-to the confirmation mail. Likewise, to unsubscribe, send an email to 
-borgbackup-unsubscribe@librelist.com and reply to the confirmation mail.
+   development
--- a/docs/installation.rst
+++ b/docs/installation.rst
@ -9,20 +9,44 @@ Installation
 * Python_ >= 3.2
 * OpenSSL_ >= 1.0.0
 * libacl_
+* liblz4_
 * some python dependencies, see install_requires in setup.py

 General notes
 -------------
-Even though Python 3 is not the default Python version on many systems, it is
-usually available as an optional install.
+You need to do some platform specific preparation steps (to install libraries
+and tools) followed by the generic installation of |project_name| itself.
+
+Below, we describe different ways to install |project_name|.
+
+- **dist package** - easy and fast, needs a distribution and platform specific
+  binary package (for your Linux/*BSD/OS X/... distribution).
+- **wheel** - easy and fast, needs a platform specific borgbackup binary wheel
+  (which matches your platform [OS and CPU]).
+- **pypi** - installing a source package from pypi needs more installation steps
+  and will compile stuff - try this if there is no binary wheel that works for
+  you.
+- **git** - for developers and power users who want to have the latest code or
+  use revision control (each release is tagged).
+
+**Python 3**: Even though this is not the default Python version on many systems,
+it is usually available as an optional install.

 Virtualenv_ can be used to build and install |project_name| without affecting
 the system Python or requiring root access.

+Important:
+If you install into a virtual environment, you need to **activate**
+the virtual env first (``source borg-env/bin/activate``).
+Alternatively, directly run ``borg-env/bin/borg`` (or symlink that into some
+directory that is in your PATH so you can just run ``borg``).
+Using a virtual environment is optional, but recommended except for the most
+simple use cases.
+
 The llfuse_ python package is also required if you wish to mount an
 archive as a FUSE filesystem. Only FUSE >= 2.8.0 can support llfuse.

-You only need Cython to compile the .pyx files to the respective .c files
+You only need **Cython** to compile the .pyx files to the respective .c files
 when using |project_name| code from git. For |project_name| releases, the .c
 files will be bundled, so you won't need Cython to install a release.

@ -35,17 +59,57 @@ Mac OS X: You may need to get a recent enough OpenSSL version from homebrew_.
 Mac OS X: You need OS X FUSE >= 3.0.


-Debian / Ubuntu installation (from git)
---------------------------------------
-Note: this uses latest, unreleased development code from git.
-While we try not to break master, there are no guarantees on anything.
+Installation (dist package)
+---------------------------
+Some Linux, BSD and OS X distributions might offer a ready-to-use
+`borgbackup` package (which can be easily installed in the usual way).

-Some of the steps detailled below might be useful also for non-git installs.
+As |project_name| is still relatively new, such a package might not be
+available for your system yet. Please ask package maintainers to build a
+package or, if you can package / submit it yourself, please help us with
+that!
+
+If a package is available, it might be interesting for you to check its version
+and compare that to our latest release and review the change log (see links on
+our web site).
+
+
+Debian Jessie / Ubuntu 14.04 preparations (wheel)
+-------------------------------------------------

 .. parsed-literal::

-    # Python 3.x (>= 3.2) + Headers, Py Package Installer
-    apt-get install python3 python3-dev python3-pip
+    # Python stuff we need
+    apt-get install python3 python3-pip
+
+    # Libraries we need (fuse is optional)
+    apt-get install openssl libacl1 liblz4-1 fuse
+
+
+Installation (wheel)
+--------------------
+
+This uses the latest binary wheel release.
+
+.. parsed-literal::
+
+    # Check https://github.com/borgbackup/borg/issues/147 for the correct
+    # platform-specific binary wheel, download and install it:
+
+    # system-wide installation, needs sudo/root permissions:
+    sudo pip install borgbackup.whl
+
+    # home directory installation, no sudo/root needed:
+    pip install --user borgbackup.whl
+
+
+Debian Jessie / Ubuntu 14.04 preparations (git/pypi)
+----------------------------------------------------
+
+.. parsed-literal::
+
+    # Python 3.x (>= 3.2) + Headers, Py Package Installer, VirtualEnv
+    apt-get install python3 python3-dev python3-pip python-virtualenv

    # we need OpenSSL + Headers for Crypto
    apt-get install libssl-dev openssl
@ -53,97 +117,120 @@ Some of the steps detailled below might be useful also for non-git installs.
    # ACL support Headers + Library
    apt-get install libacl1-dev libacl1

+    # lz4 super fast compression support Headers + Library
+    apt-get install liblz4-dev liblz4-1
+
    # if you do not have gcc / make / etc. yet
    apt-get install build-essential

-    # optional: lowlevel FUSE py binding - to mount backup archives
-    apt-get install python3-llfuse fuse
+    # optional: FUSE support - to mount backup archives
+    # in case you get complaints about permission denied on /etc/fuse.conf:
+    # on ubuntu this means your user is not in the "fuse" group. just add
+    # yourself there, log out and log in again.
+    apt-get install libfuse-dev fuse pkg-config

    # optional: for unit testing
    apt-get install fakeroot

-    # get |project_name| from github, install it
-    git clone |git_url|

-    apt-get install python-virtualenv
-    virtualenv --python=python3 borg-env
-    source borg-env/bin/activate   # always before using!
-
-    # install borg + dependencies into virtualenv
-    pip install cython  # compile .pyx -> .c
-    pip install tox pytest  # optional, for running unit tests
-    pip install sphinx  # optional, to build the docs
-    cd borg
-    pip install -e .  # in-place editable mode
-
-    # optional: run all the tests, on all supported Python versions
-    fakeroot -u tox
-
-
-Korora / Fedora 21 installation (from git)
+Korora / Fedora 21 preparations (git/pypi)
 ------------------------------------------
-Note: this uses latest, unreleased development code from git.
-While we try not to break master, there are no guarantees on anything.
-
-Some of the steps detailled below might be useful also for non-git installs.

 .. parsed-literal::
-    # Python 3.x (>= 3.2) + Headers, Py Package Installer
-    sudo dnf install python3 python3-devel python3-pip
+
+    # Python 3.x (>= 3.2) + Headers, Py Package Installer, VirtualEnv
+    sudo dnf install python3 python3-devel python3-pip python3-virtualenv

    # we need OpenSSL + Headers for Crypto
    sudo dnf install openssl-devel openssl

    # ACL support Headers + Library
    sudo dnf install libacl-devel libacl
-    
-    # optional: lowlevel FUSE py binding - to mount backup archives
-    sudo dnf install python3-llfuse fuse
+
+    # lz4 super fast compression support Headers + Library
+    sudo dnf install lz4-devel
+
+    # optional: FUSE support - to mount backup archives
+    sudo dnf install fuse-devel fuse pkgconfig
    
    # optional: for unit testing
    sudo dnf install fakeroot
-    
-    # get |project_name| from github, install it
-    git clone |git_url|
-
-    dnf install python3-virtualenv
-    virtualenv --python=python3 borg-env
-    source borg-env/bin/activate   # always before using!
-
-    # install borg + dependencies into virtualenv
-    pip install cython  # compile .pyx -> .c
-    pip install tox pytest  # optional, for running unit tests
-    pip install sphinx  # optional, to build the docs
-    cd borg
-    pip install -e .  # in-place editable mode
-
-    # optional: run all the tests, on all supported Python versions
-    fakeroot -u tox


-Cygwin (from git)
-----------------
-Please note that running under cygwin is rather experimental.
+Cygwin preparations (git/pypi)
+------------------------------
+
+Please note that running under cygwin is rather experimental, stuff has been
+tested with Cygwin (x86-64) v2.1.0.

 You'll need at least (use the cygwin installer to fetch/install these):

 ::
-    python3
-    python3-setuptools
-    python3-cython
-    binutils
-    gcc-core
-    git
-    libopenssl
-    make
-    openssh
-    openssl-devel
+
+    python3 python3-setuptools
+    python3-cython  # not needed for releases
+    binutils gcc-core
+    libopenssl openssl-devel
+    liblz4_1 liblz4-devel  # from cygwinports.org
+    git make openssh

 You can then install ``pip`` and ``virtualenv``:

 ::

-    easy_install pip
+    easy_install-3.4 pip
    pip install virtualenv

-And now continue as for Linux (see above).
+And now continue with the generic installation (see below).
+
+In case that creation of the virtual env fails, try deleting this file:
+
+::
+
+    /usr/lib/python3.4/__pycache__/platform.cpython-34.pyc
+
+
+Installation (pypi)
+-------------------
+
+This uses the latest (source package) release from PyPI.
+
+.. parsed-literal::
+
+    virtualenv --python=python3 borg-env
+    source borg-env/bin/activate   # always before using!
+
+    # install borg + dependencies into virtualenv
+    pip install 'llfuse<0.41'  # optional, for FUSE support
+                               # 0.41 and 0.41.1 have unicode issues at install time
+    pip install borgbackup
+
+Note: we install into a virtual environment here, but this is not a requirement.
+
+
+Installation (git)
+------------------
+
+This uses latest, unreleased development code from git.
+While we try not to break master, there are no guarantees on anything.
+
+.. parsed-literal::
+
+    # get |project_name| from github, install it
+    git clone |git_url|
+
+    virtualenv --python=python3 borg-env
+    source borg-env/bin/activate   # always before using!
+
+    # install borg + dependencies into virtualenv
+    pip install sphinx  # optional, to build the docs
+    pip install 'llfuse<0.41'  # optional, for FUSE support
+                               # 0.41 and 0.41.1 have unicode issues at install time
+    cd borg
+    pip install -r requirements.d/development.txt
+    pip install -e .  # in-place editable mode
+
+    # optional: run all the tests, on all supported Python versions
+    fakeroot -u tox
+
+Note: as a developer or power user, you always want to use a virtual environment.
--- a/docs/internals.rst
+++ b/docs/internals.rst
@ -8,7 +8,6 @@ This page documents the internal data structures and storage
 mechanisms of |project_name|. It is partly based on `mailing list
 discussion about internals`_ and also on static code analysis.

-It may not be exactly up to date with the current source code.

 Repository and Archives
 -----------------------
@ -41,6 +40,32 @@ lock.roster and lock.exclusive/*
  used by the locking system to manage shared and exclusive locks


+Lock files
+----------
+
+|project_name| uses locks to get (exclusive or shared) access to the cache and
+the repository.
+
+The locking system is based on creating a directory `lock.exclusive` (for
+exclusive locks). Inside the lock directory, there is a file indicating
+hostname, process id and thread id of the lock holder.
+
+There is also a json file `lock.roster` that keeps a directory of all shared
+and exclusive lockers.
+
+If the process can create the `lock.exclusive` directory for a resource, it has
+the lock for it. If creation fails (because the directory has already been
+created by some other process), lock acquisition fails.
+
+The cache lock is usually in `~/.cache/borg/REPOID/lock.*`.
+The repository lock is in `repository/lock.*`.
+
+In case you run into troubles with the locks, you can just delete the `lock.*`
+directory and file IF you first make sure that no |project_name| process is
+running on any machine that accesses this resource. Be very careful, the cache
+or repository might get damaged if multiple processes use it at the same time.
+
+
 Config file
 -----------

@ -125,6 +150,9 @@ Each archive info contains:
 It is the last object stored, in the last segment, and is replaced
 each time.

+The Archive
+-----------
+
 The archive metadata does not contain the file items directly. Only
 references to other objects that contain that data. An archive is an
 object that contains:
@ -137,6 +165,10 @@ object that contains:
 * username
 * time

+
+The Item
+--------
+
 Each item represents a file, directory or other fs item and is stored as an
 ``item`` dictionary that contains:

@ -194,7 +226,7 @@ what files you have based on a specific set of chunk sizes).
 Indexes / Caches
 ----------------

-The files cache is stored in ``cache/files`` and is indexed on the
+The **files cache** is stored in ``cache/files`` and is indexed on the
 ``file path hash``. At backup time, it is used to quickly determine whether we
 need to chunk a given file (or whether it is unchanged and we already have all
 its pieces).
@ -213,7 +245,7 @@ archives in different setups.
 The files cache is stored as a python associative array storing
 python objects, which generates a lot of overhead.

-The chunks cache is stored in ``cache/chunks`` and is indexed on the
+The **chunks cache** is stored in ``cache/chunks`` and is indexed on the
 ``chunk id_hash``. It is used to determine whether we already have a specific
 chunk, to count references to it and also for statistics.
 It contains:
@ -222,7 +254,7 @@ It contains:
 * size
 * encrypted/compressed size

-The repository index is stored in ``repo/index.%d`` and is indexed on the
+The **repository index** is stored in ``repo/index.%d`` and is indexed on the
 ``chunk id_hash``. It is used to determine a chunk's location in the repository.
 It contains:

@ -382,10 +414,35 @@ representation of the repository id.
 Compression
 -----------

-|project_name| currently always pipes all data through a zlib compressor which
-supports compression levels 0 (no compression, fast) to 9 (high compression, slow).
+|project_name| supports the following compression methods:
+
+- none (no compression, pass through data 1:1)
+- lz4 (low compression, but super fast)
+- zlib (level 0-9, level 0 is no compression [but still adding zlib overhead],
+  level 1 is low, level 9 is high compression)
+- lzma (level 0-9, level 0 is low, level 9 is high compression).
+
+Speed:  none > lz4 > zlib > lzma
+Compression: lzma > zlib > lz4 > none
+
+Be careful, higher zlib and especially lzma compression levels might take a
+lot of resources (CPU and memory).
+
+The overall speed of course also depends on the speed of your target storage.
+If that is slow, using a higher compression level might yield better overall
+performance. You need to experiment a bit. Maybe just watch your CPU load, if
+that is relatively low, increase compression until 1 core is 70-100% loaded.
+
+Even if your target storage is rather fast, you might see interesting effects:
+while doing no compression at all (none) is an operation that takes no time, it
+likely will need to store more data to the storage compared to using lz4.
+The time needed to transfer and store the additional data might be much more
+than if you had used lz4 (which is super fast, but still might compress your
+data about 2:1). This is assuming your data is compressible (if you backup
+already compressed data, trying to compress them at backup time is usually
+pointless).
+
+Compression is applied after deduplication, thus using different compression
+methods in one repo does not influence deduplication.

 See ``borg create --help`` about how to specify the compression level and its default.
-
-Note: zlib level 0 creates a little bit more output data than it gets as input,
-due to zlib protocol overhead.
--- a/docs/intro.rst
+++ b/docs/intro.rst
@ -0,0 +1,7 @@
+.. include:: global.rst.inc
+.. _foreword:
+
+Introduction
+============
+
+.. include:: ../README.rst
--- a/docs/misc/create_compression.txt
+++ b/docs/misc/create_compression.txt
@ -1,130 +0,0 @@
-data compression
-================
-
-borg create --compression N repo::archive data
-
-Currently, borg only supports zlib compression. There are plans to expand this
-to other, faster or better compression algorithms in the future.
-
-N == 0 -> zlib level 0 == very quick, no compression
-N == 1 -> zlib level 1 == quick, low compression
-...
-N == 9 -> zlib level 9 == slow, high compression
-
-Measurements made on a Haswell Ultrabook, SSD storage, Linux.
-
-
-Example 1: lots of relatively small text files (linux kernel src)
-----------------------------------------------------------------
-
-N == 1 does a good job here, it saves the additional time needed for
-compression because it needs to store less into storage (see N == 0).
-
-N == 6 is also quite ok, a little slower, a little less repo size.
-6 was the old default of borg.
-
-High compression levels only give a little more compression, but take a lot
-of cpu time.
-
-$ borg create --stats --compression 0
------------------------------------------------------------------------------ 
-Duration: 50.40 seconds
-Number of files: 72890
-
-                       Original size      Compressed size    Deduplicated size
-This archive:                1.17 GB              1.18 GB              1.01 GB
-
-                       Unique chunks         Total chunks
-Chunk index:                   70263                82309
------------------------------------------------------------------------------ 
-
-$ borg create --stats --compression 1
------------------------------------------------------------------------------ 
-Duration: 49.29 seconds
-Number of files: 72890
-
-                       Original size      Compressed size    Deduplicated size
-This archive:                1.17 GB            368.62 MB            295.22 MB
-
-                       Unique chunks         Total chunks
-Chunk index:                   70280                82326
------------------------------------------------------------------------------
-
-$ borg create --stats --compression 5
------------------------------------------------------------------------------ 
-Duration: 59.99 seconds
-Number of files: 72890
-
-                       Original size      Compressed size    Deduplicated size
-This archive:                1.17 GB            331.70 MB            262.20 MB
-
-                       Unique chunks         Total chunks
-Chunk index:                   70290                82336
------------------------------------------------------------------------------
-
-$ borg create --stats --compression 6
------------------------------------------------------------------------------ 
-Duration: 1 minutes 13.64 seconds
-Number of files: 72890
-
-                       Original size      Compressed size    Deduplicated size
-This archive:                1.17 GB            328.79 MB            259.56 MB
-
-                       Unique chunks         Total chunks
-Chunk index:                   70279                82325
------------------------------------------------------------------------------
-
-$ borg create --stats --compression 9
------------------------------------------------------------------------------
-Duration: 3 minutes 1.58 seconds
-Number of files: 72890
-
-                       Original size      Compressed size    Deduplicated size
-This archive:                1.17 GB            326.57 MB            257.57 MB
-
-                       Unique chunks         Total chunks
-Chunk index:                   70292                82338
------------------------------------------------------------------------------
-
-
-Example 2: large VM disk file (sparse file)
-------------------------------------------
-
-The file's directory size is 80GB, but a lot of it is sparse (and reads as
-zeros).
-
-$ borg create --stats --compression 0
------------------------------------------------------------------------------
-Duration: 13 minutes 48.47 seconds
-Number of files: 1
-
-                       Original size      Compressed size    Deduplicated size
-This archive:               80.54 GB             80.55 GB             10.87 GB
-
-                       Unique chunks         Total chunks
-Chunk index:                  147307               177109
------------------------------------------------------------------------------
-
-$ borg create --stats --compression 1
------------------------------------------------------------------------------
-Duration: 15 minutes 31.34 seconds
-Number of files: 1
-
-                       Original size      Compressed size    Deduplicated size
-This archive:               80.54 GB              6.68 GB              5.67 GB
-
-                       Unique chunks         Total chunks
-Chunk index:                  147309               177111
------------------------------------------------------------------------------
-
-$ borg create --stats --compression 6
------------------------------------------------------------------------------
-Duration: 18 minutes 57.54 seconds
-Number of files: 1
-
-                       Original size      Compressed size    Deduplicated size
-This archive:               80.54 GB              6.19 GB              5.44 GB
-
-                       Unique chunks         Total chunks
-Chunk index:                  147307               177109
------------------------------------------------------------------------------
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@ -89,6 +89,31 @@ certain number of old archives::
    # and 6 monthly archives.
    borg prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6

+.. _backup_compression:
+
+Backup compression
+------------------
+
+Default is no compression, but we support different methods with high speed
+or high compression:
+
+If you have a quick repo storage and you want a little compression::
+
+    $ borg create --compression lz4 /mnt/backup::repo ~
+
+If you have a medium fast repo storage and you want a bit more compression (N=0..9,
+0 means no compression, 9 means high compression)::
+
+    $ borg create --compression zlib,N /mnt/backup::repo ~
+
+If you have a very slow repo storage and you want high compression (N=0..9, 0 means
+low compression, 9 means high compression)::
+
+    $ borg create --compression lzma,N /mnt/backup::repo ~
+
+You'll need to experiment a bit to find the best compression for your use case.
+Keep an eye on CPU load and throughput.
+
 .. _encrypted_repos:

 Repository encryption
@ -96,7 +121,7 @@ Repository encryption

 Repository encryption is enabled at repository creation time::

-    $ borg init --encryption=passphrase|keyfile PATH
+    $ borg init --encryption=repokey|keyfile PATH

 When repository encryption is enabled all data is encrypted using 256-bit AES_
 encryption and the integrity and authenticity is verified using `HMAC-SHA256`_.
@ -105,28 +130,29 @@ All data is encrypted before being written to the repository. This means that
 an attacker who manages to compromise the host containing an encrypted
 archive will not be able to access any of the data.

-|project_name| supports two different methods to derive the AES and HMAC keys.
+|project_name| supports different methods to store the AES and HMAC keys.

-Passphrase based encryption
-    This method uses a user supplied passphrase to derive the keys using the
-    PBKDF2_ key derivation function. This method is convenient to use since
-    there is no key file to keep track of and secure as long as a *strong*
-    passphrase is used.
+``repokey`` mode
+    The key is stored inside the repository (in its "config" file).
+    Use this mode if you trust in your good passphrase giving you enough
+    protection.

-    .. Note::
-        For automated backups the passphrase can be specified using the
-        `BORG_PASSPHRASE` environment variable.
+``keyfile`` mode
+    The key is stored on your local disk (in ``~/.borg/keys/``).
+    Use this mode if you want "passphrase and having-the-key" security.

-Key file based encryption
-    This method generates random keys at repository initialization time that
-    are stored in a password protected file in the ``~/.borg/keys/`` directory.
-    The key file is a printable text file. This method is secure and suitable
-    for automated backups.
+In both modes, the key is stored in encrypted form and can be only decrypted
+by providing the correct passphrase.

-    .. Note::
-        The repository data is totally inaccessible without the key file
-        so it must be kept **safe**.
+For automated backups the passphrase can be specified using the
+`BORG_PASSPHRASE` environment variable.

+**The repository data is totally inaccessible without the key:**
+    Make a backup copy of the key file (``keyfile`` mode) or repo config
+    file (``repokey`` mode) and keep it at a safe place, so you still have
+    the key in case it gets corrupted or lost.
+    The backup that is encrypted with that key won't help you with that,
+    of course.

 .. _remote_repos:

@ -159,6 +185,3 @@ mounting the remote filesystem, for example, using sshfs::
  $ borg init /mnt/backup
  $ fusermount -u /mnt

-However, be aware that sshfs doesn't fully implement POSIX locks, so
-you must be sure to not have two processes trying to access the same
-repository at the same time.
--- a/docs/support.rst
+++ b/docs/support.rst
@ -0,0 +1,57 @@
+.. include:: global.rst.inc
+.. _support:
+
+Support
+=======
+
+Please first read the docs and existing issue tracker issues and mailing
+list posts, a lot of stuff is already documented / explained / discussed /
+filed there.
+
+Issue Tracker
+-------------
+
+If you've found a bug or have a concrete feature request, please create a new
+ticket on the project's `issue tracker`_.
+
+For more general questions or discussions, IRC or mailing list are preferred.
+
+IRC
+---
+Join us on channel #borgbackup on chat.freenode.net.
+
+As usual on IRC, just ask or tell directly and then patiently wait for replies.
+Stay connected.
+
+Mailing list
+------------
+
+There is a mailing list for Borg on librelist_ that you can use for feature
+requests and general discussions about Borg. A mailing list archive is
+available `here <http://librelist.com/browser/borgbackup/>`_.
+
+To subscribe to the list, send an email to borgbackup@librelist.com and reply
+to the confirmation mail.
+
+To unsubscribe, send an email to borgbackup-unsubscribe@librelist.com and reply
+to the confirmation mail.
+
+Bounties and Fundraisers
+------------------------
+
+We use `BountySource <https://www.bountysource.com/teams/borgbackup>`_ to allow
+monetary contributions to the project and the developers, who push it forward.
+
+There, you can give general funds to the borgbackup members (the developers will
+then spend the funds as they deem fit). If you do not have some specific bounty
+(see below), you can use this as a general way to say "Thank You!" and support
+the software / project you like.
+
+If you want to encourage developers to fix some specific issue or implement some
+specific feature suggestion, you can post a new bounty or back an existing one
+(they always refer to an issue in our `issue tracker`_).
+
+As a developer, you can become a Bounty Hunter and win bounties (earn money) by
+contributing to |project_name|, a free and open source software project.
+
+We might also use BountySource to raise funds for some bigger goals.
--- a/docs/usage.rst
+++ b/docs/usage.rst
@ -41,22 +41,36 @@ Environment Variables

 |project_name| uses some environment variables for automation:

-::
+General:
+    BORG_REPO
+        When set, use the value to give the default repository location. If a command needs an archive
+        parameter, you can abbreviate as `::archive`. If a command needs a repository parameter, you
+        can either leave it away or abbreviate as `::`, if a positional parameter is required.
+    BORG_PASSPHRASE
+        When set, use the value to answer the passphrase question for encrypted repositories.
+    TMPDIR
+        where temporary files are stored (might need a lot of temporary space for some operations)

-    Specifying a passphrase:
-        BORG_PASSPHRASE : When set, use the value to answer the passphrase question for encrypted repositories.
+Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning):
+    BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK
+        For "Warning: Attempting to access a previously unknown unencrypted repository"
+    BORG_RELOCATED_REPO_ACCESS_IS_OK
+        For "Warning: The repository at location ... was previously located at ..."
+    BORG_CHECK_I_KNOW_WHAT_I_AM_DOING
+        For "Warning: 'check --repair' is an experimental feature that might result in data loss."

-    Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning):
-        BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK : For "Warning: Attempting to access a previously unknown unencrypted repository"
-        BORG_RELOCATED_REPO_ACCESS_IS_OK : For "Warning: The repository at location ... was previously located at ..."
-        BORG_CHECK_I_KNOW_WHAT_I_AM_DOING : For "Warning: 'check --repair' is an experimental feature that might result in data loss."
+Directories:
+    BORG_KEYS_DIR
+        Default to '~/.borg/keys'. This directory contains keys for encrypted repositories.
+    BORG_CACHE_DIR
+        Default to '~/.cache/borg'. This directory contains the local cache and might need a lot
+        of space for dealing with big repositories.

-    Directories:
-        BORG_KEYS_DIR : Default to '~/.borg/keys'. This directory contains keys for encrypted repositories.
-        BORG_CACHE_DIR : Default to '~/.cache/borg'. This directory contains the local cache.
-
-    Building:
-        BORG_OPENSSL_PREFIX : Adds given OpenSSL header file directory to the default locations (setup.py).
+Building:
+    BORG_OPENSSL_PREFIX
+        Adds given OpenSSL header file directory to the default locations (setup.py).
+    BORG_LZ4_PREFIX
+        Adds given LZ4 header file directory to the default locations (setup.py).


 Please note:
@ -66,6 +80,52 @@ Please note:
  (e.g. mode 600, root:root).


+Resource Usage
+--------------
+
+|project_name| might use a lot of resources depending on the size of the data set it is dealing with.
+
+CPU:
+    It won't go beyond 100% of 1 core as the code is currently single-threaded.
+    Especially higher zlib and lzma compression levels use significant amounts
+    of CPU cycles.
+
+Memory (RAM):
+    The chunks index and the files index are read into memory for performance
+    reasons.
+    Compression, esp. lzma compression with high levels might need substantial
+    amounts of memory.
+
+Temporary files:
+    Reading data and metadata from a FUSE mounted repository will consume about
+    the same space as the deduplicated chunks used to represent them in the
+    repository.
+
+Cache files:
+    Contains the chunks index and files index (plus a compressed collection of
+    single-archive chunk indexes).
+
+Chunks index:
+    Proportional to the number of data chunks in your repo. Lots of small chunks
+    in your repo imply a big chunks index. You may need to tweak the chunker
+    params (see create options) if you have a lot of data and you want to keep
+    the chunks index at some reasonable size.
+
+Files index:
+    Proportional to the number of files in your last backup. Can be switched
+    off (see create options), but next backup will be much slower if you do.
+
+Network:
+    If your repository is remote, all deduplicated (and optionally compressed/
+    encrypted) data of course has to go over the connection (ssh: repo url).
+    If you use a locally mounted network filesystem, additionally some copy
+    operations used for transaction support also go over the connection. If
+    you backup multiple sources to one target repository, additional traffic
+    happens for cache resynchronization.
+
+In case you are interested in more details, please read the internals documentation.
+
+
 .. include:: usage/init.rst.inc

 Examples
@ -139,6 +199,26 @@ Examples
    # Backup huge files with little chunk management overhead
    $ borg create --chunker-params 19,23,21,4095 /mnt/backup::VMs /srv/VMs

+    # Backup a raw device (must not be active/in use/mounted at that time)
+    $ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda -
+
+    # No compression (default)
+    $ borg create /mnt/backup::repo ~
+
+    # Super fast, low compression
+    $ borg create --compression lz4 /mnt/backup::repo ~
+
+    # Less fast, higher compression (N = 0..9)
+    $ borg create --compression zlib,N /mnt/backup::repo ~
+
+    # Even slower, even higher compression (N = 0..9)
+    $ borg create --compression lzma,N /mnt/backup::repo ~
+
+    # Backup some LV snapshots (you have to create the snapshots before this
+    # and remove them afterwards). We also backup the output of lvdisplay so
+    # we can see the LV sizes at restore time. See also "borg extract" examples.
+    $ lvdisplay > lvdisplay.txt
+    $ borg create --read-special /mnt/backup::repo lvdisplay.txt /dev/vg0/*-snapshot

 .. include:: usage/extract.rst.inc

@ -158,6 +238,11 @@ Examples
    # Extract the "src" directory but exclude object files
    $ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o'

+    # Restore LV snapshots (the target LVs /dev/vg0/* of correct size have
+    # to be already available and will be overwritten by this command!)
+    $ borg extract --stdout /mnt/backup::repo dev/vg0/root-snapshot > /dev/vg0/root
+    $ borg extract --stdout /mnt/backup::repo dev/vg0/home-snapshot > /dev/vg0/home
+
 Note: currently, extract always writes into the current working directory ("."),
      so make sure you ``cd`` to the right place before calling ``borg extract``.

--- a/requirements.d/development.txt
+++ b/requirements.d/development.txt
@ -0,0 +1,5 @@
+tox
+mock
+pytest
+pytest-cov<2.0.0
+Cython
--- a/setup.py
+++ b/setup.py
@ -3,22 +3,29 @@ import os
 import sys
 from glob import glob

-import versioneer
-versioneer.VCS = 'git'
-versioneer.style = 'pep440'
-versioneer.versionfile_source = 'borg/_version.py'
-versioneer.versionfile_build = 'borg/_version.py'
-versioneer.tag_prefix = ''
-versioneer.parentdir_prefix = 'borgbackup-'  # dirname like 'myproject-1.2.0'
-
 min_python = (3, 2)
-if sys.version_info < min_python:
+my_python = sys.version_info
+
+if my_python < min_python:
    print("Borg requires Python %d.%d or later" % min_python)
    sys.exit(1)

+# msgpack pure python data corruption was fixed in 0.4.6.
+# Also, we might use some rather recent API features.
+install_requires=['msgpack-python>=0.4.6', ]
+
+if (my_python < (3, 2, 4) or
+    (3, 3, 0) <= my_python < (3, 3, 1)):
+    # argparse in stdlib does not work there due to a bug,
+    # pull a fixed argparse from pypi
+    install_requires.append("argparse>=1.4.0")
+

 from setuptools import setup, Extension
+from setuptools.command.sdist import sdist

+
+compress_source = 'borg/compress.pyx'
 crypto_source = 'borg/crypto.pyx'
 chunker_source = 'borg/chunker.pyx'
 hashindex_source = 'borg/hashindex.pyx'
@ -30,14 +37,15 @@ try:
    from Cython.Distutils import build_ext
    import Cython.Compiler.Main as cython_compiler

-    class Sdist(versioneer.cmd_sdist):
+    class Sdist(sdist):
        def __init__(self, *args, **kwargs):
            for src in glob('borg/*.pyx'):
                cython_compiler.compile(src, cython_compiler.default_options)
-            versioneer.cmd_sdist.__init__(self, *args, **kwargs)
+            super().__init__(*args, **kwargs)

        def make_distribution(self):
            self.filelist.extend([
+                'borg/compress.c',
                'borg/crypto.c',
                'borg/chunker.c', 'borg/_chunker.c',
                'borg/hashindex.c', 'borg/_hashindex.c',
@ -48,10 +56,11 @@ try:
            super().make_distribution()

 except ImportError:
-    class Sdist(versioneer.cmd_sdist):
+    class Sdist(sdist):
        def __init__(self, *args, **kwargs):
            raise Exception('Cython is required to run sdist')

+    compress_source = compress_source.replace('.pyx', '.c')
    crypto_source = crypto_source.replace('.pyx', '.c')
    chunker_source = chunker_source.replace('.pyx', '.c')
    hashindex_source = hashindex_source.replace('.pyx', '.c')
@ -59,7 +68,9 @@ except ImportError:
    platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
    platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
    from distutils.command.build_ext import build_ext
-    if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]):
+    if not all(os.path.exists(path) for path in [
+        compress_source, crypto_source, chunker_source, hashindex_source,
+        platform_linux_source, platform_freebsd_source]):
        raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version')


@ -72,23 +83,45 @@ def detect_openssl(prefixes):
                    return prefix


+def detect_lz4(prefixes):
+    for prefix in prefixes:
+        filename = os.path.join(prefix, 'include', 'lz4.h')
+        if os.path.exists(filename):
+            with open(filename, 'r') as fd:
+                if 'LZ4_decompress_safe' in fd.read():
+                    return prefix
+
+
+include_dirs = []
+library_dirs = []
+
 possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/borg', '/opt/local']
 if os.environ.get('BORG_OPENSSL_PREFIX'):
    possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX'))
 ssl_prefix = detect_openssl(possible_openssl_prefixes)
 if not ssl_prefix:
    raise Exception('Unable to find OpenSSL >= 1.0 headers. (Looked here: {})'.format(', '.join(possible_openssl_prefixes)))
-include_dirs = [os.path.join(ssl_prefix, 'include')]
-library_dirs = [os.path.join(ssl_prefix, 'lib')]
+include_dirs.append(os.path.join(ssl_prefix, 'include'))
+library_dirs.append(os.path.join(ssl_prefix, 'lib'))
+
+
+possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/borg', '/opt/local']
+if os.environ.get('BORG_LZ4_PREFIX'):
+    possible_lz4_prefixes.insert(0, os.environ.get('BORG_LZ4_PREFIX'))  # user-supplied prefix is searched first
+lz4_prefix = detect_lz4(possible_lz4_prefixes)
+if not lz4_prefix:
+    raise Exception('Unable to find LZ4 headers. (Looked here: {})'.format(', '.join(possible_lz4_prefixes)))
+include_dirs.append(os.path.join(lz4_prefix, 'include'))
+library_dirs.append(os.path.join(lz4_prefix, 'lib'))


 with open('README.rst', 'r') as fd:
    long_description = fd.read()

-cmdclass = versioneer.get_cmdclass()
-cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
+cmdclass = {'build_ext': build_ext, 'sdist': Sdist}

 ext_modules = [
+    Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs),
    Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
    Extension('borg.chunker', [chunker_source]),
    Extension('borg.hashindex', [hashindex_source])
@ -102,7 +135,9 @@ elif sys.platform == 'darwin':

 setup(
    name='borgbackup',
-    version=versioneer.get_version(),
+    use_scm_version={
+        'write_to': 'borg/_version.py',
+    },
    author='The Borg Collective (see AUTHORS file)',
    author_email='borgbackup@librelist.com',
    url='https://borgbackup.github.io/',
@ -134,7 +169,6 @@ setup(
    },
    cmdclass=cmdclass,
    ext_modules=ext_modules,
-    # msgpack pure python data corruption was fixed in 0.4.6.
-    # Also, we might use some rather recent API features.
-    install_requires=['msgpack-python>=0.4.6']
+    setup_requires=['setuptools_scm>=1.7'],
+    install_requires=install_requires,
 )
--- a/tox.ini
+++ b/tox.ini
@ -1,16 +1,5 @@
 # tox configuration - if you change anything here, run this to verify:
 # fakeroot -u tox --recreate
-#
-# Invokation examples:
-# fakeroot -u tox  # run all tests
-# fakeroot -u tox -e py32  # run all tests, but only on python 3.2
-# fakeroot -u tox borg.testsuite.locking  # only run 1 test module
-# fakeroot -u tox borg.testsuite.locking -- -k '"not Timer"'  # exclude some tests
-# fakeroot -u tox borg.testsuite -- -v  # verbose py.test
-#
-# Important notes:
-# Without fakeroot -u some tests will fail.
-# When using -- to give options to py.test, you MUST also give borg.testsuite[.module].

 [tox]
 envlist = py32, py33, py34
@ -19,9 +8,7 @@ envlist = py32, py33, py34
 # Change dir to avoid import problem for cython code. The directory does
 # not really matter, should be just different from the toplevel dir.
 changedir = {toxworkdir}
-deps =
-    pytest
-    mock
-commands = py.test --pyargs {posargs:borg.testsuite}
+deps = -rrequirements.d/development.txt
+commands = py.test --cov=borg --pyargs {posargs:borg.testsuite}
 # fakeroot -u needs some env vars:
 passenv = *
--- a/versioneer.py
+++ b/versioneer.py