From 15c8a6323f98a04c68cc2050e2754583055758e8 Mon Sep 17 00:00:00 2001 From: Yuri D'Elia Date: Sun, 14 Dec 2014 19:15:54 +0100 Subject: [PATCH 001/241] Normalize source/exclude paths before matching This normalizes the file names in the dot directory when specified explicitly, along with exclude/include patterns. This fixes several mismatches when including relative paths that involve the current directory. --- attic/archiver.py | 3 ++- attic/helpers.py | 6 +++--- attic/testsuite/archiver.py | 19 +++++++++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..3f579e0e6 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -173,8 +173,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") self.print_error('%s: %s', path, e) else: for filename in sorted(entries): + entry_path = os.path.normpath(os.path.join(path, filename)) self._process(archive, cache, excludes, exclude_caches, skip_inodes, - os.path.join(path, filename), restrict_dev) + entry_path, restrict_dev) elif stat.S_ISLNK(st.st_mode): archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): diff --git a/attic/helpers.py b/attic/helpers.py index ac5266980..9fadbd1d8 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -228,7 +228,7 @@ class IncludePattern: path match as well. A trailing slash makes no difference. 
""" def __init__(self, pattern): - self.pattern = pattern.rstrip(os.path.sep)+os.path.sep + self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep def match(self, path): return (path+os.path.sep).startswith(self.pattern) @@ -243,9 +243,9 @@ class ExcludePattern(IncludePattern): """ def __init__(self, pattern): if pattern.endswith(os.path.sep): - self.pattern = pattern+'*'+os.path.sep + self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep else: - self.pattern = pattern+os.path.sep+'*' + self.pattern = os.path.normpath(pattern)+os.path.sep+'*' # fnmatch and re.match both cache compiled regular expressions. # Nevertheless, this is about 10 times faster. self.regex = re.compile(translate(self.pattern)) diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 382fcc854..160b36244 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -217,6 +217,25 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_not_in('..', output) self.assert_in(' input/dir1/dir2/file', output) + def test_exclude_normalization(self): + self.attic('init', self.repository_location) + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('file2', size=1024 * 80) + with changedir('input'): + self.attic('create', '--exclude=file1', self.repository_location + '::test1', '.') + with changedir('output'): + self.attic('extract', self.repository_location + '::test1') + self.assert_equal(sorted(os.listdir('output')), ['file2']) + with changedir('input'): + self.attic('create', '--exclude=./file1', self.repository_location + '::test2', '.') + with changedir('output'): + self.attic('extract', self.repository_location + '::test2') + self.assert_equal(sorted(os.listdir('output')), ['file2']) + self.attic('create', '--exclude=input/./file1', self.repository_location + '::test3', 'input') + with changedir('output'): + self.attic('extract', self.repository_location + '::test3') + 
self.assert_equal(sorted(os.listdir('output/input')), ['file2']) + def test_repeated_files(self): self.create_regular_file('file1', size=1024 * 80) self.attic('init', self.repository_location) From 7d2ec79f95e837b050be1ef12e0a3d75e88c7317 Mon Sep 17 00:00:00 2001 From: anarcat Date: Mon, 15 Dec 2014 21:53:05 -0500 Subject: [PATCH 002/241] document that hardlinks and symlinks are supported it seems that hardlinks are supported, but were not explicitly documented in the documentation. the FAQ seems like the right place to do this. closes #133. --- docs/faq.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/faq.rst b/docs/faq.rst index 849c82e3e..acaf79f8c 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -17,6 +17,7 @@ Which file attributes are preserved? * Name * Contents + * Hardlinks and symlinks * Time of last modification (nanosecond precision with Python >= 3.3) * User ID of owner * Group ID of owner From ac87360c7c7e029d685d4f0e8c8b87cce46f4303 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Mon, 15 Dec 2014 22:02:43 -0500 Subject: [PATCH 003/241] document better different dependencies this is written with recent Ubuntu and Debian in mind, but should be working everywhere. the idea here is to make sure anyone can install this without knowing too much about ACLs or anything similar. closes #135 --- docs/global.rst.inc | 1 + docs/installation.rst | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/docs/global.rst.inc b/docs/global.rst.inc index 694f4d967..e809ad9a9 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -9,6 +9,7 @@ .. _HMAC-SHA256: http://en.wikipedia.org/wiki/HMAC .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list +.. _libacl: http://savannah.nongnu.org/projects/acl/ .. _github: https://github.com/jborg/attic .. _OpenSSL: https://www.openssl.org/ .. 
_Python: http://www.python.org/ diff --git a/docs/installation.rst b/docs/installation.rst index 44e31e4ec..2956728b0 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -12,6 +12,7 @@ Other dependencies: * `msgpack-python`_ >= 0.1.10 * OpenSSL_ >= 1.0.0 +* libacl_ The OpenSSL version bundled with Mac OS X and FreeBSD is most likey too old. Newer versions are available from homebrew_ on OS X and from FreeBSD ports. @@ -19,11 +20,26 @@ Newer versions are available from homebrew_ on OS X and from FreeBSD ports. The llfuse_ python package is also required if you wish to mount an archive as a FUSE filesystem. +Common compilation pre-requisites +--------------------------------- + +The following Debian packages are generally necessary to compile +|project_name|, either through pip, the tarball or git:: + + $ sudo apt-get install python3 python3-dev python3-msgpack python3-sphinx libssl-dev libacl1-dev + Installing from PyPI using pip ------------------------------ -:: - $ pip install Attic +To install |project_name| system-wide:: + + $ sudo pip3 install Attic + +To install it in a user-specific account:: + + $ pip3 install --user Attic + +Then add ``$HOME/.library/bin`` to your ``$PATH``. Installing from source tarballs ------------------------------- @@ -32,7 +48,7 @@ Installing from source tarballs $ curl -O :targz_url:`Attic` $ tar -xvzf |package_filename| $ cd |package_dirname| - $ python setup.py install + $ sudo python3 setup.py install Installing from git ------------------- @@ -40,7 +56,7 @@ Installing from git $ git clone |git_url| $ cd attic - $ python setup.py install + $ sudo python3 setup.py install Please note that when installing from git, Cython_ is required to generate some files that are normally bundled with the release tarball. 
From a8ab9c3445eff718f8201ef2eefdaeb69a9fb867 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 09:10:21 -0500 Subject: [PATCH 004/241] document key files in a new Internals page this is still incomplete as it only describes key files, but doesn't clearly say how chunks are encrypted or decrypted. this address parts of #29 but eventually that document should also cover #27, #28 and maybe #45 --- docs/global.rst.inc | 4 +++ docs/internals.rst | 69 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 docs/internals.rst diff --git a/docs/global.rst.inc b/docs/global.rst.inc index 694f4d967..15f38ea01 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -12,6 +12,10 @@ .. _github: https://github.com/jborg/attic .. _OpenSSL: https://www.openssl.org/ .. _Python: http://www.python.org/ +.. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 +.. _SHA256: https://en.wikipedia.org/wiki/SHA-256 +.. _HMAC: https://en.wikipedia.org/wiki/HMAC +.. _msgpack: http://msgpack.org/ .. _`msgpack-python`: https://pypi.python.org/pypi/msgpack-python/ .. _llfuse: https://pypi.python.org/pypi/llfuse/ .. _homebrew: http://mxcl.github.io/homebrew/ diff --git a/docs/internals.rst b/docs/internals.rst new file mode 100644 index 000000000..c2554872f --- /dev/null +++ b/docs/internals.rst @@ -0,0 +1,69 @@ +.. include:: global.rst.inc +.. _internals: + +Internals +========= + + +Key files +--------- + +When initialized with the ``init -e keyfile`` command, |project_name| +needs an associated file in ``$HOME/.attic/keys`` to read and write +the repository. As with most crypto code in |project_name|, the format +of those files is defined in `attic/key.py`_. The format is based on +msgpack_, base64 encoding and PBKDF2_ SHA256 encryption, which is +then encoded again in a msgpack_. 
+ +The internal data structure is as follows: + +version + currently always an integer, 1 + +repository_id + the ``id`` field in the ``config`` ``INI`` file of the repository. + +enc_key + the AES encryption key + +enc_hmac_key + the HMAC key (32 bytes) + +id_key + another HMAC key? unclear. + +chunk_seed + unknown + +Those fields are encoded using msgpack_. The utf-8-encoded phassphrase +is encrypted with a PBKDF2_ and SHA256_ using 100000 iterations and a +random 32 bytes salt to give us a derived key. The derived key is 32 +bytes long. A HMAC_ SHA256_ checksum of the above fields is generated +with the derived key, then the derived key is also used to encrypt the +above pack of fields. Then the result is stored in a another msgpack_ +formatted as follows: + +version + currently always an integer, 1 + +salt + random 32 bytes salt used to encrypt the passphrase + +iterations + number of iterations used to encrypt the passphrase + +algorithm + the hashing algorithm used to encrypt the passphrase and do the HMAC + checksum + +hash + the HMAC checksum of the encrypted passphrase key + +data + the passphrase key, encrypted with AES over a PBKDF2_ SHA256 hash + described above + +The resulting msgpack_ is then encoded using base64 and written to the +key file, wrapped using the textwrap_ module with a header. The header +is a single line with the string ``ATTIC_KEY``, a space and a +hexadecimal representation of the repository id. From 9f0ed2a8c04c5dbed30b4d1ce1ef534ef4b0303b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 10:03:20 -0500 Subject: [PATCH 005/241] clarify some bits I missed --- docs/global.rst.inc | 1 + docs/index.rst | 1 + docs/internals.rst | 23 ++++++++++++----------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/docs/global.rst.inc b/docs/global.rst.inc index 15f38ea01..a6236f60d 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -15,6 +15,7 @@ .. 
_PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _SHA256: https://en.wikipedia.org/wiki/SHA-256 .. _HMAC: https://en.wikipedia.org/wiki/HMAC +.. _AES: https://en.wikipedia.org/wiki/AES .. _msgpack: http://msgpack.org/ .. _`msgpack-python`: https://pypi.python.org/pypi/msgpack-python/ .. _llfuse: https://pypi.python.org/pypi/llfuse/ diff --git a/docs/index.rst b/docs/index.rst index 3d9f11986..711eaf153 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -50,6 +50,7 @@ User's Guide quickstart usage faq + internals Getting help ============ diff --git a/docs/internals.rst b/docs/internals.rst index c2554872f..bdcf6aa09 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -24,21 +24,22 @@ repository_id the ``id`` field in the ``config`` ``INI`` file of the repository. enc_key - the AES encryption key + the key used to encrypt data with AES (256 bits) enc_hmac_key - the HMAC key (32 bytes) + the key used to HMAC the resulting AES-encrypted data (256 bits) id_key - another HMAC key? unclear. + the key used to HMAC the above chunks, the resulting hash is + stored out of band (256 bits) chunk_seed - unknown + the seed for the buzhash chunking table (signed 32 bit integer) Those fields are encoded using msgpack_. The utf-8-encoded phassphrase is encrypted with a PBKDF2_ and SHA256_ using 100000 iterations and a -random 32 bytes salt to give us a derived key. The derived key is 32 -bytes long. A HMAC_ SHA256_ checksum of the above fields is generated +random 256 bits salt to give us a derived key. The derived key is 256 +bits long. A HMAC_ SHA256_ checksum of the above fields is generated with the derived key, then the derived key is also used to encrypt the above pack of fields. 
Then the result is stored in a another msgpack_ formatted as follows: @@ -47,20 +48,20 @@ version currently always an integer, 1 salt - random 32 bytes salt used to encrypt the passphrase + random 256 bits salt used to encrypt the passphrase iterations - number of iterations used to encrypt the passphrase + number of iterations used to encrypt the passphrase (currently 100000) algorithm the hashing algorithm used to encrypt the passphrase and do the HMAC - checksum + checksum (currently the string ``sha256``) hash - the HMAC checksum of the encrypted passphrase key + the HMAC checksum of the encrypted derived key data - the passphrase key, encrypted with AES over a PBKDF2_ SHA256 hash + the derived key, encrypted with AES over a PBKDF2_ SHA256 hash described above The resulting msgpack_ is then encoded using base64 and written to the From 3f27c367fe644d2df8691e9cf532957ac6beedad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 10:04:35 -0500 Subject: [PATCH 006/241] document more internals, based on mailing list discussion this should address #27, #28 and #29 at least at a basic level it is mostly based on the mailing list discussion mentioned in #27, with some reformatting and merging of different posts. --- docs/global.rst.inc | 2 + docs/internals.rst | 105 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/docs/global.rst.inc b/docs/global.rst.inc index a6236f60d..72d15126f 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -12,6 +12,7 @@ .. _github: https://github.com/jborg/attic .. _OpenSSL: https://www.openssl.org/ .. _Python: http://www.python.org/ +.. _Buzhash: https://en.wikipedia.org/wiki/Buzhash .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _SHA256: https://en.wikipedia.org/wiki/SHA-256 .. _HMAC: https://en.wikipedia.org/wiki/HMAC @@ -28,3 +29,4 @@ .. _Arch Linux: https://aur.archlinux.org/packages/attic/ .. 
_Slackware: http://slackbuilds.org/result/?search=Attic .. _Cython: http://cython.org/ +.. _mailing list discussion about internals: http://librelist.com/browser/attic/2014/5/6/questions-and-suggestions-about-inner-working-of-attic> \ No newline at end of file diff --git a/docs/internals.rst b/docs/internals.rst index bdcf6aa09..94eef02fa 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -4,6 +4,111 @@ Internals ========= +This page documents the internal data structures and storage +mechanisms of |project_name|. It is partly based on `mailing list +discussion about internals`_ and also on static code analysis. It may +not be exactly up to date with the current source code. + +Indexes and memory usage +------------------------ + +Repository index + 40 bytes x N ~ 200MB (If a remote repository is + used this will be allocated on the remote side) + +Chunk lookup index + 44 bytes x N ~ 220MB + +File chunk cache + probably 80-100 bytes x N ~ 400MB + +The chunk lookup index (chunk hash -> reference count, size, ciphered +size ; in file cache/chunk) and the repository index (chunk hash -> +segment, offset ; in file repo/index.%d) are stored in a sort of hash +table, directly mapped in memory from the file content, with only one +slot per bucket, but that spreads the collisions to the following +buckets. As a consequence the hash is just a start position for a linear +search, and if the element is not in the table the index is linearly +crossed until an empty bucket is found. When the table is full at 90% +its size is doubled, when it's empty at 25% its size is halfed. So +operations on it have a variable complexity between constant and linear +with low factor, and memory overhead varies between 10% and 300%. + +The file chunk cache (file path hash -> age, inode number, size, +mtime_ns, chunks hashes ; in file cache/files) is stored as a python +associative array storing python objects, which generate a lot of +overhead. 
This takes around 240 bytes per file without the chunk +list, to be compared to at most 64 bytes of real data (depending on data +alignment), and around 80 bytes per chunk hash (vs 32), with a minimum +of ~250 bytes even if only one chunck hash. The inode number is stored +to make sure we distinguish between different files, as a single path +may not be unique accross different archives in different setups. + +Repository structure +-------------------- + +|project_name| is a "filesystem based transactional key value store". + +Objects referenced by a key (256bits id/hash) are stored in line in +files (segments) of size approx 5MB in repo/data. They contain : +header size, crc, size, tag, key, data. Tag is either ``PUT``, +``DELETE``, or ``COMMIT``. Segments are built locally, and then +uploaded. + +A segment file is basically a transaction log where each repository +operation is appended to the file. So if an object is written to the +repository a ``PUT`` tag is written to the file followed by the object +id and data. And if an object is deleted a ``DELETE`` tag is appended +followed by the object id. A ``COMMIT`` tag is written when a +repository transaction is committed. When a repository is opened any +``PUT`` or ``DELETE`` operations not followed by a ``COMMIT`` tag are +discarded since they are part of a partial/uncommitted transaction. + +The manifest is an object with an id of only zeros (32 bytes), that +references all the archives. It contains : version, list of archives, +timestamp, config. Each archive contains: name, id, time. It is the last +object stored, in the last segment, and is replaced each time. + +The archive metadata does not contain the file items directly. Only +references to other objects that contain that data. An archive is an +object that contain metadata : version, name, items list, cmdline, +hostname, username, time. 
Each item represents a file or directory or +symlink is stored as a ``item`` dictionnary that contains: path, list +of chunks, user, group, uid, gid, mode (item type + permissions), +source (for links), rdev (for devices), mtime, xattrs, acl, +bsdfiles. ``ctime`` (change time) is not stored because there is no +API to set it and it is reset every time an inode's metadata is changed. + +All items are serialized using msgpack and the resulting byte stream +is fed into the same chunker used for regular file data and turned +into deduplicated chunks. The reference to these chunks is then added +to the archvive metadata. This allows the archive to store many files, +beyond the ``MAX_OBJECT_SIZE`` barrier of 20MB. + +A chunk is an object as well, of course, and its id is the hash of its +(unencrypted and uncompressed) content. + +Hints are stored in a file (repo/hints) and contain: version, list of +segments, compact. + +Chunks +------ + +|project_name| uses a rolling checksum with Buzhash_ algorithm, with +window size of 4095 bytes, with a minimum of 1024, and triggers when +the last 16 bits of the checksum are null, producing chunks of 64kB on +average. All these parameters are fixed. The buzhash table is altered +by XORing it with a seed randomly generated once for the archive, and +stored encrypted in the keyfile. + +Encryption +---------- + +AES_ is used with CTR mode of operation (so no need of padding). A 64 +bits initialization vector is used, a SHA256_ based HMAC_ is computed +on the encrypted chunk with a random 64 bits nonce and both are stored +in the chunk. The header of each chunk is actually : TYPE(1) + +HMAC(32) + NONCE(8). Encryption and HMAC use two different keys. 
Key files --------- From fd56bf0887d9097825f8a95961ac6d8dc325c023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 10:20:23 -0500 Subject: [PATCH 007/241] document the repo config file and more storage properties again taken from the mailing list, mostly --- docs/internals.rst | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 94eef02fa..b4694034c 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -24,7 +24,7 @@ File chunk cache The chunk lookup index (chunk hash -> reference count, size, ciphered size ; in file cache/chunk) and the repository index (chunk hash -> -segment, offset ; in file repo/index.%d) are stored in a sort of hash +segment, offset ; in file ``repo/index.%d``) are stored in a sort of hash table, directly mapped in memory from the file content, with only one slot per bucket, but that spreads the collisions to the following buckets. As a consequence the hash is just a start position for a linear @@ -44,16 +44,19 @@ of ~250 bytes even if only one chunck hash. The inode number is stored to make sure we distinguish between different files, as a single path may not be unique accross different archives in different setups. +The ``index.%d`` files are random access but those files can be +recreated if damaged or lost using "attic check --repair". + Repository structure -------------------- |project_name| is a "filesystem based transactional key value store". Objects referenced by a key (256bits id/hash) are stored in line in -files (segments) of size approx 5MB in repo/data. They contain : +files (segments) of size approx 5MB in ``repo/data``. They contain : header size, crc, size, tag, key, data. Tag is either ``PUT``, ``DELETE``, or ``COMMIT``. Segments are built locally, and then -uploaded. +uploaded. Those files are strictly append-only and modified only once. 
A segment file is basically a transaction log where each repository operation is appended to the file. So if an object is written to the @@ -101,6 +104,26 @@ average. All these parameters are fixed. The buzhash table is altered by XORing it with a seed randomly generated once for the archive, and stored encrypted in the keyfile. +Repository config file +---------------------- + +Each repository has a ``config`` file which which is a ``INI`` +formatted file which looks like this: + + [repository] + version = 1 + segments_per_dir = 10000 + max_segment_size = 5242880 + id = 57d6c1d52ce76a836b532b0e42e677dec6af9fca3673db511279358828a21ed6 + +This is where the ``repository.id`` is stored. It is a unique +identifier for repositories. It will not change if you move the +repository around so you can make a local transfer then decide to move +the repository in another (even remote) location at a later time. + +|project_name| will do a POSIX read lock on that file when operating +on the repository. + Encryption ---------- From 1fde2a97711b5894d59132495fe7e8ba095244e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 10:20:52 -0500 Subject: [PATCH 008/241] add more details on how encryption works --- docs/internals.rst | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index b4694034c..a31fbb1cf 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -127,11 +127,28 @@ on the repository. Encryption ---------- -AES_ is used with CTR mode of operation (so no need of padding). A 64 +AES_ is used with CTR mode of operation (so no need for padding). A 64 bits initialization vector is used, a SHA256_ based HMAC_ is computed on the encrypted chunk with a random 64 bits nonce and both are stored -in the chunk. The header of each chunk is actually : TYPE(1) + -HMAC(32) + NONCE(8). Encryption and HMAC use two different keys. +in the chunk. 
The header of each chunk is : ``TYPE(1)` + +``HMAC(32)`` + ``NONCE(8)`` + ``CIPHERTEXT``. Encryption and HMAC use +two different keys. + +In AES CTR mode you can think of the IV as the start value for the +counter. The counter itself is incremented by one after each 16 byte +block. The IV/counter is not required to be random but it must NEVER be +reused. So to accomplish this Attic initializes the encryption counter +to be higher than any previously used counter value before encrypting +new data. + +To reduce payload size only 8 bytes of the 16 bytes nonce is saved in +the payload, the first 8 bytes are always zeros. This does not affect +security but limits the maximum repository capacity to only 295 +exabytes (2**64 * 16 bytes). + +Encryption keys are either a passphrase, passed through the +``ATTIC_PASSPHRASE`` environment or prompted on the commandline, or +stored in automatically generated key files. Key files --------- From ddca3b856bb41a33cc68a848f715282859b823a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 10:30:57 -0500 Subject: [PATCH 009/241] add a more gentle introduction --- docs/internals.rst | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/docs/internals.rst b/docs/internals.rst index a31fbb1cf..0e01336bb 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -9,6 +9,30 @@ mechanisms of |project_name|. It is partly based on `mailing list discussion about internals`_ and also on static code analysis. It may not be exactly up to date with the current source code. +|project_name| stores its data in a `Repository`. Each repository can +hold multiple `Archives`, which represent individual backups that +contain a full archive of the files specified when the backup was +performed. Deduplication is performed across multiple backups, both on +data and metadata, using `Segments` chunked with the Buzhash_ +algorithm. 
Each repository has the following file structure: + +README + simple text file describing the repository + +config + description of the repository, includes the unique identifier. also + acts as a lock file + +data/ + directory where the actual data (`segments`) is stored + +hints.%d + undocumented + +index.%d + cache of the file indexes. those files can be regenerated with + ``check --repair`` + Indexes and memory usage ------------------------ @@ -45,7 +69,7 @@ to make sure we distinguish between different files, as a single path may not be unique accross different archives in different setups. The ``index.%d`` files are random access but those files can be -recreated if damaged or lost using "attic check --repair". +recreated if damaged or lost using ``check --repair``. Repository structure -------------------- From 688ba109ef27a4e493dd8257e2eabb3a8c05ff58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 10:35:48 -0500 Subject: [PATCH 010/241] reorder to be more logical and more gentle --- docs/internals.rst | 104 ++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 0e01336bb..ede9fb0ef 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -33,43 +33,25 @@ index.%d cache of the file indexes. 
those files can be regenerated with ``check --repair`` -Indexes and memory usage ------------------------- +Repository config file +---------------------- -Repository index - 40 bytes x N ~ 200MB (If a remote repository is - used this will be allocated on the remote side) +Each repository has a ``config`` file which which is a ``INI`` +formatted file which looks like this: -Chunk lookup index - 44 bytes x N ~ 220MB + [repository] + version = 1 + segments_per_dir = 10000 + max_segment_size = 5242880 + id = 57d6c1d52ce76a836b532b0e42e677dec6af9fca3673db511279358828a21ed6 -File chunk cache - probably 80-100 bytes x N ~ 400MB +This is where the ``repository.id`` is stored. It is a unique +identifier for repositories. It will not change if you move the +repository around so you can make a local transfer then decide to move +the repository in another (even remote) location at a later time. -The chunk lookup index (chunk hash -> reference count, size, ciphered -size ; in file cache/chunk) and the repository index (chunk hash -> -segment, offset ; in file ``repo/index.%d``) are stored in a sort of hash -table, directly mapped in memory from the file content, with only one -slot per bucket, but that spreads the collisions to the following -buckets. As a consequence the hash is just a start position for a linear -search, and if the element is not in the table the index is linearly -crossed until an empty bucket is found. When the table is full at 90% -its size is doubled, when it's empty at 25% its size is halfed. So -operations on it have a variable complexity between constant and linear -with low factor, and memory overhead varies between 10% and 300%. - -The file chunk cache (file path hash -> age, inode number, size, -mtime_ns, chunks hashes ; in file cache/files) is stored as a python -associative array storing python objects, which generate a lot of -overhead. 
This takes around 240 bytes per file without the chunk -list, to be compared to at most 64 bytes of real data (depending on data -alignment), and around 80 bytes per chunk hash (vs 32), with a minimum -of ~250 bytes even if only one chunck hash. The inode number is stored -to make sure we distinguish between different files, as a single path -may not be unique accross different archives in different setups. - -The ``index.%d`` files are random access but those files can be -recreated if damaged or lost using ``check --repair``. +|project_name| will do a POSIX read lock on that file when operating +on the repository. Repository structure -------------------- @@ -115,7 +97,7 @@ beyond the ``MAX_OBJECT_SIZE`` barrier of 20MB. A chunk is an object as well, of course, and its id is the hash of its (unencrypted and uncompressed) content. -Hints are stored in a file (repo/hints) and contain: version, list of +Hints are stored in a file (``repo/hints``) and contain: version, list of segments, compact. Chunks @@ -128,25 +110,49 @@ average. All these parameters are fixed. The buzhash table is altered by XORing it with a seed randomly generated once for the archive, and stored encrypted in the keyfile. -Repository config file ----------------------- +Indexes +------- -Each repository has a ``config`` file which which is a ``INI`` -formatted file which looks like this: +The chunk lookup index (chunk hash -> reference count, size, ciphered +size ; in file cache/chunk) and the repository index (chunk hash -> +segment, offset ; in file ``repo/index.%d``) are stored in a sort of hash +table, directly mapped in memory from the file content, with only one +slot per bucket, but that spreads the collisions to the following +buckets. As a consequence the hash is just a start position for a linear +search, and if the element is not in the table the index is linearly +crossed until an empty bucket is found. 
When the table is full at 90% +its size is doubled, when it's empty at 25% its size is halfed. So +operations on it have a variable complexity between constant and linear +with low factor, and memory overhead varies between 10% and 300%. - [repository] - version = 1 - segments_per_dir = 10000 - max_segment_size = 5242880 - id = 57d6c1d52ce76a836b532b0e42e677dec6af9fca3673db511279358828a21ed6 +The file chunk cache (file path hash -> age, inode number, size, +mtime_ns, chunks hashes ; in file cache/files) is stored as a python +associative array storing python objects, which generate a lot of +overhead. This takes around 240 bytes per file without the chunk +list, to be compared to at most 64 bytes of real data (depending on data +alignment), and around 80 bytes per chunk hash (vs 32), with a minimum +of ~250 bytes even if only one chunck hash. The inode number is stored +to make sure we distinguish between different files, as a single path +may not be unique accross different archives in different setups. -This is where the ``repository.id`` is stored. It is a unique -identifier for repositories. It will not change if you move the -repository around so you can make a local transfer then decide to move -the repository in another (even remote) location at a later time. +The ``index.%d`` files are random access but those files can be +recreated if damaged or lost using ``check --repair``. -|project_name| will do a POSIX read lock on that file when operating -on the repository. 
+Indexes memory usage +-------------------- + +Here is the estimated memory usage of |project_name| when using those +indexes: + +Repository index + 40 bytes x N ~ 200MB (If a remote repository is + used this will be allocated on the remote side) + +Chunk lookup index + 44 bytes x N ~ 220MB + +File chunk cache + probably 80-100 bytes x N ~ 400MB Encryption ---------- From d58b6ddf28ef07d87f77a10b472637ec7ea159fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 10:55:03 -0500 Subject: [PATCH 011/241] fix reference errors and remove reference to source code --- docs/internals.rst | 171 +++++++++++++++++++++++++++++++-------------- 1 file changed, 120 insertions(+), 51 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index ede9fb0ef..585266a9a 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -56,37 +56,81 @@ on the repository. Repository structure -------------------- -|project_name| is a "filesystem based transactional key value store". +|project_name| is a "filesystem based transactional key value +store". It makes extensive use of msgpack_ to store data and, unless +otherwise noted, data is stored in msgpack_ encoded files. -Objects referenced by a key (256bits id/hash) are stored in line in -files (segments) of size approx 5MB in ``repo/data``. They contain : -header size, crc, size, tag, key, data. Tag is either ``PUT``, -``DELETE``, or ``COMMIT``. Segments are built locally, and then -uploaded. Those files are strictly append-only and modified only once. +Objects referenced by a key (256bits id/hash) are stored inline in +files (`segments`) of size approx 5MB in ``repo/data``. They contain: -A segment file is basically a transaction log where each repository -operation is appended to the file. So if an object is written to the -repository a ``PUT`` tag is written to the file followed by the object -id and data. 
And if an object is deleted a ``DELETE`` tag is appended +* header size +* crc +* size +* tag +* key +* data + +Segments are built locally, and then uploaded. Those files are +strictly append-only and modified only once. + +Tag is either ``PUT``, ``DELETE``, or ``COMMIT``. A segment file is +basically a transaction log where each repository operation is +appended to the file. So if an object is written to the repository a +``PUT`` tag is written to the file followed by the object id and +data. And if an object is deleted a ``DELETE`` tag is appended followed by the object id. A ``COMMIT`` tag is written when a repository transaction is committed. When a repository is opened any ``PUT`` or ``DELETE`` operations not followed by a ``COMMIT`` tag are discarded since they are part of a partial/uncommitted transaction. The manifest is an object with an id of only zeros (32 bytes), that -references all the archives. It contains : version, list of archives, -timestamp, config. Each archive contains: name, id, time. It is the last -object stored, in the last segment, and is replaced each time. +references all the archives. It contains: + +* version +* list of archives +* timestamp +* config + +Each archive contains: + +* name +* id +* time + +It is the last object stored, in the last segment, and is replaced +each time. The archive metadata does not contain the file items directly. Only references to other objects that contain that data. An archive is an -object that contain metadata : version, name, items list, cmdline, -hostname, username, time. Each item represents a file or directory or -symlink is stored as a ``item`` dictionnary that contains: path, list -of chunks, user, group, uid, gid, mode (item type + permissions), -source (for links), rdev (for devices), mtime, xattrs, acl, -bsdfiles. ``ctime`` (change time) is not stored because there is no -API to set it and it is reset every time an inode's metadata is changed. 
+object that contain metadata: + +* version +* name +* items list +* cmdline +* hostname +* username +* time + +Each item represents a file or directory or +symlink is stored as a ``item`` dictionnary that contains: + +* path +* list of chunks +* user +* group +* uid +* gid +* mode (item type + permissions) +* source (for links) +* rdev (for devices) +* mtime +* xattrs +* acl +* bsdfiles + +``ctime`` (change time) is not stored because there is no API to set +it and it is reset every time an inode's metadata is changed. All items are serialized using msgpack and the resulting byte stream is fed into the same chunker used for regular file data and turned @@ -97,8 +141,11 @@ beyond the ``MAX_OBJECT_SIZE`` barrier of 20MB. A chunk is an object as well, of course, and its id is the hash of its (unencrypted and uncompressed) content. -Hints are stored in a file (``repo/hints``) and contain: version, list of -segments, compact. +Hints are stored in a file (``repo/hints``) and contain: + +* version +* list of segments +* compact Chunks ------ @@ -113,31 +160,55 @@ stored encrypted in the keyfile. Indexes ------- -The chunk lookup index (chunk hash -> reference count, size, ciphered -size ; in file cache/chunk) and the repository index (chunk hash -> -segment, offset ; in file ``repo/index.%d``) are stored in a sort of hash -table, directly mapped in memory from the file content, with only one -slot per bucket, but that spreads the collisions to the following -buckets. As a consequence the hash is just a start position for a linear -search, and if the element is not in the table the index is linearly -crossed until an empty bucket is found. When the table is full at 90% -its size is doubled, when it's empty at 25% its size is halfed. So -operations on it have a variable complexity between constant and linear -with low factor, and memory overhead varies between 10% and 300%. +There are two main indexes: the chunk lookup index and the repository +index. 
There is also the file chunk cache. -The file chunk cache (file path hash -> age, inode number, size, -mtime_ns, chunks hashes ; in file cache/files) is stored as a python -associative array storing python objects, which generate a lot of -overhead. This takes around 240 bytes per file without the chunk -list, to be compared to at most 64 bytes of real data (depending on data -alignment), and around 80 bytes per chunk hash (vs 32), with a minimum -of ~250 bytes even if only one chunck hash. The inode number is stored -to make sure we distinguish between different files, as a single path -may not be unique accross different archives in different setups. +The chunk lookup index is stored in ``cache/chunk`` and is indexed on +the ``chunk hash``. It contains: -The ``index.%d`` files are random access but those files can be +* reference count +* size +* ciphered size + +The repository index is stored in ``repo/index.%d`` and is also +indexed on ``chunk hash`` and contains: + +* segment +* offset + +The repository index files are random access but those files can be recreated if damaged or lost using ``check --repair``. +Both indexes are stored as hash tables, directly mapped in memory from +the file content, with only one slot per bucket, but that spreads the +collisions to the following buckets. As a consequence the hash is just +a start position for a linear search, and if the element is not in the +table the index is linearly crossed until an empty bucket is +found. When the table is full at 90% its size is doubled, when it's +empty at 25% its size is halfed. So operations on it have a variable +complexity between constant and linear with low factor, and memory +overhead varies between 10% and 300%. 
+ +The file chunk cache is stored in ``cache/files`` and is indexed on +the ``file path hash`` and contains: + +* age +* inode number +* size +* mtime_ns +* chunks hashes + +The inode number is stored to make sure we distinguish between +different files, as a single path may not be unique accross different +archives in different setups. + +The file chunk cache is stored as a python associative array storing +python objects, which generate a lot of overhead. This takes around +240 bytes per file without the chunk list, to be compared to at most +64 bytes of real data (depending on data alignment), and around 80 +bytes per chunk hash (vs 32), with a minimum of ~250 bytes even if +only one chunck hash. + Indexes memory usage -------------------- @@ -158,9 +229,9 @@ Encryption ---------- AES_ is used with CTR mode of operation (so no need for padding). A 64 -bits initialization vector is used, a SHA256_ based HMAC_ is computed +bits initialization vector is used, a `HMAC-SHA256`_ is computed on the encrypted chunk with a random 64 bits nonce and both are stored -in the chunk. The header of each chunk is : ``TYPE(1)` + +in the chunk. The header of each chunk is : ``TYPE(1)`` + ``HMAC(32)`` + ``NONCE(8)`` + ``CIPHERTEXT``. Encryption and HMAC use two different keys. @@ -185,10 +256,8 @@ Key files When initialized with the ``init -e keyfile`` command, |project_name| needs an associated file in ``$HOME/.attic/keys`` to read and write -the repository. As with most crypto code in |project_name|, the format -of those files is defined in `attic/key.py`_. The format is based on -msgpack_, base64 encoding and PBKDF2_ SHA256 encryption, which is -then encoded again in a msgpack_. +the repository. The format is based on msgpack_, base64 encoding and +PBKDF2_ SHA256 encryption, which is then encoded again in a msgpack_. 
The internal data structure is as follows: @@ -212,9 +281,9 @@ chunk_seed the seed for the buzhash chunking table (signed 32 bit integer) Those fields are encoded using msgpack_. The utf-8-encoded phassphrase -is encrypted with a PBKDF2_ and SHA256_ using 100000 iterations and a +is encrypted with PBKDF2_ and SHA256_ using 100000 iterations and a random 256 bits salt to give us a derived key. The derived key is 256 -bits long. A HMAC_ SHA256_ checksum of the above fields is generated +bits long. A `HMAC-SHA256`_ checksum of the above fields is generated with the derived key, then the derived key is also used to encrypt the above pack of fields. Then the result is stored in a another msgpack_ formatted as follows: From b7c26735f77a228fd38ecb931ddb8b42b25178e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 10:59:02 -0500 Subject: [PATCH 012/241] fix formatting issues --- docs/global.rst.inc | 5 +---- docs/internals.rst | 18 +++++++++--------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/docs/global.rst.inc b/docs/global.rst.inc index 72d15126f..08e548012 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -7,16 +7,13 @@ .. _deduplication: https://en.wikipedia.org/wiki/Data_deduplication .. _AES: https://en.wikipedia.org/wiki/Advanced_Encryption_Standard .. _HMAC-SHA256: http://en.wikipedia.org/wiki/HMAC +.. _SHA256: https://en.wikipedia.org/wiki/SHA-256 .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list .. _github: https://github.com/jborg/attic .. _OpenSSL: https://www.openssl.org/ .. _Python: http://www.python.org/ .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash -.. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 -.. _SHA256: https://en.wikipedia.org/wiki/SHA-256 -.. _HMAC: https://en.wikipedia.org/wiki/HMAC -.. _AES: https://en.wikipedia.org/wiki/AES .. _msgpack: http://msgpack.org/ .. 
_`msgpack-python`: https://pypi.python.org/pypi/msgpack-python/ .. _llfuse: https://pypi.python.org/pypi/llfuse/ diff --git a/docs/internals.rst b/docs/internals.rst index 585266a9a..9d6d8b7ac 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -37,13 +37,13 @@ Repository config file ---------------------- Each repository has a ``config`` file which which is a ``INI`` -formatted file which looks like this: +formatted file which looks like this:: - [repository] - version = 1 - segments_per_dir = 10000 - max_segment_size = 5242880 - id = 57d6c1d52ce76a836b532b0e42e677dec6af9fca3673db511279358828a21ed6 + [repository] + version = 1 + segments_per_dir = 10000 + max_segment_size = 5242880 + id = 57d6c1d52ce76a836b532b0e42e677dec6af9fca3673db511279358828a21ed6 This is where the ``repository.id`` is stored. It is a unique identifier for repositories. It will not change if you move the @@ -309,6 +309,6 @@ data described above The resulting msgpack_ is then encoded using base64 and written to the -key file, wrapped using the textwrap_ module with a header. The header -is a single line with the string ``ATTIC_KEY``, a space and a -hexadecimal representation of the repository id. +key file, wrapped using the builtin ``textwrap`` module with a +header. The header is a single line with the string ``ATTIC_KEY``, a +space and a hexadecimal representation of the repository id. From e80e6c4dbb1cf2ca6630e377ab985dbd8fdcba87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 10:59:12 -0500 Subject: [PATCH 013/241] better titles --- docs/internals.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 9d6d8b7ac..cefa8968a 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -33,8 +33,8 @@ index.%d cache of the file indexes. 
those files can be regenerated with ``check --repair`` -Repository config file ----------------------- +Config file +----------- Each repository has a ``config`` file which which is a ``INI`` formatted file which looks like this:: @@ -53,8 +53,8 @@ the repository in another (even remote) location at a later time. |project_name| will do a POSIX read lock on that file when operating on the repository. -Repository structure --------------------- +Segments and archives +--------------------- |project_name| is a "filesystem based transactional key value store". It makes extensive use of msgpack_ to store data and, unless From 2676c5fae8e10c33f9b91d275633f9812160d852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Tue, 16 Dec 2014 11:16:30 -0500 Subject: [PATCH 014/241] document that multiple clients can backup to a single remote answer comes from author here: http://librelist.com/browser//attic/2014/11/11/backing-up-multiple-servers-into-a-single-repository/#e96345aa5a3469a87786675d65da492b this should address the last remaining issue in #60 --- docs/faq.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index 849c82e3e..0844ee327 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -12,6 +12,17 @@ Can I backup VM disk images? Yes, the :ref:`deduplication ` technique used by |project_name| makes sure only the modified parts of the file are stored. +Can I backup from multiple servers into a single repository? + Yes, but in order for the deduplication used by Attic to work, it + needs to keep a local cache containing checksums of all file + chunks already stored in the repository. This cache is stored in + ``~/.cache/attic/``. If Attic detects that a repository has been + modified since the local cache was updated it will need to rebuild + the cache. This rebuild can be quite time consuming. + + So, yes it's possible. But it will be most efficient if a single + repository is only modified from one place. 
+ Which file attributes are preserved? The following attributes are preserved: From b7718f044ddf32b1ad1b3a7d1f0f786787161c4a Mon Sep 17 00:00:00 2001 From: anarcat Date: Wed, 17 Dec 2014 10:11:02 -0500 Subject: [PATCH 015/241] Update internals.rst --- docs/internals.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/internals.rst b/docs/internals.rst index cefa8968a..ef43054ff 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -309,6 +309,6 @@ data described above The resulting msgpack_ is then encoded using base64 and written to the -key file, wrapped using the builtin ``textwrap`` module with a +key file, wrapped using the standard ``textwrap`` module with a header. The header is a single line with the string ``ATTIC_KEY``, a space and a hexadecimal representation of the repository id. From 046b196babc4e5f18bc8f6870ba8b9272898482c Mon Sep 17 00:00:00 2001 From: Daniel Danner Date: Sun, 11 Jan 2015 14:06:59 +0100 Subject: [PATCH 016/241] Only allow whitelisted RPC calls in server mode Without this check, the client is able to call any method of RepositoryServer and Repository, potentially circumventing restrict_to_paths or even run arbitrary code. 
--- attic/remote.py | 21 ++++++++++++++++++++- attic/testsuite/repository.py | 5 ++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/attic/remote.py b/attic/remote.py index f2a0aed06..5ee91703c 100644 --- a/attic/remote.py +++ b/attic/remote.py @@ -22,8 +22,23 @@ class ConnectionClosed(Error): class PathNotAllowed(Error): """Repository path not allowed""" +class InvalidRPCMethod(Error): + """RPC method is not valid""" class RepositoryServer(object): + rpc_methods = ( + '__len__', + 'check', + 'commit', + 'delete', + 'get', + 'list', + 'negotiate', + 'open', + 'put', + 'repair', + 'rollback', + ) def __init__(self, restrict_to_paths): self.repository = None @@ -47,6 +62,8 @@ class RepositoryServer(object): for type, msgid, method, args in unpacker: method = method.decode('ascii') try: + if not method in self.rpc_methods: + raise InvalidRPCMethod(method) try: f = getattr(self, method) except AttributeError: @@ -155,8 +172,10 @@ class RemoteRepository(object): raise IntegrityError(res) elif error == b'PathNotAllowed': raise PathNotAllowed(*res) - if error == b'ObjectNotFound': + elif error == b'ObjectNotFound': raise Repository.ObjectNotFound(res[0], self.location.orig) + elif error == b'InvalidRPCMethod': + raise InvalidRPCMethod(*res) raise self.RPCError(error) else: yield res diff --git a/attic/testsuite/repository.py b/attic/testsuite/repository.py index 91a822803..e088921cf 100644 --- a/attic/testsuite/repository.py +++ b/attic/testsuite/repository.py @@ -4,7 +4,7 @@ import tempfile from attic.testsuite.mock import patch from attic.hashindex import NSIndex from attic.helpers import Location, IntegrityError, UpgradableLock -from attic.remote import RemoteRepository +from attic.remote import RemoteRepository, InvalidRPCMethod from attic.repository import Repository from attic.testsuite import AtticTestCase @@ -319,6 +319,9 @@ class RemoteRepositoryTestCase(RepositoryTestCase): def open(self, create=False): return 
RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create) + def test_invalid_rpc(self): + self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None)) + class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase): From bffc419615301bf8558175652bae4ed75bd3242d Mon Sep 17 00:00:00 2001 From: Daniel Danner Date: Fri, 23 Jan 2015 16:43:45 +0100 Subject: [PATCH 017/241] FUSE: reflect deduplication in allocated blocks Instead of giving all files a fixed block count of 1, this assigns each deduplicated chunk to a certain file. In effect, the cumulative file size that is shown in the mountpoint accurately reflects the amount of actual disk space needed for the repository (barring metadata overhead). Although the block assignment is done arbitrarily, depending on the user's access pattern, the sizes will be consistent within the entire mount point. This facilitates the use of tools like du and ncdu for inspecting the actual disk usage in a repository as opposed to just looking at the original, uncompressed, non-deduplicated file sizes. 
--- attic/fuse.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/attic/fuse.py b/attic/fuse.py index bc102173e..ad1ae5f87 100644 --- a/attic/fuse.py +++ b/attic/fuse.py @@ -43,6 +43,7 @@ class AtticOperations(llfuse.Operations): self.contents = defaultdict(dict) self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()} self.pending_archives = {} + self.accounted_chunks = {} self.cache = ItemCache() if archive: self.process_archive(archive) @@ -130,8 +131,13 @@ class AtticOperations(llfuse.Operations): def getattr(self, inode): item = self.get_item(inode) size = 0 + dsize = 0 try: - size = sum(size for _, size, _ in item[b'chunks']) + for key, chunksize, _ in item[b'chunks']: + size += chunksize + if self.accounted_chunks.get(key, inode) == inode: + self.accounted_chunks[key] = inode + dsize += chunksize except KeyError: pass entry = llfuse.EntryAttributes() @@ -146,7 +152,7 @@ class AtticOperations(llfuse.Operations): entry.st_rdev = item.get(b'rdev', 0) entry.st_size = size entry.st_blksize = 512 - entry.st_blocks = 1 + entry.st_blocks = dsize / 512 if have_fuse_mtime_ns: entry.st_atime_ns = item[b'mtime'] entry.st_mtime_ns = item[b'mtime'] From 939e75467e586c493e3484ed7fc9d50c753d533f Mon Sep 17 00:00:00 2001 From: Radek Podgorny Date: Wed, 4 Feb 2015 00:04:22 +0100 Subject: [PATCH 018/241] add more .c files to .gitigore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index ec439bf87..0ffe97575 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,10 @@ env .tox hashindex.c chunker.c +crypto.c +platform_darwin.c +platform_freebsd.c +platform_linux.c *.egg-info *.pyc *.pyo From b425950f8b13f48bda2fb3672a36737e801b0e06 Mon Sep 17 00:00:00 2001 From: Evan Hempel Date: Thu, 12 Feb 2015 20:32:20 -0500 Subject: [PATCH 019/241] Extra debug information for 'fread failed' --- attic/_hashindex.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 
deletions(-) diff --git a/attic/_hashindex.c b/attic/_hashindex.c index c0c541287..a1f70699d 100644 --- a/attic/_hashindex.c +++ b/attic/_hashindex.c @@ -135,6 +135,7 @@ hashindex_read(const char *path) { FILE *fd; off_t length; + off_t bytes_read; HashHeader header; HashIndex *index = NULL; @@ -142,8 +143,9 @@ hashindex_read(const char *path) EPRINTF_PATH(path, "fopen failed"); return NULL; } - if(fread(&header, 1, sizeof(HashHeader), fd) != sizeof(HashHeader)) { - EPRINTF_PATH(path, "fread failed"); + bytes_read = fread(&header, 1, sizeof(HashHeader), fd); + if(bytes_read != sizeof(HashHeader)) { + EPRINTF_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read); goto fail; } if(fseek(fd, 0, SEEK_END) < 0) { @@ -176,8 +178,9 @@ hashindex_read(const char *path) index = NULL; goto fail; } - if(fread(index->data, 1, length, fd) != length) { - EPRINTF_PATH(path, "fread failed"); + bytes_read = fread(index->data, 1, length, fd); + if(bytes_read != length) { + EPRINTF_PATH(path, "fread hashindex failed (expected %ld, got %ld)", length, bytes_read); free(index->data); free(index); index = NULL; From 0e5ef376bf2b6cc1780e5ffc6aced94443e83283 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 02:33:35 +0100 Subject: [PATCH 020/241] add .idea/ to gitignore, so PyCharm stuff does not get accidentally added/committed --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ec439bf87..23d766386 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ chunker.c *.pyo *.so docs/usage/*.inc +.idea/ From 74768511e1060f969221a94ea6fb6cc5ecab8742 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 02:45:21 +0100 Subject: [PATCH 021/241] do os.fsync like recommended in the python docs (gets a fileno, not file object, also do a flush first) --- attic/helpers.py | 2 +- attic/repository.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/attic/helpers.py 
b/attic/helpers.py index ac5266980..04f92336d 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -481,7 +481,7 @@ def write_msgpack(filename, d): with open(filename + '.tmp', 'wb') as fd: msgpack.pack(d, fd) fd.flush() - os.fsync(fd) + os.fsync(fd.fileno()) os.rename(filename + '.tmp', filename) diff --git a/attic/repository.py b/attic/repository.py index eed85dc43..f3a721774 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -577,6 +577,7 @@ class LoggedIO(object): if self._write_fd: self.segment += 1 self.offset = 0 - os.fsync(self._write_fd) + self._write_fd.flush() + os.fsync(self._write_fd.fileno()) self._write_fd.close() self._write_fd = None From a8ce0b8b8a3f0cf98268d50c9c8f0dfde912d222 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 03:09:01 +0100 Subject: [PATCH 022/241] remove unused imports --- attic/_version.py | 2 +- attic/helpers.py | 1 - attic/repository.py | 1 - attic/testsuite/helpers.py | 2 +- 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/attic/_version.py b/attic/_version.py index adcaf93b5..f87a0f5f2 100644 --- a/attic/_version.py +++ b/attic/_version.py @@ -15,7 +15,7 @@ git_full = "$Format:%H$" import subprocess -import sys + def run_command(args, cwd=None, verbose=False): try: diff --git a/attic/helpers.py b/attic/helpers.py index 04f92336d..7c3de34de 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -5,7 +5,6 @@ import msgpack import os import pwd import re -import stat import sys import time from datetime import datetime, timezone, timedelta diff --git a/attic/repository.py b/attic/repository.py index f3a721774..0ee85953a 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -3,7 +3,6 @@ from binascii import hexlify from itertools import islice import errno import os -import shutil import struct import sys from zlib import crc32 diff --git a/attic/testsuite/helpers.py b/attic/testsuite/helpers.py index e01b652c0..bd915179f 100644 --- a/attic/testsuite/helpers.py +++ 
b/attic/testsuite/helpers.py @@ -4,7 +4,7 @@ from datetime import datetime, timezone, timedelta import os import tempfile import unittest -from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \ +from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, \ StableDict, int_to_bigint, bigint_to_int from attic.testsuite import AtticTestCase import msgpack From 8ffd4c492624f3468edd1201c9ec72308f967a3c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 03:24:30 +0100 Subject: [PATCH 023/241] convert docstrings to triple-double-quoted --- attic/repository.py | 2 +- attic/testsuite/helpers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/attic/repository.py b/attic/repository.py index 0ee85953a..b52d41c61 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -41,7 +41,7 @@ class Repository(object): """{} is not a valid repository""" class CheckNeeded(Error): - '''Inconsistency detected. Please run "attic check {}"''' + """Inconsistency detected. Please run "attic check {}".""" class ObjectNotFound(Error): """Object with key {} not found in repository {}""" diff --git a/attic/testsuite/helpers.py b/attic/testsuite/helpers.py index bd915179f..472b8e2e4 100644 --- a/attic/testsuite/helpers.py +++ b/attic/testsuite/helpers.py @@ -139,7 +139,7 @@ class PruneSplitTestCase(AtticTestCase): def test(self): def local_to_UTC(month, day): - 'Convert noon on the month and day in 2013 to UTC.' 
+ """Convert noon on the month and day in 2013 to UTC.""" seconds = mktime(strptime('2013-%02d-%02d 12:00' % (month, day), '%Y-%m-%d %H:%M')) return datetime.fromtimestamp(seconds, tz=timezone.utc) From db7ce095d27e2ec6d2c52dc83b40b8fc581f6f37 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 28 Feb 2015 03:43:08 +0100 Subject: [PATCH 024/241] fix typos / spelling --- attic/_version.py | 2 +- attic/repository.py | 6 +++--- attic/testsuite/__init__.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/attic/_version.py b/attic/_version.py index f87a0f5f2..bb5f6079b 100644 --- a/attic/_version.py +++ b/attic/_version.py @@ -1,7 +1,7 @@ IN_LONG_VERSION_PY = True # This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag +# git-archive tarball (such as those provided by github's download-from-tag # feature). Distribution tarballs (build by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. 
diff --git a/attic/repository.py b/attic/repository.py index b52d41c61..d2905727f 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -79,9 +79,9 @@ class Repository(object): config.write(fd) def get_index_transaction_id(self): - indicies = sorted((int(name[6:]) for name in os.listdir(self.path) if name.startswith('index.') and name[6:].isdigit())) - if indicies: - return indicies[-1] + indices = sorted((int(name[6:]) for name in os.listdir(self.path) if name.startswith('index.') and name[6:].isdigit())) + if indices: + return indices[-1] else: return None diff --git a/attic/testsuite/__init__.py b/attic/testsuite/__init__.py index 684eeb91b..6b9d4fe91 100644 --- a/attic/testsuite/__init__.py +++ b/attic/testsuite/__init__.py @@ -19,7 +19,7 @@ except ImportError: has_lchflags = hasattr(os, 'lchflags') -# The mtime get/set precison varies on different OS and Python versions +# The mtime get/set precision varies on different OS and Python versions if 'HAVE_FUTIMENS' in getattr(posix, '_have_functions', []): st_mtime_ns_round = 0 elif 'HAVE_UTIMES' in sysconfig.get_config_vars(): @@ -68,7 +68,7 @@ class AtticTestCase(unittest.TestCase): if has_lchflags: attrs.append('st_flags') if not fuse or not os.path.isdir(path1): - # dir nlink is always 1 on our fuse fileystem + # dir nlink is always 1 on our fuse filesystem attrs.append('st_nlink') d1 = [filename] + [getattr(s1, a) for a in attrs] d2 = [filename] + [getattr(s2, a) for a in attrs] @@ -109,7 +109,7 @@ def get_tests(suite): class TestLoader(unittest.TestLoader): - """A customzied test loader that properly detects and filters our test cases + """A customized test loader that properly detects and filters our test cases """ def loadTestsFromName(self, pattern, module=None): From a3f335e0ff0021f52f3225b9a583d56025397678 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 1 Mar 2015 04:29:44 +0100 Subject: [PATCH 025/241] create: if "-" is given as path, read binary from stdin --- attic/archive.py | 17 
+++++++++++++++++ attic/archiver.py | 8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/attic/archive.py b/attic/archive.py index d78ce4b85..0bf1bd58a 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -381,6 +381,23 @@ class Archive: item.update(self.stat_attrs(st, path)) self.add_item(item) + def process_stdin(self, path, cache): + uid, gid = 0, 0 + fd = sys.stdin.buffer # binary + chunks = [] + for chunk in self.chunker.chunkify(fd): + chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats)) + self.stats.nfiles += 1 + item = { + b'path': path, + b'chunks': chunks, + b'mode': 0o100660, # regular file, ug=rw + b'uid': uid, b'user': uid2user(uid), + b'gid': gid, b'group': gid2group(gid), + b'mtime': int_to_bigint(int(time.time()) * 1000000000) + } + self.add_item(item) + def process_file(self, path, st, cache): safe_path = make_path_safe(path) # Is it a hard link? diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..585f01537 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -116,6 +116,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") except IOError: pass for path in args.paths: + if path == '-': # stdin + path = 'stdin' + self.print_verbose(path) + try: + archive.process_stdin(path, cache) + except IOError as e: + self.print_error('%s: %s', path, e) + continue path = os.path.normpath(path) if args.dontcross: try: From aab900b16902308feef218ed0f21b90604437faa Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 1 Mar 2015 05:07:29 +0100 Subject: [PATCH 026/241] extract: if --stdout is given, write all extracted binary data to stdout --- attic/archive.py | 11 +++++++---- attic/archiver.py | 8 ++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index 0bf1bd58a..acb9934d9 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -230,11 +230,14 @@ class Archive: cache.rollback() return stats - def extract_item(self, item, 
restore_attrs=True, dry_run=False): - if dry_run: + def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False): + if dry_run or stdout: if b'chunks' in item: - for _ in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True): - pass + for data in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True): + if stdout: + sys.stdout.buffer.write(data) + if stdout: + sys.stdout.buffer.flush() return dest = self.cwd diff --git a/attic/archiver.py b/attic/archiver.py index 585f01537..728393efa 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -202,6 +202,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") numeric_owner=args.numeric_owner) patterns = adjust_patterns(args.paths, args.excludes) dry_run = args.dry_run + stdout = args.stdout strip_components = args.strip_components dirs = [] for item in archive.iter_items(lambda item: not exclude_path(item[b'path'], patterns), preload=True): @@ -212,7 +213,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") continue if not args.dry_run: while dirs and not item[b'path'].startswith(dirs[-1][b'path']): - archive.extract_item(dirs.pop(-1)) + archive.extract_item(dirs.pop(-1), stdout=stdout) self.print_verbose(remove_surrogates(orig_path)) try: if dry_run: @@ -222,7 +223,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") dirs.append(item) archive.extract_item(item, restore_attrs=False) else: - archive.extract_item(item) + archive.extract_item(item, stdout=stdout) except IOError as e: self.print_error('%s: %s', remove_surrogates(orig_path), e) @@ -592,6 +593,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('--strip-components', dest='strip_components', type=int, default=0, metavar='NUMBER', help='Remove the specified number of leading path elements. 
Pathnames with fewer elements will be silently skipped.') + subparser.add_argument('--stdout', dest='stdout', + action='store_true', default=False, + help='write all extracted data to stdout') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to extract') From 6c7c2e2e400891012c16798f2fe18d2f257d852b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 3 Mar 2015 19:19:28 +0100 Subject: [PATCH 027/241] cleanup crypto.pyx, make it easier to adapt to other modes There were some small issues: a) it never called EVP_EncryptFinal_ex. For CTR mode, this had no visible consequences as EVP_EncryptUpdate already yielded all ciphertext. For cleanliness and to have correctness even in other modes, the missing call was added. b) decrypt = encrypt hack This is a nice hack to abbreviate, but it only works for modes without padding and without authentication. For cleanliness and to have correctness even in other modes, the missing usage of the decrypt api was added. c) outl == inl assumption Again, True for CTR mode, but not for padding or authenticating modes. Fixed so it computes the ciphertext / plaintext length based on api return values. Other changes: As encrypt and decrypt API calls are different even for initialization/reset, added a is_encrypt flag. Defensive output buffer allocation. Added the length of one extra AES block (16bytes) so it would work even with padding modes. 16bytes are needed because a full block of padding might get added when the plaintext was a multiple of aes block size. These changes are based on some experimental code I did for aes-cbc and aes-gcm. While we likely won't ever want aes-cbc in attic (maybe gcm though?), I think it is cleaner to not make too many mode specific assumptions and hacks, but just use the API as it was meant to be used. 
--- attic/crypto.pyx | 61 +++++++++++++++++++++++++++++++++------ attic/key.py | 8 ++--- attic/testsuite/crypto.py | 12 +++++--- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/attic/crypto.pyx b/attic/crypto.pyx index 1a5bc87b1..6a75dcaa2 100644 --- a/attic/crypto.pyx +++ b/attic/crypto.pyx @@ -28,8 +28,16 @@ cdef extern from "openssl/evp.h": int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx,const EVP_CIPHER *cipher, ENGINE *impl, const unsigned char *key, const unsigned char *iv) + int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx,const EVP_CIPHER *cipher, ENGINE *impl, + const unsigned char *key, const unsigned char *iv) int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, const unsigned char *in_, int inl) + int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, + int *outl, const unsigned char *in_, int inl) + int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, + int *outl) + int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, + int *outl) int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen, const unsigned char *salt, int saltlen, int iter, @@ -85,11 +93,19 @@ cdef class AES: """A thin wrapper around the OpenSSL EVP cipher API """ cdef EVP_CIPHER_CTX ctx + cdef int is_encrypt - def __cinit__(self, key, iv=None): + def __cinit__(self, is_encrypt, key, iv=None): EVP_CIPHER_CTX_init(&self.ctx) - if not EVP_EncryptInit_ex(&self.ctx, EVP_aes_256_ctr(), NULL, NULL, NULL): - raise Exception('EVP_EncryptInit_ex failed') + self.is_encrypt = is_encrypt + # Set cipher type and mode + cipher_mode = EVP_aes_256_ctr() + if self.is_encrypt: + if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL): + raise Exception('EVP_EncryptInit_ex failed') + else: # decrypt + if not EVP_DecryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL): + raise Exception('EVP_DecryptInit_ex failed') self.reset(key, iv) def __dealloc__(self): @@ -102,8 +118,13 @@ cdef class AES: key2 = key if iv: iv2 = iv - if not 
EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): - raise Exception('EVP_EncryptInit_ex failed') + # Initialise key and IV + if self.is_encrypt: + if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): + raise Exception('EVP_EncryptInit_ex failed') + else: # decrypt + if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2): + raise Exception('EVP_DecryptInit_ex failed') @property def iv(self): @@ -111,15 +132,37 @@ cdef class AES: def encrypt(self, data): cdef int inl = len(data) - cdef int outl - cdef unsigned char *out = malloc(inl) + cdef int ctl = 0 + cdef int outl = 0 + # note: modes that use padding, need up to one extra AES block (16b) + cdef unsigned char *out = malloc(inl+16) if not out: raise MemoryError try: if not EVP_EncryptUpdate(&self.ctx, out, &outl, data, inl): raise Exception('EVP_EncryptUpdate failed') - return out[:inl] + ctl = outl + if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl): + raise Exception('EVP_EncryptFinal failed') + ctl += outl + return out[:ctl] finally: free(out) - decrypt = encrypt + def decrypt(self, data): + cdef int inl = len(data) + cdef int ptl = 0 + cdef int outl = 0 + cdef unsigned char *out = malloc(inl) + if not out: + raise MemoryError + try: + if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl): + raise Exception('EVP_DecryptUpdate failed') + ptl = outl + if EVP_DecryptFinal_ex(&self.ctx, out+outl, &outl) <= 0: + raise Exception('EVP_DecryptFinal failed') + ptl += outl + return out[:ptl] + finally: + free(out) diff --git a/attic/key.py b/attic/key.py index ef623f36c..1dbd279a9 100644 --- a/attic/key.py +++ b/attic/key.py @@ -144,8 +144,8 @@ class AESKeyBase(KeyBase): self.chunk_seed = self.chunk_seed - 0xffffffff - 1 def init_ciphers(self, enc_iv=b''): - self.enc_cipher = AES(self.enc_key, enc_iv) - self.dec_cipher = AES(self.enc_key) + self.enc_cipher = AES(is_encrypt=True, key=self.enc_key, iv=enc_iv) + self.dec_cipher = AES(is_encrypt=False, key=self.enc_key) class 
PassphraseKey(AESKeyBase): @@ -244,7 +244,7 @@ class KeyfileKey(AESKeyBase): assert d[b'version'] == 1 assert d[b'algorithm'] == b'sha256' key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32) - data = AES(key).decrypt(d[b'data']) + data = AES(is_encrypt=False, key=key).decrypt(d[b'data']) if HMAC(key, data, sha256).digest() != d[b'hash']: return None return data @@ -254,7 +254,7 @@ class KeyfileKey(AESKeyBase): iterations = 100000 key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32) hash = HMAC(key, data, sha256).digest() - cdata = AES(key).encrypt(data) + cdata = AES(is_encrypt=True, key=key).encrypt(data) d = { 'version': 1, 'salt': salt, diff --git a/attic/testsuite/crypto.py b/attic/testsuite/crypto.py index b67d186ca..304ef97c0 100644 --- a/attic/testsuite/crypto.py +++ b/attic/testsuite/crypto.py @@ -30,11 +30,15 @@ class CryptoTestCase(AtticTestCase): def test_aes(self): key = b'X' * 32 data = b'foo' * 10 - aes = AES(key) + # encrypt + aes = AES(is_encrypt=True, key=key) self.assert_equal(bytes_to_long(aes.iv, 8), 0) cdata = aes.encrypt(data) self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466') self.assert_equal(bytes_to_long(aes.iv, 8), 2) - self.assert_not_equal(data, aes.decrypt(cdata)) - aes.reset(iv=b'\0' * 16) - self.assert_equal(data, aes.decrypt(cdata)) + # decrypt + aes = AES(is_encrypt=False, key=key) + self.assert_equal(bytes_to_long(aes.iv, 8), 0) + pdata = aes.decrypt(cdata) + self.assert_equal(data, pdata) + self.assert_equal(bytes_to_long(aes.iv, 8), 2) From 550320535c89a189516838ad3e67506f98e46d08 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 3 Mar 2015 20:11:28 +0100 Subject: [PATCH 028/241] crypto.pyx: cosmetic changes, added comments --- attic/crypto.pyx | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/attic/crypto.pyx b/attic/crypto.pyx index 6a75dcaa2..20e6c05cd 100644 --- 
a/attic/crypto.pyx +++ b/attic/crypto.pyx @@ -8,7 +8,7 @@ from libc.stdlib cimport malloc, free API_VERSION = 2 cdef extern from "openssl/rand.h": - int RAND_bytes(unsigned char *buf,int num) + int RAND_bytes(unsigned char *buf, int num) cdef extern from "openssl/evp.h": @@ -26,18 +26,16 @@ cdef extern from "openssl/evp.h": void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a) void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a) - int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx,const EVP_CIPHER *cipher, ENGINE *impl, + int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl, const unsigned char *key, const unsigned char *iv) - int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx,const EVP_CIPHER *cipher, ENGINE *impl, + int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl, const unsigned char *key, const unsigned char *iv) - int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, - int *outl, const unsigned char *in_, int inl) - int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, - int *outl, const unsigned char *in_, int inl) - int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, - int *outl) - int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, - int *outl) + int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, + const unsigned char *in_, int inl) + int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, + const unsigned char *in_, int inl) + int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) + int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen, const unsigned char *salt, int saltlen, int iter, @@ -55,7 +53,8 @@ long_to_bytes = lambda x: _long.pack(x) def num_aes_blocks(length): - """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data + """Return the number of AES blocks required to encrypt/decrypt *length* bytes of 
data. + Note: this is only correct for modes without padding, like AES-CTR. """ return (length + 15) // 16 @@ -160,7 +159,10 @@ cdef class AES: if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl): raise Exception('EVP_DecryptUpdate failed') ptl = outl - if EVP_DecryptFinal_ex(&self.ctx, out+outl, &outl) <= 0: + if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0: + # this error check is very important for modes with padding or + # authentication. for them, a failure here means corrupted data. + # CTR mode does not use padding nor authentication. raise Exception('EVP_DecryptFinal failed') ptl += outl return out[:ptl] From 1f4077d870f793d0d589d29ef1eed5f11f956a11 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 3 Mar 2015 20:18:28 +0100 Subject: [PATCH 029/241] crypto.pyx: adapt to strange requirements found in the openssl docs https://www.openssl.org/docs/crypto/EVP_aes_256_cbc.html EVP_DecryptInit_ex(), EVP_DecryptUpdate() and EVP_DecryptFinal_ex() are the corresponding decryption operations. EVP_DecryptFinal() will return an error code if padding is enabled and the final block is not correctly formatted. The parameters and restrictions are identical to the encryption operations except that if padding is enabled the decrypted data buffer out passed to EVP_DecryptUpdate() should have sufficient room for (inl + cipher_block_size) bytes unless the cipher block size is 1 in which case inl bytes is sufficient. I doubt this is correct, but let's rather be defensive here. --- attic/crypto.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/attic/crypto.pyx b/attic/crypto.pyx index 20e6c05cd..61dbc42d5 100644 --- a/attic/crypto.pyx +++ b/attic/crypto.pyx @@ -152,7 +152,10 @@ cdef class AES: cdef int inl = len(data) cdef int ptl = 0 cdef int outl = 0 - cdef unsigned char *out = malloc(inl) + # note: modes that use padding, need up to one extra AES block (16b). + # This is what the openssl docs say. 
I am not sure this is correct, + # but OTOH it will not cause any harm if our buffer is a little bigger. + cdef unsigned char *out = malloc(inl+16) if not out: raise MemoryError try: From 8f8a035e9322834bd1657b63b1124a172a3fcd36 Mon Sep 17 00:00:00 2001 From: anarcat Date: Thu, 5 Mar 2015 08:41:48 -0500 Subject: [PATCH 030/241] fix a bunch of typos this should fix the comments identified as `typo` and other small quirks found by @ThomasWaldmann. --- docs/internals.rst | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index ef43054ff..598c26eb3 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -113,7 +113,7 @@ object that contain metadata: * time Each item represents a file or directory or -symlink is stored as a ``item`` dictionnary that contains: +symlink is stored as an ``item`` dictionary that contains: * path * list of chunks @@ -135,7 +135,7 @@ it and it is reset every time an inode's metadata is changed. All items are serialized using msgpack and the resulting byte stream is fed into the same chunker used for regular file data and turned into deduplicated chunks. The reference to these chunks is then added -to the archvive metadata. This allows the archive to store many files, +to the archive metadata. This allows the archive to store many files, beyond the ``MAX_OBJECT_SIZE`` barrier of 20MB. A chunk is an object as well, of course, and its id is the hash of its @@ -199,7 +199,7 @@ the ``file path hash`` and contains: * chunks hashes The inode number is stored to make sure we distinguish between -different files, as a single path may not be unique accross different +different files, as a single path may not be unique across different archives in different setups. The file chunk cache is stored as a python associative array storing @@ -207,7 +207,7 @@ python objects, which generate a lot of overhead. 
This takes around 240 bytes per file without the chunk list, to be compared to at most 64 bytes of real data (depending on data alignment), and around 80 bytes per chunk hash (vs 32), with a minimum of ~250 bytes even if -only one chunck hash. +only one chunk hash. Indexes memory usage -------------------- @@ -238,12 +238,12 @@ two different keys. In AES CTR mode you can think of the IV as the start value for the counter. The counter itself is incremented by one after each 16 byte block. The IV/counter is not required to be random but it must NEVER be -reused. So to accomplish this Attic initializes the encryption counter +reused. So to accomplish this |project_name| initializes the encryption counter to be higher than any previously used counter value before encrypting new data. To reduce payload size only 8 bytes of the 16 bytes nonce is saved in -the payload, the first 8 bytes are always zeros. This does not affect +the payload, the first 8 bytes are always zeroes. This does not affect security but limits the maximum repository capacity to only 295 exabytes (2**64 * 16 bytes). @@ -280,7 +280,7 @@ id_key chunk_seed the seed for the buzhash chunking table (signed 32 bit integer) -Those fields are encoded using msgpack_. The utf-8-encoded phassphrase +Those fields are processed using msgpack_. The utf-8 encoded phassphrase is encrypted with PBKDF2_ and SHA256_ using 100000 iterations and a random 256 bits salt to give us a derived key. The derived key is 256 bits long. 
A `HMAC-SHA256`_ checksum of the above fields is generated @@ -292,20 +292,20 @@ version currently always an integer, 1 salt - random 256 bits salt used to encrypt the passphrase + random 256 bits salt used to process the passphrase iterations - number of iterations used to encrypt the passphrase (currently 100000) + number of iterations used to process the passphrase (currently 100000) algorithm - the hashing algorithm used to encrypt the passphrase and do the HMAC + the hashing algorithm used to process the passphrase and do the HMAC checksum (currently the string ``sha256``) hash - the HMAC checksum of the encrypted derived key + the HMAC of the encrypted derived key data - the derived key, encrypted with AES over a PBKDF2_ SHA256 hash + the derived key, encrypted with AES over a PBKDF2_ SHA256 key described above The resulting msgpack_ is then encoded using base64 and written to the From 87cb4a481335da75b00cfcacf78adff6a68ee496 Mon Sep 17 00:00:00 2001 From: anarcat Date: Thu, 5 Mar 2015 08:48:23 -0500 Subject: [PATCH 031/241] expand on the chunk id hash mechanism according to @ThomasWaldmann, the algorithm varies according to whether encryption is enabled. --- docs/internals.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 598c26eb3..45daa52b7 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -138,8 +138,8 @@ into deduplicated chunks. The reference to these chunks is then added to the archive metadata. This allows the archive to store many files, beyond the ``MAX_OBJECT_SIZE`` barrier of 20MB. -A chunk is an object as well, of course, and its id is the hash of its -(unencrypted and uncompressed) content. +A chunk is an object as well, of course. The chunk id is either +HMAC-SHA256_, when encryption is used, or a SHA256_ hash otherwise. 
Hints are stored in a file (``repo/hints``) and contain: From 0ba86357d77e7b9e4c14017efff0d0ee613da54f Mon Sep 17 00:00:00 2001 From: anarcat Date: Thu, 5 Mar 2015 08:51:26 -0500 Subject: [PATCH 032/241] clarify that 4095 bytes is not a typo i am actually assuming this right now, i haven't double-checked --- docs/internals.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/internals.rst b/docs/internals.rst index 45daa52b7..bd4022c1b 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -151,7 +151,7 @@ Chunks ------ |project_name| uses a rolling checksum with Buzhash_ algorithm, with -window size of 4095 bytes, with a minimum of 1024, and triggers when +window size of 4095 bytes (`0xFFF`), with a minimum of 1024, and triggers when the last 16 bits of the checksum are null, producing chunks of 64kB on average. All these parameters are fixed. The buzhash table is altered by XORing it with a seed randomly generated once for the archive, and From 5f882e976d0d7746d8b9e54416175a4ef0bb4b50 Mon Sep 17 00:00:00 2001 From: anarcat Date: Thu, 5 Mar 2015 08:57:52 -0500 Subject: [PATCH 033/241] clarify the index memory usage analysis it seems I extracted that data from [this mailing list post][] which in turn takes it from [this github comment][]. [this mailing list post]: http://librelist.com/browser/attic/2014/5/6/questions-and-suggestions-about-inner-working-of-attic/ [this github comment]: https://github.com/jborg/attic/issues/26#issuecomment-35439254 --- docs/internals.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/internals.rst b/docs/internals.rst index bd4022c1b..9172ec204 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -213,7 +213,7 @@ Indexes memory usage -------------------- Here is the estimated memory usage of |project_name| when using those -indexes: +indexes. 
Repository index 40 bytes x N ~ 200MB (If a remote repository is @@ -225,6 +225,9 @@ Chunk lookup index File chunk cache probably 80-100 bytes x N ~ 400MB +In the above we assume 350GB of data that we divide on an average 64KB +chunk size, so N is around 5.3 million. + Encryption ---------- From ecee5a0b514845e2957f0bb8055de89d2a25792c Mon Sep 17 00:00:00 2001 From: anarcat Date: Thu, 5 Mar 2015 09:00:06 -0500 Subject: [PATCH 034/241] PBKDF2 is a key derivation function do not use the word "encryption", as it is actually closer to "hashing" anyways. --- docs/internals.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/internals.rst b/docs/internals.rst index 9172ec204..52e2938a9 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -260,7 +260,7 @@ Key files When initialized with the ``init -e keyfile`` command, |project_name| needs an associated file in ``$HOME/.attic/keys`` to read and write the repository. The format is based on msgpack_, base64 encoding and -PBKDF2_ SHA256 encryption, which is then encoded again in a msgpack_. +PBKDF2_ SHA256 hashing, which is then encoded again in a msgpack_. The internal data structure is as follows: From effab97d11de60d93c6af68a6c947c39d17be0e9 Mon Sep 17 00:00:00 2001 From: anarcat Date: Thu, 5 Mar 2015 09:06:20 -0500 Subject: [PATCH 035/241] clarify that simultaneous backups may be a problem --- docs/faq.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/faq.rst b/docs/faq.rst index 0844ee327..cb077cc05 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -21,7 +21,9 @@ Can I backup from multiple servers into a single repository? the cache. This rebuild can be quite time consuming. So, yes it's possible. But it will be most efficient if a single - repository is only modified from one place.
Also keep in mind that + Attic will keep an exclusive lock on the repository while creating + or deleting archives, which may make *simultaneous* backups fail. Which file attributes are preserved? The following attributes are preserved: From e425545c10cb146674ecf38ceab2e020a9cb8374 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 8 Mar 2015 02:32:33 +0100 Subject: [PATCH 036/241] datetime does not like the year 10.000, fixes issue #139 --- attic/archiver.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..40d032155 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -281,7 +281,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") size = sum(size for _, size, _ in item[b'chunks']) except KeyError: pass - mtime = format_time(datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9)) + try: + mtime = datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9) + except ValueError: + # likely a broken mtime and datetime did not want to go beyond year 9999 + mtime = datetime(9999, 12, 31, 23, 59, 59) if b'source' in item: if type == 'l': extra = ' -> %s' % item[b'source'] @@ -291,7 +295,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") else: extra = '' print('%s%s %-6s %-6s %8d %s %s%s' % (type, mode, item[b'user'] or item[b'uid'], - item[b'group'] or item[b'gid'], size, mtime, + item[b'group'] or item[b'gid'], size, format_time(mtime), remove_surrogates(item[b'path']), extra)) else: for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')): From be29e5f6f4a41789108581af684780f21345f564 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 8 Mar 2015 04:19:25 +0100 Subject: [PATCH 037/241] fix traceback when trying to do unsupported passphrase change, fixes #189 --- attic/key.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/attic/key.py b/attic/key.py index 
ef623f36c..0ac48cdb1 100644 --- a/attic/key.py +++ b/attic/key.py @@ -190,6 +190,12 @@ class PassphraseKey(AESKeyBase): except IntegrityError: passphrase = getpass(prompt) + def change_passphrase(self): + class ImmutablePassphraseError(Error): + """The passphrase for this encryption key type can't be changed.""" + + raise ImmutablePassphraseError + def init(self, repository, passphrase): self.init_from_random_data(pbkdf2_sha256(passphrase.encode('utf-8'), repository.id, self.iterations, 100)) self.init_ciphers() From 463393141394adce7320d5c4116b4fc6012e5b3d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 8 Mar 2015 15:01:24 +0100 Subject: [PATCH 038/241] add global option --no-files-cache to lower memory consumption When given, attic does not use the "files" cache. Saves about 240B RAM per file (that sounds only a little, but consider that backups nowadays are often millions of files). So try this if attic eats more memory than you have as RAM (usually means paging or MemoryErrors). Of course, saving memory is not for free. In my one experiment, run time increased from 3.5 to 23 minutes (my system has enough RAM).
--- attic/archiver.py | 11 +++++++---- attic/cache.py | 8 +++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..573021462 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -97,7 +97,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") t0 = datetime.now() repository = self.open_repository(args.archive, exclusive=True) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest) + cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, numeric_owner=args.numeric_owner) @@ -227,7 +227,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") """Delete an existing archive""" repository = self.open_repository(args.archive, exclusive=True) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest) + cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache) stats = Statistics() archive.delete(stats) @@ -302,7 +302,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") """Show archive details such as disk space used""" repository = self.open_repository(args.archive) manifest, key = Manifest.load(repository) - cache = Cache(repository, key, manifest) + cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache) stats = archive.calc_stats(cache) print('Name:', archive.name) @@ -319,7 +319,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") """Prune repository archives according to specified rules""" repository = self.open_repository(args.repository, exclusive=True) manifest, key = Manifest.load(repository) - cache = Cache(repository, 
key, manifest) + cache = Cache(repository, key, manifest, do_files=args.cache_files) archives = list(sorted(Archive.list_archives(repository, key, manifest, cache), key=attrgetter('ts'), reverse=True)) if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None: @@ -447,6 +447,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', default=False, help='verbose output') + common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false', + default=True, + help='do not use the "files" cache') # We can't use argparse for "serve" since we don't want it to show up in "Available commands" if args: diff --git a/attic/cache.py b/attic/cache.py index acbc76653..ce5996564 100644 --- a/attic/cache.py +++ b/attic/cache.py @@ -16,13 +16,14 @@ class Cache(object): class RepositoryReplay(Error): """Cache is newer than repository, refusing to continue""" - def __init__(self, repository, key, manifest, path=None, sync=True): + def __init__(self, repository, key, manifest, path=None, sync=True, do_files=False): self.timestamp = None self.txn_active = False self.repository = repository self.key = key self.manifest = manifest self.path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii')) + self.do_files = do_files if not os.path.exists(self.path): self.create() self.open() @@ -83,6 +84,7 @@ class Cache(object): u.feed(data) for path_hash, item in u: item[0] += 1 + # in the end, this takes about 240 Bytes per file self.files[path_hash] = msgpack.packb(item) def begin_txn(self): @@ -206,6 +208,8 @@ class Cache(object): stats.update(-size, -csize, False) def file_known_and_unchanged(self, path_hash, st): + if not self.do_files: + return None if self.files is None: self._read_files() entry = self.files.get(path_hash) @@ -221,6 +225,8 @@ class Cache(object): return None def memorize_file(self, 
path_hash, st, ids): + if not self.do_files: + return # Entry: Age, inode, size, mtime, chunk ids mtime_ns = st_mtime_ns(st) self.files[path_hash] = msgpack.packb((0, st.st_ino, st.st_size, int_to_bigint(mtime_ns), ids)) From 9841af55425893dbb6aa7e02da6609589dd04972 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 8 Mar 2015 19:18:21 +0100 Subject: [PATCH 039/241] better attic create -v output Added a indicator character to the left for (A)dded, (M)odified, (U)nchanged status of regular files. Lowercase indicators are for special files. You may or may not want to use grep to filter out U and d. --- attic/archive.py | 16 +++++++++++++++- attic/archiver.py | 24 ++++++++++++++++++++---- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index d78ce4b85..ed9561a4b 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -374,14 +374,22 @@ class Archive: item = {b'path': make_path_safe(path), b'rdev': st.st_rdev} item.update(self.stat_attrs(st, path)) self.add_item(item) + if stat.S_ISCHR(st.st_mode): + status = 'c' # char device + elif stat.S_ISBLK(st.st_mode): + status = 'b' # block device + return status def process_symlink(self, path, st): source = os.readlink(path) item = {b'path': make_path_safe(path), b'source': source} item.update(self.stat_attrs(st, path)) self.add_item(item) + status = 's' # symlink + return status def process_file(self, path, st, cache): + status = None safe_path = make_path_safe(path) # Is it a hard link? 
if st.st_nlink > 1: @@ -390,7 +398,8 @@ class Archive: item = self.stat_attrs(st, path) item.update({b'path': safe_path, b'source': source}) self.add_item(item) - return + status = 'h' # regular file, hardlink (to already seen inodes) + return status else: self.hard_links[st.st_ino, st.st_dev] = safe_path path_hash = self.key.id_hash(os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape')) @@ -403,6 +412,9 @@ class Archive: break else: chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids] + status = 'U' # regular file, unchanged + else: + status = 'A' # regular file, added # Only chunkify the file if needed if chunks is None: with Archive._open_rb(path, st) as fd: @@ -410,10 +422,12 @@ class Archive: for chunk in self.chunker.chunkify(fd): chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats)) cache.memorize_file(path_hash, st, [c[0] for c in chunks]) + status = status or 'M' # regular file, modified (if not 'A' already) item = {b'path': safe_path, b'chunks': chunks} item.update(self.stat_attrs(st, path)) self.stats.nfiles += 1 self.add_item(item) + return status @staticmethod def list_archives(repository, key, manifest, cache=None): diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..0bcfd89a1 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -157,16 +157,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # Ignore unix sockets if stat.S_ISSOCK(st.st_mode): return - self.print_verbose(remove_surrogates(path)) + status = None if stat.S_ISREG(st.st_mode): try: - archive.process_file(path, st, cache) + status = archive.process_file(path, st, cache) except IOError as e: self.print_error('%s: %s', path, e) elif stat.S_ISDIR(st.st_mode): if exclude_caches and is_cachedir(path): return archive.process_item(path, st) + status = 'd' # directory try: entries = os.listdir(path) except OSError as e: @@ -176,13 +177,28 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") 
self._process(archive, cache, excludes, exclude_caches, skip_inodes, os.path.join(path, filename), restrict_dev) elif stat.S_ISLNK(st.st_mode): - archive.process_symlink(path, st) + status = archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): archive.process_item(path, st) + status = 'f' # fifo elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode): - archive.process_dev(path, st) + status = archive.process_dev(path, st) else: self.print_error('Unknown file type: %s', path) + return + # Status output + # A lowercase character means a file type other than a regular file, + # attic usually just stores them. E.g. (d)irectory. + # Hardlinks to already seen content are indicated by (h). + # A uppercase character means a regular file that was (A)dded, + # (M)odified or was (U)nchanged. + # Note: A/M/U is relative to the "files" cache, not to the repo. + # This would be an issue if the files cache is not used. + if status is None: + status = '?' # need to add a status code somewhere + # output ALL the stuff - it can be easily filtered using grep. + # even stuff considered unchanged might be interesting. + self.print_verbose("%1s %s", status, remove_surrogates(path)) def do_extract(self, args): """Extract archive contents""" From d3fe74d4c081edea8043208a95bfaa168e2f0faa Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 9 Mar 2015 15:17:56 +0100 Subject: [PATCH 040/241] Cache: do not try to release the lock twice If Cache was already closed and __del__ was called, it called close() again and crashed when trying to release the lock again. 
--- attic/cache.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/attic/cache.py b/attic/cache.py index acbc76653..7bec89c16 100644 --- a/attic/cache.py +++ b/attic/cache.py @@ -18,6 +18,7 @@ class Cache(object): def __init__(self, repository, key, manifest, path=None, sync=True): self.timestamp = None + self.lock = None self.txn_active = False self.repository = repository self.key = key @@ -69,7 +70,9 @@ class Cache(object): self.files = None def close(self): - self.lock.release() + if self.lock: + self.lock.release() + self.lock = None def _read_files(self): self.files = {} From 6e6819e626039e5fd66878ce4b6c8b4b40b73c97 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 9 Mar 2015 16:02:06 +0100 Subject: [PATCH 041/241] attic delete: add repository + local cache deletion --- attic/archiver.py | 41 ++++++++++++++++++++++++++--------------- attic/cache.py | 9 ++++++++- attic/remote.py | 5 ++++- attic/repository.py | 7 +++++++ 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..7e6f38297 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -224,18 +224,28 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") return self.exit_code def do_delete(self, args): - """Delete an existing archive""" - repository = self.open_repository(args.archive, exclusive=True) + """Delete an existing repository or archive""" + repository = self.open_repository(args.target, exclusive=True) manifest, key = Manifest.load(repository) cache = Cache(repository, key, manifest) - archive = Archive(repository, key, manifest, args.archive.archive, cache=cache) - stats = Statistics() - archive.delete(stats) - manifest.write() - repository.commit() - cache.commit() - if args.stats: - stats.print_('Deleted data:', cache) + if args.target.archive: + archive = Archive(repository, key, manifest, args.target.archive, cache=cache) + stats = Statistics() + archive.delete(stats) + 
manifest.write() + repository.commit() + cache.commit() + if args.stats: + stats.print_('Deleted data:', cache) + else: + print("You requested to completely DELETE the repository *including* all archives it contains:") + for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')): + print(format_archive(archive)) + print("""Type "YES" if you understand this and want to continue.\n""") + if input('Do you want to continue? ') == 'YES': + repository.destroy() + cache.destroy() + print("Repository and corresponding cache were deleted.") return self.exit_code def do_mount(self, args): @@ -591,8 +601,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='paths to extract') delete_epilog = textwrap.dedent(""" - This command deletes an archive from the repository. Any disk space not - shared with any other existing archive is also reclaimed. + This command deletes an archive from the repository or the complete repository. + Disk space is reclaimed accordingly. If you delete the complete repository, the + local cache for it (if any) is also deleted. """) subparser = subparsers.add_parser('delete', parents=[common_parser], description=self.do_delete.__doc__, @@ -602,9 +613,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('-s', '--stats', dest='stats', action='store_true', default=False, help='print statistics for the deleted archive') - subparser.add_argument('archive', metavar='ARCHIVE', - type=location_validator(archive=True), - help='archive to delete') + subparser.add_argument('target', metavar='TARGET', + type=location_validator(), + help='archive or repository to delete') list_epilog = textwrap.dedent(""" This command lists the contents of a repository or an archive. 
diff --git a/attic/cache.py b/attic/cache.py index 7bec89c16..d8170440c 100644 --- a/attic/cache.py +++ b/attic/cache.py @@ -38,7 +38,7 @@ class Cache(object): self.close() def create(self): - """Create a new empty cache at `path` + """Create a new empty cache at `self.path` """ os.makedirs(self.path) with open(os.path.join(self.path, 'README'), 'w') as fd: @@ -54,6 +54,13 @@ class Cache(object): with open(os.path.join(self.path, 'files'), 'w') as fd: pass # empty file + def destroy(self): + """destroy the cache at `self.path` + """ + self.close() + os.remove(os.path.join(self.path, 'config')) # kill config first + shutil.rmtree(self.path) + def open(self): if not os.path.isdir(self.path): raise Exception('%s Does not look like an Attic cache' % self.path) diff --git a/attic/remote.py b/attic/remote.py index f2a0aed06..7169a9eec 100644 --- a/attic/remote.py +++ b/attic/remote.py @@ -218,6 +218,9 @@ class RemoteRepository(object): def rollback(self, *args): return self.call('rollback') + def destroy(self): + return self.call('destroy') + def __len__(self): return self.call('__len__') @@ -312,4 +315,4 @@ class RepositoryCache: def cache_if_remote(repository): if isinstance(repository, RemoteRepository): return RepositoryCache(repository) - return repository \ No newline at end of file + return repository diff --git a/attic/repository.py b/attic/repository.py index eed85dc43..08ff6d6da 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -79,6 +79,13 @@ class Repository(object): with open(os.path.join(path, 'config'), 'w') as fd: config.write(fd) + def destroy(self): + """Destroy the repository at `self.path` + """ + self.close() + os.remove(os.path.join(self.path, 'config')) # kill config first + shutil.rmtree(self.path) + def get_index_transaction_id(self): indicies = sorted((int(name[6:]) for name in os.listdir(self.path) if name.startswith('index.') and name[6:].isdigit())) if indicies: From 954b26f64c903638bd2c195b595ad08a832ef1fa Mon Sep 17 00:00:00 
2001 From: Thomas Waldmann Date: Mon, 9 Mar 2015 17:01:29 +0100 Subject: [PATCH 042/241] RPCError: include the exception args we get from remote Without this, you just got "RPCError: AttributeError", now you get (e.g.): RPCError: AttributeError(b"'Repository' object has no attribute 'segments'",) --- attic/remote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/attic/remote.py b/attic/remote.py index f2a0aed06..3c9f1b305 100644 --- a/attic/remote.py +++ b/attic/remote.py @@ -157,7 +157,7 @@ class RemoteRepository(object): raise PathNotAllowed(*res) if error == b'ObjectNotFound': raise Repository.ObjectNotFound(res[0], self.location.orig) - raise self.RPCError(error) + raise self.RPCError("%s%r" % (error.decode('ascii'), res)) else: yield res if not waiting_for and not calls: @@ -312,4 +312,4 @@ class RepositoryCache: def cache_if_remote(repository): if isinstance(repository, RemoteRepository): return RepositoryCache(repository) - return repository \ No newline at end of file + return repository From 3b744d2ee8b80c5a0c818443db23fd36e0ae1162 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 9 Mar 2015 20:45:31 +0100 Subject: [PATCH 043/241] fix Repository._active_txn state when lock upgrade fails --- attic/repository.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/attic/repository.py b/attic/repository.py index eed85dc43..096d494f8 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -138,7 +138,14 @@ class Repository(object): def prepare_txn(self, transaction_id, do_cleanup=True): self._active_txn = True - self.lock.upgrade() + try: + self.lock.upgrade() + except UpgradableLock.WriteLockFailed: + # if upgrading the lock to exclusive fails, we do not have an + # active transaction. this is important for "serve" mode, where + # the repository instance lives on - even if exceptions happened. 
+ self._active_txn = False + raise if not self.index: self.index = self.open_index(transaction_id) if transaction_id is None: From becae426180c0438ac179a92082f5b0bd64d59cd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 9 Mar 2015 21:59:10 +0100 Subject: [PATCH 044/241] check unpacked data from RPC for tuple type and correct length, fixes #127 --- attic/remote.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/attic/remote.py b/attic/remote.py index f2a0aed06..f21d3cbc2 100644 --- a/attic/remote.py +++ b/attic/remote.py @@ -44,7 +44,10 @@ class RepositoryServer(object): if not data: return unpacker.feed(data) - for type, msgid, method, args in unpacker: + for unpacked in unpacker: + if not (isinstance(unpacked, tuple) and len(unpacked) == 4): + raise Exception("Unexpected RPC data format.") + type, msgid, method, args = unpacked method = method.decode('ascii') try: try: @@ -172,7 +175,10 @@ class RemoteRepository(object): if not data: raise ConnectionClosed() self.unpacker.feed(data) - for type, msgid, error, res in self.unpacker: + for unpacked in self.unpacker: + if not (isinstance(unpacked, tuple) and len(unpacked) == 4): + raise Exception("Unexpected RPC data format.") + type, msgid, error, res = unpacked if msgid in self.ignore_responses: self.ignore_responses.remove(msgid) else: @@ -312,4 +318,4 @@ class RepositoryCache: def cache_if_remote(repository): if isinstance(repository, RemoteRepository): return RepositoryCache(repository) - return repository \ No newline at end of file + return repository From a67d4219c39820c09e8c8debdb2f8d3ce033d5d9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 9 Mar 2015 23:35:56 +0100 Subject: [PATCH 045/241] avoid defect python-msgpack releases, fixes #171, fixes #185 --- setup.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2c1432b10..9ebb55726 100644 --- a/setup.py +++ b/setup.py @@ -122,5 +122,14 @@ setup( 
scripts=['scripts/attic'], cmdclass=cmdclass, ext_modules=ext_modules, - install_requires=['msgpack-python'] + # msgpack pure python data corruption in some versions. + # The compiled C-version of python-msgpack was not affected. + # So, IF you had a compiler installed AND you did not force the pure-python version, + # you likely were not affected by the issue. + # python-msgpack <=0.4.2 is OK, 0.4.5 is latest release, but bug was fixed in repo. + # Details see: + # https://github.com/jborg/attic/issues/171 + # https://github.com/jborg/attic/issues/185 + # https://github.com/msgpack/msgpack-python/issues/124 + install_requires=['msgpack-python<=0.4.2,>0.4.5'] ) From 97b5154fc56a37643c4ebb0fcf42019fcba5ccdf Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 10 Mar 2015 01:11:18 +0100 Subject: [PATCH 046/241] check: sort archives in reverse time order --- attic/archive.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/attic/archive.py b/attic/archive.py index d78ce4b85..62a4df73d 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -672,7 +672,9 @@ class ArchiveChecker: repository = cache_if_remote(self.repository) num_archives = len(self.manifest.archives) - for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1): + archive_items = sorted(self.manifest.archives.items(), reverse=True, + key=lambda name_info: name_info[1][b'time']) + for i, (name, info) in enumerate(archive_items, 1): self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives)) archive_id = info[b'id'] if not archive_id in self.chunks: From 90c50e3171673fab694a0f122acedbcc6996b918 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 11 Mar 2015 03:04:12 +0100 Subject: [PATCH 047/241] implement check --last N Note: of course it can only check for orphaned objects, if it has processed all archives in the repo. Thus this check is skipped as soon as you give --last N option. The numbers shown in progress indicator are (N,T). 
N is the number of the currently checked archive (starts at T as it first checks latest archive). T is the total number of archives. --- attic/archive.py | 16 ++++++++++------ attic/archiver.py | 5 ++++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index 62a4df73d..09b9b52c7 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -529,7 +529,7 @@ class ArchiveChecker: def __del__(self): shutil.rmtree(self.tmpdir) - def check(self, repository, repair=False): + def check(self, repository, repair=False, last=None): self.report_progress('Starting archive consistency check...') self.repair = repair self.repository = repository @@ -539,8 +539,11 @@ class ArchiveChecker: self.manifest = self.rebuild_manifest() else: self.manifest, _ = Manifest.load(repository, key=self.key) - self.rebuild_refcounts() - self.verify_chunks() + self.rebuild_refcounts(last=last) + if last is None: + self.verify_chunks() + else: + self.report_progress('Orphaned objects check skipped (needs all archives checked)') if not self.error_found: self.report_progress('Archive consistency check complete, no problems found.') return self.repair or not self.error_found @@ -595,7 +598,7 @@ class ArchiveChecker: self.report_progress('Manifest rebuild complete', error=True) return manifest - def rebuild_refcounts(self): + def rebuild_refcounts(self, last=None): """Rebuild object reference counts by walking the metadata Missing and/or incorrect data is repaired when detected @@ -674,8 +677,9 @@ class ArchiveChecker: num_archives = len(self.manifest.archives) archive_items = sorted(self.manifest.archives.items(), reverse=True, key=lambda name_info: name_info[1][b'time']) - for i, (name, info) in enumerate(archive_items, 1): - self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives)) + end = None if last is None else min(num_archives, last) + for i, (name, info) in enumerate(archive_items[:end]): + self.report_progress('Analyzing 
archive {} ({}/{})'.format(name, num_archives - i, num_archives)) archive_id = info[b'id'] if not archive_id in self.chunks: self.report_progress('Archive metadata block is missing', error=True) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..e7dc2b2e0 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -81,7 +81,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") print('Repository check complete, no problems found.') else: return 1 - if not args.repo_only and not ArchiveChecker().check(repository, repair=args.repair): + if not args.repo_only and not ArchiveChecker().check(repository, repair=args.repair, last=args.last): return 1 return 0 @@ -503,6 +503,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('--repair', dest='repair', action='store_true', default=False, help='attempt to repair any inconsistencies found') + subparser.add_argument('--last', dest='last', + type=int, default=None, metavar='N', + help='only check last N archives (Default: all)') change_passphrase_epilog = textwrap.dedent(""" The key files used for repository encryption are optionally passphrase From 4c7d0762a95ec29522e82039ce3fbdcde4d924a1 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 12 Mar 2015 20:06:53 +0100 Subject: [PATCH 048/241] as msgpack-python 0.4.6 is released now, just use that. --- setup.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 9ebb55726..0309ad9ee 100644 --- a/setup.py +++ b/setup.py @@ -122,14 +122,7 @@ setup( scripts=['scripts/attic'], cmdclass=cmdclass, ext_modules=ext_modules, - # msgpack pure python data corruption in some versions. - # The compiled C-version of python-msgpack was not affected. - # So, IF you had a compiler installed AND you did not force the pure-python version, - # you likely were not affected by the issue. 
- # python-msgpack <=0.4.2 is OK, 0.4.5 is latest release, but bug was fixed in repo. - # Details see: - # https://github.com/jborg/attic/issues/171 - # https://github.com/jborg/attic/issues/185 - # https://github.com/msgpack/msgpack-python/issues/124 - install_requires=['msgpack-python<=0.4.2,>0.4.5'] + # msgpack pure python data corruption was fixed in 0.4.6. + # Also, we might use some rather recent API features. + install_requires=['msgpack-python>=0.4.6'] ) From 6116fa72b2d06b7ba5b2ede5fec6fd311f992e7d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 15 Mar 2015 00:20:50 +0100 Subject: [PATCH 049/241] give specific path to xattr.is_enabled(), disable symlink setattr call that always fails --- attic/testsuite/archiver.py | 9 +++++++-- attic/xattr.py | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 382fcc854..543551a51 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -140,9 +140,14 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.path.join(self.input_path, 'hardlink')) # Symlink os.symlink('somewhere', os.path.join(self.input_path, 'link1')) - if xattr.is_enabled(): + if xattr.is_enabled(self.input_path): xattr.setxattr(os.path.join(self.input_path, 'file1'), 'user.foo', b'bar') - xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False) + # XXX this always fails for me + # ubuntu 14.04, on a TMP dir filesystem with user_xattr, using fakeroot + # same for newer ubuntu and centos. + # if this is supported just on specific platform, platform should be checked first, + # so that the test setup for all tests using it does not fail here always for others. 
+ #xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False) # FIFO node os.mkfifo(os.path.join(self.input_path, 'fifo1')) if has_lchflags: diff --git a/attic/xattr.py b/attic/xattr.py index e00610155..07bc2a6b9 100644 --- a/attic/xattr.py +++ b/attic/xattr.py @@ -8,10 +8,10 @@ from ctypes import CDLL, create_string_buffer, c_ssize_t, c_size_t, c_char_p, c_ from ctypes.util import find_library -def is_enabled(): +def is_enabled(path=None): """Determine if xattr is enabled on the filesystem """ - with tempfile.NamedTemporaryFile() as fd: + with tempfile.NamedTemporaryFile(dir=path) as fd: try: setxattr(fd.fileno(), 'user.name', b'value') except OSError: From fec5572836921d41361f668ae180aae7b5fa0fab Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 17 Mar 2015 23:03:36 +0100 Subject: [PATCH 050/241] no Class(object) in py3 --- attic/cache.py | 2 +- attic/key.py | 2 +- attic/remote.py | 6 +++--- attic/repository.py | 4 ++-- attic/testsuite/helpers.py | 2 +- attic/testsuite/key.py | 6 +++--- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/attic/cache.py b/attic/cache.py index acbc76653..e9f0a81dd 100644 --- a/attic/cache.py +++ b/attic/cache.py @@ -10,7 +10,7 @@ from .helpers import Error, get_cache_dir, decode_dict, st_mtime_ns, unhexlify, from .hashindex import ChunkIndex -class Cache(object): +class Cache: """Client Side cache """ class RepositoryReplay(Error): diff --git a/attic/key.py b/attic/key.py index ef623f36c..e32c0bf71 100644 --- a/attic/key.py +++ b/attic/key.py @@ -44,7 +44,7 @@ def key_factory(repository, manifest_data): raise UnsupportedPayloadError(manifest_data[0]) -class KeyBase(object): +class KeyBase: def __init__(self): self.TYPE_STR = bytes([self.TYPE]) diff --git a/attic/remote.py b/attic/remote.py index f2a0aed06..27305a619 100644 --- a/attic/remote.py +++ b/attic/remote.py @@ -23,7 +23,7 @@ class PathNotAllowed(Error): """Repository path not allowed""" -class 
RepositoryServer(object): +class RepositoryServer: def __init__(self, restrict_to_paths): self.repository = None @@ -78,7 +78,7 @@ class RepositoryServer(object): return self.repository.id -class RemoteRepository(object): +class RemoteRepository: extra_test_args = [] class RPCError(Exception): @@ -312,4 +312,4 @@ class RepositoryCache: def cache_if_remote(repository): if isinstance(repository, RemoteRepository): return RepositoryCache(repository) - return repository \ No newline at end of file + return repository diff --git a/attic/repository.py b/attic/repository.py index eed85dc43..f87e2731e 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -19,7 +19,7 @@ TAG_DELETE = 1 TAG_COMMIT = 2 -class Repository(object): +class Repository: """Filesystem based transactional key value store On disk layout: @@ -378,7 +378,7 @@ class Repository(object): """ -class LoggedIO(object): +class LoggedIO: header_fmt = struct.Struct(' Date: Tue, 17 Mar 2015 23:09:14 +0100 Subject: [PATCH 051/241] use "with" with open() --- attic/testsuite/archiver.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 382fcc854..fde26cd3a 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -262,10 +262,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.attic('extract', '--dry-run', self.repository_location + '::test') self.attic('check', self.repository_location) name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0] - fd = open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+') - fd.seek(100) - fd.write('XXXX') - fd.close() + with open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+') as fd: + fd.seek(100) + fd.write('XXXX') self.attic('check', self.repository_location, exit_code=1) def test_readonly_repository(self): From a21da5d2e015aeedb1e1634fda28fcc7563faac2 Mon Sep 17 00:00:00 2001 From: Thomas 
Waldmann Date: Tue, 17 Mar 2015 23:16:12 +0100 Subject: [PATCH 052/241] no bare except --- attic/archive.py | 2 +- attic/fuse.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index d78ce4b85..e5d6c3ea6 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -587,7 +587,7 @@ class ArchiveChecker: continue try: archive = msgpack.unpackb(data) - except: + except Exception: continue if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive: self.report_progress('Found archive ' + archive[b'name'].decode('utf-8'), error=True) diff --git a/attic/fuse.py b/attic/fuse.py index bc102173e..6709990ff 100644 --- a/attic/fuse.py +++ b/attic/fuse.py @@ -228,7 +228,5 @@ class AtticOperations(llfuse.Operations): daemonize() try: llfuse.main(single=True) - except: + finally: llfuse.close() - raise - llfuse.close() From 3a33d57c7d9b69ba93275f59d2fd87a8496350e3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 17 Mar 2015 23:23:56 +0100 Subject: [PATCH 053/241] triple-double-quotes for docstrings --- attic/repository.py | 10 +++++----- attic/testsuite/helpers.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/attic/repository.py b/attic/repository.py index f87e2731e..f479c57b6 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -33,19 +33,19 @@ class Repository: DEFAULT_SEGMENTS_PER_DIR = 10000 class DoesNotExist(Error): - """Repository {} does not exist""" + """Repository {} does not exist.""" class AlreadyExists(Error): - """Repository {} already exists""" + """Repository {} already exists.""" class InvalidRepository(Error): - """{} is not a valid repository""" + """{} is not a valid repository.""" class CheckNeeded(Error): - '''Inconsistency detected. Please run "attic check {}"''' + """Inconsistency detected. 
Please run "attic check {}".""" class ObjectNotFound(Error): - """Object with key {} not found in repository {}""" + """Object with key {} not found in repository {}.""" def __init__(self, path, create=False, exclusive=False): self.path = path diff --git a/attic/testsuite/helpers.py b/attic/testsuite/helpers.py index 74f6fae80..700a0d542 100644 --- a/attic/testsuite/helpers.py +++ b/attic/testsuite/helpers.py @@ -139,7 +139,7 @@ class PruneSplitTestCase(AtticTestCase): def test(self): def local_to_UTC(month, day): - 'Convert noon on the month and day in 2013 to UTC.' + """Convert noon on the month and day in 2013 to UTC.""" seconds = mktime(strptime('2013-%02d-%02d 12:00' % (month, day), '%Y-%m-%d %H:%M')) return datetime.fromtimestamp(seconds, tz=timezone.utc) From 4ef6125f45bff8c336f03f1cba3ebf88fba99fb8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 17 Mar 2015 23:47:21 +0100 Subject: [PATCH 054/241] cleanup pep8 issues found by pycharm --- attic/_version.py | 23 ++++++++++++++--------- attic/archive.py | 15 +++++++++------ attic/archiver.py | 11 ++++++----- attic/helpers.py | 5 +++-- attic/key.py | 1 + attic/platform.py | 1 + attic/remote.py | 3 ++- attic/repository.py | 3 ++- attic/testsuite/archiver.py | 6 +++--- attic/testsuite/crypto.py | 6 +++--- attic/testsuite/helpers.py | 2 +- attic/testsuite/repository.py | 1 - attic/testsuite/xattr.py | 1 + setup.py | 2 +- 14 files changed, 47 insertions(+), 33 deletions(-) diff --git a/attic/_version.py b/attic/_version.py index adcaf93b5..e75446de4 100644 --- a/attic/_version.py +++ b/attic/_version.py @@ -17,6 +17,7 @@ git_full = "$Format:%H$" import subprocess import sys + def run_command(args, cwd=None, verbose=False): try: # remember shell=False, so use git.cmd on windows, not just git @@ -41,6 +42,7 @@ import sys import re import os.path + def get_expanded_variables(versionfile_source): # the code embedded in _version.py can just fetch the value of these # variables. 
When used from setup.py, we don't want to import @@ -48,7 +50,7 @@ def get_expanded_variables(versionfile_source): # used from _version.py. variables = {} try: - for line in open(versionfile_source,"r").readlines(): + for line in open(versionfile_source, "r").readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: @@ -61,12 +63,13 @@ def get_expanded_variables(versionfile_source): pass return variables + def versions_from_expanded_variables(variables, tag_prefix, verbose=False): refnames = variables["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("variables are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball + return {} # unexpanded, so not in an unpacked git-archive tarball refs = set([r.strip() for r in refnames.strip("()").split(",")]) for ref in list(refs): if not re.search(r'\d', ref): @@ -87,13 +90,14 @@ def versions_from_expanded_variables(variables, tag_prefix, verbose=False): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) - return { "version": r, - "full": variables["full"].strip() } + return {"version": r, + "full": variables["full"].strip()} # no suitable tags, so we use the full revision id if verbose: print("no suitable tags, using full revision id") - return { "version": variables["full"].strip(), - "full": variables["full"].strip() } + return {"version": variables["full"].strip(), + "full": variables["full"].strip()} + def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): # this runs 'git' from the root of the source tree. 
That either means @@ -110,7 +114,7 @@ def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): here = os.path.abspath(__file__) except NameError: # some py2exe/bbfreeze/non-CPython implementations don't do __file__ - return {} # not always correct + return {} # not always correct # versionfile_source is the relative path from the top of the source tree # (where the .git directory might live) to this file. Invert this to find @@ -157,7 +161,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False) here = os.path.abspath(__file__) except NameError: # py2exe/bbfreeze/non-CPython don't have __file__ - return {} # without __file__, we have no hope + return {} # without __file__, we have no hope # versionfile_source is the relative path from the top of the source # tree to _version.py. Invert this to find the root from __file__. root = here @@ -183,8 +187,9 @@ tag_prefix = "" parentdir_prefix = "Attic-" versionfile_source = "attic/_version.py" + def get_versions(default={"version": "unknown", "full": ""}, verbose=False): - variables = { "refnames": git_refnames, "full": git_full } + variables = {"refnames": git_refnames, "full": git_full} ver = versions_from_expanded_variables(variables, tag_prefix, verbose) if not ver: ver = versions_from_vcs(tag_prefix, versionfile_source, verbose) diff --git a/attic/archive.py b/attic/archive.py index e5d6c3ea6..be926f680 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -141,7 +141,7 @@ class Archive: i = 0 while True: self.checkpoint_name = '%s.checkpoint%s' % (name, i and ('.%d' % i) or '') - if not self.checkpoint_name in manifest.archives: + if self.checkpoint_name not in manifest.archives: break i += 1 else: @@ -211,6 +211,7 @@ class Archive: count, size, csize = self.cache.chunks[id] stats.update(size, csize, count == 1) self.cache.chunks[id] = count - 1, size, csize + def add_file_chunks(chunks): for id, _, _ in chunks: add(id) @@ -535,7 +536,7 @@ class ArchiveChecker: 
self.repository = repository self.init_chunks() self.key = self.identify_key(repository) - if not Manifest.MANIFEST_ID in self.chunks: + if Manifest.MANIFEST_ID not in self.chunks: self.manifest = self.rebuild_manifest() else: self.manifest, _ = Manifest.load(repository, key=self.key) @@ -583,7 +584,7 @@ class ArchiveChecker: # Some basic sanity checks of the payload before feeding it into msgpack if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0): continue - if not b'cmdline' in data or not b'\xa7version\x01' in data: + if b'cmdline' not in data or b'\xa7version\x01' not in data: continue try: archive = msgpack.unpackb(data) @@ -632,7 +633,7 @@ class ArchiveChecker: offset = 0 chunk_list = [] for chunk_id, size, csize in item[b'chunks']: - if not chunk_id in self.chunks: + if chunk_id not in self.chunks: # If a file chunk is missing, create an all empty replacement chunk self.report_progress('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size), error=True) data = bytes(size) @@ -653,11 +654,13 @@ class ArchiveChecker: """ unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item) _state = 0 + def missing_chunk_detector(chunk_id): nonlocal _state - if _state % 2 != int(not chunk_id in self.chunks): + if _state % 2 != int(chunk_id not in self.chunks): _state += 1 return _state + for state, items in groupby(archive[b'items'], missing_chunk_detector): items = list(items) if state % 2: @@ -675,7 +678,7 @@ class ArchiveChecker: for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1): self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives)) archive_id = info[b'id'] - if not archive_id in self.chunks: + if archive_id not in self.chunks: self.report_progress('Archive metadata block is missing', error=True) del self.manifest.archives[name] continue diff --git a/attic/archiver.py b/attic/archiver.py index 
47650c2d4..24af9f4c1 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -290,9 +290,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") extra = ' link to %s' % item[b'source'] else: extra = '' - print('%s%s %-6s %-6s %8d %s %s%s' % (type, mode, item[b'user'] or item[b'uid'], - item[b'group'] or item[b'gid'], size, mtime, - remove_surrogates(item[b'path']), extra)) + print('%s%s %-6s %-6s %8d %s %s%s' % ( + type, mode, item[b'user'] or item[b'uid'], + item[b'group'] or item[b'gid'], size, mtime, + remove_surrogates(item[b'path']), extra)) else: for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')): print(format_archive(archive)) @@ -445,8 +446,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") """).lstrip()) common_parser = argparse.ArgumentParser(add_help=False) common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', - default=False, - help='verbose output') + default=False, + help='verbose output') # We can't use argparse for "serve" since we don't want it to show up in "Available commands" if args: diff --git a/attic/helpers.py b/attic/helpers.py index ac5266980..abdc1fc7d 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -102,7 +102,7 @@ class Manifest: m = msgpack.unpackb(data) if not m.get(b'version') == 1: raise ValueError('Invalid manifest version') - manifest.archives = dict((k.decode('utf-8'), v) for k,v in m[b'archives'].items()) + manifest.archives = dict((k.decode('utf-8'), v) for k, v in m[b'archives'].items()) manifest.timestamp = m.get(b'timestamp') if manifest.timestamp: manifest.timestamp = manifest.timestamp.decode('ascii') @@ -145,7 +145,8 @@ def prune_split(archives, pattern, n, skip=[]): last = period if a not in skip: keep.append(a) - if len(keep) == n: break + if len(keep) == n: + break return keep diff --git a/attic/key.py b/attic/key.py index e32c0bf71..cd7737b90 100644 --- a/attic/key.py +++ b/attic/key.py @@ 
-17,6 +17,7 @@ class UnsupportedPayloadError(Error): """Unsupported payload type {}. A newer version is required to access this repository. """ + class HMAC(hmac.HMAC): """Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews """ diff --git a/attic/platform.py b/attic/platform.py index 5e0ec917b..37c5c72c4 100644 --- a/attic/platform.py +++ b/attic/platform.py @@ -13,5 +13,6 @@ else: def acl_get(path, item, numeric_owner=False): pass + def acl_set(path, item, numeric_owner=False): pass diff --git a/attic/remote.py b/attic/remote.py index 27305a619..dc31c0800 100644 --- a/attic/remote.py +++ b/attic/remote.py @@ -130,6 +130,7 @@ class RemoteRepository: def call_many(self, cmd, calls, wait=True, is_preloaded=False): if not calls: return + def fetch_from_cache(args): msgid = self.cache[args].pop(0) if not self.cache[args]: @@ -293,7 +294,7 @@ class RepositoryCache: return next(self.get_many([key])) def get_many(self, keys): - unknown_keys = [key for key in keys if not key in self.index] + unknown_keys = [key for key in keys if key not in self.index] repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys)) for key in keys: try: diff --git a/attic/repository.py b/attic/repository.py index f479c57b6..670da9b65 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -107,7 +107,7 @@ class Repository: raise self.DoesNotExist(path) self.config = RawConfigParser() self.config.read(os.path.join(self.path, 'config')) - if not 'repository' in self.config.sections() or self.config.getint('repository', 'version') != 1: + if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1: raise self.InvalidRepository(path) self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive) self.max_segment_size = self.config.getint('repository', 'max_segment_size') @@ -241,6 +241,7 @@ class Repository: the index is consistent with the data stored in the segments. 
""" error_found = False + def report_error(msg): nonlocal error_found error_found = True diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index fde26cd3a..9a3a57dfc 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -200,8 +200,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_exclude_caches(self): self.attic('init', self.repository_location) self.create_regular_file('file1', size=1024 * 80) - self.create_regular_file('cache1/CACHEDIR.TAG', contents = b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff') - self.create_regular_file('cache2/CACHEDIR.TAG', contents = b'invalid signature') + self.create_regular_file('cache1/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff') + self.create_regular_file('cache2/CACHEDIR.TAG', contents=b'invalid signature') self.attic('create', '--exclude-caches', self.repository_location + '::test', 'input') with changedir('output'): self.attic('extract', self.repository_location + '::test') @@ -356,7 +356,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): for key, _ in repository.open_index(repository.get_transaction_id()).iteritems(): data = repository.get(key) hash = sha256(data).digest() - if not hash in seen: + if hash not in seen: seen.add(hash) num_blocks = num_aes_blocks(len(data) - 41) nonce = bytes_to_long(data[33:41]) diff --git a/attic/testsuite/crypto.py b/attic/testsuite/crypto.py index b67d186ca..9e73aec31 100644 --- a/attic/testsuite/crypto.py +++ b/attic/testsuite/crypto.py @@ -14,11 +14,11 @@ class CryptoTestCase(AtticTestCase): def test_pbkdf2_sha256(self): self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 1, 32)), - b'120fb6cffcf8b32c43e7225256c4f837a86548c92ccc35480805987cb70be17b') + b'120fb6cffcf8b32c43e7225256c4f837a86548c92ccc35480805987cb70be17b') self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 2, 32)), - b'ae4d0c95af6b46d32d0adff928f06dd02a303f8ef3c251dfd6e2d85a95474c43') + 
b'ae4d0c95af6b46d32d0adff928f06dd02a303f8ef3c251dfd6e2d85a95474c43') self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 4096, 32)), - b'c5e478d59288c841aa530db6845c4c8d962893a001ce4e11a4963873aa98134a') + b'c5e478d59288c841aa530db6845c4c8d962893a001ce4e11a4963873aa98134a') def test_get_random_bytes(self): bytes = get_random_bytes(10) diff --git a/attic/testsuite/helpers.py b/attic/testsuite/helpers.py index 700a0d542..d6c6fce87 100644 --- a/attic/testsuite/helpers.py +++ b/attic/testsuite/helpers.py @@ -151,7 +151,7 @@ class PruneSplitTestCase(AtticTestCase): self.assert_equal(set(prune_split(ta, '%Y-%m', n, skip)), subset(test_archives, indices)) - test_pairs = [(1,1), (2,1), (2,28), (3,1), (3,2), (3,31), (5,1)] + test_pairs = [(1, 1), (2, 1), (2, 28), (3, 1), (3, 2), (3, 31), (5, 1)] test_dates = [local_to_UTC(month, day) for month, day in test_pairs] test_archives = [MockArchive(date) for date in test_dates] diff --git a/attic/testsuite/repository.py b/attic/testsuite/repository.py index 91a822803..985c3c9f6 100644 --- a/attic/testsuite/repository.py +++ b/attic/testsuite/repository.py @@ -159,7 +159,6 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): self.assert_raises(UpgradableLock.WriteLockFailed, lambda: len(self.repository)) upgrade.assert_called_once() - def test_crash_before_write_index(self): self.add_keys() self.repository.write_index = None diff --git a/attic/testsuite/xattr.py b/attic/testsuite/xattr.py index 3b0387674..7d6e5939a 100644 --- a/attic/testsuite/xattr.py +++ b/attic/testsuite/xattr.py @@ -4,6 +4,7 @@ import unittest from attic.testsuite import AtticTestCase from attic.xattr import is_enabled, getxattr, setxattr, listxattr + @unittest.skipUnless(is_enabled(), 'xattr not enabled on filesystem') class XattrTestCase(AtticTestCase): diff --git a/setup.py b/setup.py index 2c1432b10..5635d3631 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ import versioneer versioneer.versionfile_source = 'attic/_version.py' 
versioneer.versionfile_build = 'attic/_version.py' versioneer.tag_prefix = '' -versioneer.parentdir_prefix = 'Attic-' # dirname like 'myproject-1.2.0' +versioneer.parentdir_prefix = 'Attic-' # dirname like 'myproject-1.2.0' platform = os.uname()[0] From e9aacad9a371bd7c2cfafe8007fb411d3d9fe2f3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 17 Mar 2015 23:54:05 +0100 Subject: [PATCH 055/241] remove unused imports --- attic/_version.py | 1 - attic/helpers.py | 1 - attic/repository.py | 1 - 3 files changed, 3 deletions(-) diff --git a/attic/_version.py b/attic/_version.py index e75446de4..296f6d28a 100644 --- a/attic/_version.py +++ b/attic/_version.py @@ -15,7 +15,6 @@ git_full = "$Format:%H$" import subprocess -import sys def run_command(args, cwd=None, verbose=False): diff --git a/attic/helpers.py b/attic/helpers.py index abdc1fc7d..2869c4b0e 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -5,7 +5,6 @@ import msgpack import os import pwd import re -import stat import sys import time from datetime import datetime, timezone, timedelta diff --git a/attic/repository.py b/attic/repository.py index 670da9b65..01afe47e4 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -3,7 +3,6 @@ from binascii import hexlify from itertools import islice import errno import os -import shutil import struct import sys from zlib import crc32 From ee1dc89b52f54b72c72f66372564581e83241421 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 Mar 2015 02:33:34 +0100 Subject: [PATCH 056/241] fix misleading hint the fuse ImportError handler gave, fixes #237 --- attic/archiver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..8741a3215 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -242,8 +242,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") """Mount archive or an entire repository as a FUSE fileystem""" try: from attic.fuse import 
AtticOperations - except ImportError: - self.print_error('the "llfuse" module is required to use this feature') + except ImportError as e: + self.print_error('loading fuse support failed [ImportError: %s]' % str(e)) return self.exit_code if not os.path.isdir(args.mountpoint) or not os.access(args.mountpoint, os.R_OK | os.W_OK | os.X_OK): From 1fe844a896acabb14f3a958e480f3fba42f10996 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 Mar 2015 17:45:05 +0100 Subject: [PATCH 057/241] dummy xattr implementation for unsupported platforms raising "Unsupported platform" (and making attic completely unusable) just because there is no xattr support isn't necessary. --- attic/xattr.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/attic/xattr.py b/attic/xattr.py index 07bc2a6b9..253383117 100644 --- a/attic/xattr.py +++ b/attic/xattr.py @@ -248,4 +248,14 @@ elif sys.platform.startswith('freebsd'): _check(func(path, EXTATTR_NAMESPACE_USER, name, value, len(value) if value else 0), path) else: - raise Exception('Unsupported platform: %s' % sys.platform) + # this is a dummy xattr interface for platforms for which we do not have + # a real implementation (or which do not support xattr at all). + + def listxattr(path, *, follow_symlinks=True): + return [] + + def getxattr(path, name, *, follow_symlinks=True): + return + + def setxattr(path, name, value, *, follow_symlinks=True): + return From 767e79559784b6d1b922bd05c8984483212cab28 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 Mar 2015 18:21:04 +0100 Subject: [PATCH 058/241] dummy acl implementation for unsupported platforms the API_VERSION there was not in sync (and that was even true, as "st" was missing in acl_get()) - fixed. 
--- attic/platform.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/attic/platform.py b/attic/platform.py index 5e0ec917b..6fe031599 100644 --- a/attic/platform.py +++ b/attic/platform.py @@ -9,9 +9,13 @@ elif platform == 'FreeBSD': elif platform == 'Darwin': from attic.platform_darwin import acl_get, acl_set, API_VERSION else: - API_VERSION = 1 + # this is a dummy acl interface for platforms for which we do not have + # a real implementation (or which do not support acls at all). - def acl_get(path, item, numeric_owner=False): + API_VERSION = 2 + + def acl_get(path, item, st, numeric_owner=False): pass + def acl_set(path, item, numeric_owner=False): pass From 8fba904b16022ff6003bb15966c9f7f6b4a894ef Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 20 Mar 2015 01:40:51 +0100 Subject: [PATCH 059/241] fix noatime mode, fixes #243 added "nonlocal euid" - without this, euid just gets redefined in inner scope instead of assigned to outer scope added check for euid 0 - if we run as root, we always have permissions (not just if we are file owner) note: due to caching and OS behaviour on linux, the bug was a bit tricky to reproduce and also the fix was a bit tricky to test. one needs strictatime mount option to enfore traditional atime updating. for repeated tests, always change file contents (e.g. from /dev/urandom) or attic's caching will prevent that the file gets read ("accessed") again. check atimes with ls -lu i could reproduce code was broken and is fixed with this changeset. and root now doesn't touch any atimes. 
--- attic/archive.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/attic/archive.py b/attic/archive.py index d78ce4b85..0b62a1058 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -429,7 +429,8 @@ class Archive: return open(p, 'rb') def open_noatime_if_owner(p, s): - if s.st_uid == euid: + if euid == 0 or s.st_uid == euid: + # we are root or owner of file return os.fdopen(os.open(p, flags_noatime), 'rb') else: return open(p, 'rb') @@ -442,6 +443,7 @@ class Archive: fo = open(p, 'rb') # Yes, it was -- otherwise the above line would have thrown # another exception. + nonlocal euid euid = os.geteuid() # So in future, let's check whether the file is owned by us # before attempting to use O_NOATIME. From 2da70c7d7ad5e0c2a6cfb14ad861d91516cee844 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 20 Mar 2015 02:31:39 +0100 Subject: [PATCH 060/241] fix some exception handlers don't catch "Exception" when OSError was meant (otherwise e.errno is not there anyway) don't use bare "except:" if one can avoid (copied code fragment from similar handler) --- attic/archive.py | 4 +++- attic/repository.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index d78ce4b85..80c1bb05d 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -587,7 +587,9 @@ class ArchiveChecker: continue try: archive = msgpack.unpackb(data) - except: + # Ignore exceptions that might be raised when feeding + # msgpack with invalid data + except (TypeError, ValueError, StopIteration): continue if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive: self.report_progress('Found archive ' + archive[b'name'].decode('utf-8'), error=True) diff --git a/attic/repository.py b/attic/repository.py index eed85dc43..dbe071de0 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -446,7 +446,7 @@ class LoggedIO(object): with open(filename, 'rb') as fd: try: fd.seek(-self.header_fmt.size, os.SEEK_END) 
- except Exception as e: + except OSError as e: # return False if segment file is empty or too small if e.errno == errno.EINVAL: return False From c679f1a6f05f4f14d69836d35af6e570703b5d14 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 21 Mar 2015 02:17:19 +0100 Subject: [PATCH 061/241] catch all Exceptions, transmit traceback back from remote before this changesets, most informations about exceptions/tracebacks on the remote side were lost. now they are transmitted and displayed, together with the remote attic version. --- attic/archiver.py | 3 +++ attic/remote.py | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..021d59dba 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -728,6 +728,9 @@ def main(): except Error as e: archiver.print_error(e.get_message()) exit_code = e.exit_code + except RemoteRepository.RPCError as e: + print(e) + exit_code = 1 except KeyboardInterrupt: archiver.print_error('Error: Keyboard interrupt') exit_code = 1 diff --git a/attic/remote.py b/attic/remote.py index f2a0aed06..c3baa52d4 100644 --- a/attic/remote.py +++ b/attic/remote.py @@ -7,6 +7,9 @@ import shutil from subprocess import Popen, PIPE import sys import tempfile +import traceback + +from attic import __version__ from .hashindex import NSIndex from .helpers import Error, IntegrityError @@ -52,8 +55,9 @@ class RepositoryServer(object): except AttributeError: f = getattr(self.repository, method) res = f(*args) - except Exception as e: - sys.stdout.buffer.write(msgpack.packb((1, msgid, e.__class__.__name__, e.args))) + except BaseException as e: + exc = "Remote Traceback by Attic %s%s%s" % (__version__, os.linesep, traceback.format_exc()) + sys.stdout.buffer.write(msgpack.packb((1, msgid, e.__class__.__name__, exc))) else: sys.stdout.buffer.write(msgpack.packb((1, msgid, None, res))) sys.stdout.flush() @@ -157,7 +161,7 @@ class RemoteRepository(object): raise PathNotAllowed(*res) 
if error == b'ObjectNotFound': raise Repository.ObjectNotFound(res[0], self.location.orig) - raise self.RPCError(error) + raise self.RPCError(res.decode('utf-8')) else: yield res if not waiting_for and not calls: @@ -312,4 +316,4 @@ class RepositoryCache: def cache_if_remote(repository): if isinstance(repository, RemoteRepository): return RepositoryCache(repository) - return repository \ No newline at end of file + return repository From 22ae80e1ac6f4ea7d3f715b8553322aa99371f41 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 22 Mar 2015 14:55:02 +0100 Subject: [PATCH 062/241] add condensed description of the changes in merge branch --- CHANGES-merge.txt | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 CHANGES-merge.txt diff --git a/CHANGES-merge.txt b/CHANGES-merge.txt new file mode 100644 index 000000000..81ba70017 --- /dev/null +++ b/CHANGES-merge.txt @@ -0,0 +1,25 @@ +Stuff in "merge" that is not in "master" minus minor changes: + +dummy acl and xattr implementation for unsupported platforms +give specific path to xattr.is_enabled(), disable symlink setattr call that always fails +less memory usage: do not create items_buffer and chunker unless we are creating new archive +fix misleading hint the fuse ImportError handler gave, fixes #237 +source: misc. 
cleanups, pep8, style +serve: use os.write(stdout_fd, ...), fixes #233 +implement attic check --last N +check: sort archives in reverse time order +avoid defect python-msgpack releases, fixes #171, fixes #185 +check unpacked data from RPC for tuple type and correct length, fixes #127 +Cache: do not try to release the lock twice +less memory usage: add global option --no-cache-files +fix traceback when trying to do unsupported passphrase change, fixes #189 +datetime does not like the year 10.000, fixes issue #139 +docs improvements, fixes, updates +cleanup crypto.pyx, make it easier to adapt to other modes +attic extract: if --stdout is given, write all extracted binary data to stdout +attic create: if "-" is given as path, read binary from stdin +do os.fsync like recommended in the python docs +Extra debug information for 'fread failed' +FUSE: reflect deduplication in allocated blocks +Only allow whitelisted RPC calls in server mode +Normalize source/exclude paths before matching From 6d67379c086efa50dc133c84f38c544f64d0e44a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 22 Mar 2015 15:52:43 +0100 Subject: [PATCH 063/241] refactor indicator (status) and item processing process_item was used only for dirs and fifo, replaced it by process_dir and process_fifo, so the status can be generated there (as it is done for the other item types). 
--- attic/archive.py | 17 +++++++++++------ attic/archiver.py | 6 ++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index ed9561a4b..2ba5a08f0 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -365,28 +365,33 @@ class Archive: acl_get(path, item, st, self.numeric_owner) return item - def process_item(self, path, st): + def process_dir(self, path, st): item = {b'path': make_path_safe(path)} item.update(self.stat_attrs(st, path)) self.add_item(item) + return 'd' # directory + + def process_fifo(self, path, st): + item = {b'path': make_path_safe(path)} + item.update(self.stat_attrs(st, path)) + self.add_item(item) + return 'f' # fifo def process_dev(self, path, st): item = {b'path': make_path_safe(path), b'rdev': st.st_rdev} item.update(self.stat_attrs(st, path)) self.add_item(item) if stat.S_ISCHR(st.st_mode): - status = 'c' # char device + return 'c' # char device elif stat.S_ISBLK(st.st_mode): - status = 'b' # block device - return status + return 'b' # block device def process_symlink(self, path, st): source = os.readlink(path) item = {b'path': make_path_safe(path), b'source': source} item.update(self.stat_attrs(st, path)) self.add_item(item) - status = 's' # symlink - return status + return 's' # symlink def process_file(self, path, st, cache): status = None diff --git a/attic/archiver.py b/attic/archiver.py index 0bcfd89a1..f046a5439 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -166,8 +166,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") elif stat.S_ISDIR(st.st_mode): if exclude_caches and is_cachedir(path): return - archive.process_item(path, st) - status = 'd' # directory + status = archive.process_dir(path, st) try: entries = os.listdir(path) except OSError as e: @@ -179,8 +178,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") elif stat.S_ISLNK(st.st_mode): status = archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): 
- archive.process_item(path, st) - status = 'f' # fifo + status = archive.process_fifo(path, st) elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode): status = archive.process_dev(path, st) else: From 231721d13320a483b6e5bda99d411cbc3e9a315b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 24 Mar 2015 04:24:54 +0100 Subject: [PATCH 064/241] implemented create --progress shows original, compressed and deduped size plus path name. output is 79 chars wide, so 80x24 terminal does not wrap/scroll. long path names are shortened (in a rather simplistic way). output happens when a new item is started, but not more often than 5/s (thus, not every pathname is shown) at the end, the output line is cleared but not scrolled, so it basically vanishes. --- attic/archive.py | 7 ++++++- attic/archiver.py | 7 ++++++- attic/helpers.py | 12 ++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index d78ce4b85..ec3f2413c 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -120,7 +120,7 @@ class Archive: """Archive {} already exists""" def __init__(self, repository, key, manifest, name, cache=None, create=False, - checkpoint_interval=300, numeric_owner=False): + checkpoint_interval=300, numeric_owner=False, progress=False): self.cwd = os.getcwd() self.key = key self.repository = repository @@ -128,6 +128,8 @@ class Archive: self.manifest = manifest self.hard_links = {} self.stats = Statistics() + self.show_progress = progress + self.last_progress = time.time() self.name = name self.checkpoint_interval = checkpoint_interval self.numeric_owner = numeric_owner @@ -174,6 +176,9 @@ class Archive: yield item def add_item(self, item): + if self.show_progress and time.time() - self.last_progress > 0.2: + self.stats.show_progress(item=item) + self.last_progress = time.time() self.items_buffer.add(item) if time.time() - self.last_checkpoint > self.checkpoint_interval: self.write_checkpoint() diff --git a/attic/archiver.py 
b/attic/archiver.py index 47650c2d4..bc3eb6b30 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -100,7 +100,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") cache = Cache(repository, key, manifest) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, - numeric_owner=args.numeric_owner) + numeric_owner=args.numeric_owner, progress=args.progress) # Add Attic cache dir to inode_skip list skip_inodes = set() try: @@ -127,6 +127,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") restrict_dev = None self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev) archive.save() + if args.progress: + archive.stats.show_progress(final=True) if args.stats: t = datetime.now() diff = t - t0 @@ -532,6 +534,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('-s', '--stats', dest='stats', action='store_true', default=False, help='print statistics for the created archive') + subparser.add_argument('-p', '--progress', dest='progress', + action='store_true', default=False, + help='print progress while creating the archive') subparser.add_argument('-e', '--exclude', dest='excludes', type=ExcludePattern, action='append', metavar="PATTERN", help='exclude paths matching PATTERN') diff --git a/attic/helpers.py b/attic/helpers.py index ac5266980..c5d95e526 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -167,6 +167,18 @@ class Statistics: print('%-15s %20s %20s %20s' % (label, format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize))) print('All archives: %20s %20s %20s' % (format_file_size(total_size), format_file_size(total_csize), format_file_size(unique_csize))) + def show_progress(self, item=None, final=False): + if not final: + path = remove_surrogates(item[b'path']) if item else '' + if len(path) > 43: + path = 
'%s...%s' % (path[:20], path[-20:]) + msg = '%9s O %9s C %9s D %-43s' % ( + format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize), path) + else: + msg = ' ' * 79 + print(msg, end='\r') + sys.stdout.flush() + def get_keys_dir(): """Determine where to repository keys and cache""" From 9f6840dddb21b9c32c70b65662c53a9a47726b26 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 24 Mar 2015 07:11:00 +0100 Subject: [PATCH 065/241] implement attic rename repo::oldname newname I extracted the inner part of Archive.load into a new _load_meta method that does not modify self and does not decode, so I could simply reuse it. --- attic/archive.py | 24 ++++++++++++++++++++---- attic/archiver.py | 26 ++++++++++++++++++++++++++ attic/testsuite/archiver.py | 20 ++++++++++++++++++++ 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index d78ce4b85..cc76c47b1 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -150,12 +150,16 @@ class Archive: info = self.manifest.archives[name] self.load(info[b'id']) + def _load_meta(self, id): + data = self.key.decrypt(id, self.repository.get(id)) + metadata = msgpack.unpackb(data) + if metadata[b'version'] != 1: + raise Exception('Unknown archive metadata version') + return metadata + def load(self, id): self.id = id - data = self.key.decrypt(self.id, self.repository.get(self.id)) - self.metadata = msgpack.unpackb(data) - if self.metadata[b'version'] != 1: - raise Exception('Unknown archive metadata version') + self.metadata = self._load_meta(self.id) decode_dict(self.metadata, (b'name', b'hostname', b'username', b'time')) self.metadata[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in self.metadata[b'cmdline']] self.name = self.metadata[b'name'] @@ -335,6 +339,18 @@ class Archive: except OSError: pass + def rename(self, name): + if name in self.manifest.archives: + raise self.AlreadyExists(name) + metadata = 
StableDict(self._load_meta(self.id)) + metadata[b'name'] = name + data = msgpack.packb(metadata, unicode_errors='surrogateescape') + new_id = self.key.id_hash(data) + self.cache.add_chunk(new_id, data, self.stats) + self.manifest.archives[name] = {'id': new_id, 'time': metadata[b'time']} + self.cache.chunk_decref(self.id, self.stats) + del self.manifest.archives[self.name] + def delete(self, stats): unpacker = msgpack.Unpacker(use_list=False) for items_id, data in zip(self.metadata[b'items'], self.repository.get_many(self.metadata[b'items'])): diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..7393e3908 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -223,6 +223,18 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") archive.extract_item(dirs.pop(-1)) return self.exit_code + def do_rename(self, args): + """Rename an existing archive""" + repository = self.open_repository(args.archive, exclusive=True) + manifest, key = Manifest.load(repository) + cache = Cache(repository, key, manifest) + archive = Archive(repository, key, manifest, args.archive.archive, cache=cache) + archive.rename(args.name) + manifest.write() + repository.commit() + cache.commit() + return self.exit_code + def do_delete(self, args): """Delete an existing archive""" repository = self.open_repository(args.archive, exclusive=True) @@ -590,6 +602,20 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('paths', metavar='PATH', nargs='*', type=str, help='paths to extract') + rename_epilog = textwrap.dedent(""" + This command renames an archive in the repository. 
+ """) + subparser = subparsers.add_parser('rename', parents=[common_parser], + description=self.do_rename.__doc__, + epilog=rename_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter) + subparser.set_defaults(func=self.do_rename) + subparser.add_argument('archive', metavar='ARCHIVE', + type=location_validator(archive=True), + help='archive to rename') + subparser.add_argument('name', metavar='NEWNAME', type=str, + help='the new archive name to use') + delete_epilog = textwrap.dedent(""" This command deletes an archive from the repository. Any disk space not shared with any other existing archive is also reclaimed. diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 382fcc854..80140c7bb 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -241,6 +241,26 @@ class ArchiverTestCase(ArchiverTestCaseBase): with changedir('output'): self.attic('extract', self.repository_location + '::test', exit_code=1) + def test_rename(self): + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('dir2/file2', size=1024 * 80) + self.attic('init', self.repository_location) + self.attic('create', self.repository_location + '::test', 'input') + self.attic('create', self.repository_location + '::test.2', 'input') + self.attic('extract', '--dry-run', self.repository_location + '::test') + self.attic('extract', '--dry-run', self.repository_location + '::test.2') + self.attic('rename', self.repository_location + '::test', 'test.3') + self.attic('extract', '--dry-run', self.repository_location + '::test.2') + self.attic('rename', self.repository_location + '::test.2', 'test.4') + self.attic('extract', '--dry-run', self.repository_location + '::test.3') + self.attic('extract', '--dry-run', self.repository_location + '::test.4') + # Make sure both archives have been renamed + repository = Repository(self.repository_path) + manifest, key = Manifest.load(repository) + self.assert_equal(len(manifest.archives), 2) + 
self.assert_in('test.3', manifest.archives) + self.assert_in('test.4', manifest.archives) + def test_delete(self): self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('dir2/file2', size=1024 * 80) From d43cb4bac89393f39e90d34eaadf7d7ecad1a110 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 24 Mar 2015 22:08:06 +0100 Subject: [PATCH 066/241] refactor _open_rb code a bit, so it is more consistent / regular --- attic/archive.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index 0b62a1058..10eaa5c57 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -422,25 +422,31 @@ class Archive: @staticmethod def _open_rb(path, st): - flags_noatime = None + flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0) + flags_noatime = flags_normal | getattr(os, 'NO_ATIME', 0) euid = None def open_simple(p, s): - return open(p, 'rb') + fd = os.open(p, flags_normal) + return os.fdopen(fd, 'rb') + + def open_noatime(p, s): + fd = os.open(p, flags_noatime) + return os.fdopen(fd, 'rb') def open_noatime_if_owner(p, s): if euid == 0 or s.st_uid == euid: # we are root or owner of file - return os.fdopen(os.open(p, flags_noatime), 'rb') + return open_noatime(p, s) else: - return open(p, 'rb') + return open_simple(p, s) - def open_noatime(p, s): + def open_noatime_with_fallback(p, s): try: fd = os.open(p, flags_noatime) except PermissionError: # Was this EPERM due to the O_NOATIME flag? - fo = open(p, 'rb') + fd = os.open(p, flags_normal) # Yes, it was -- otherwise the above line would have thrown # another exception. nonlocal euid @@ -448,14 +454,11 @@ class Archive: # So in future, let's check whether the file is owned by us # before attempting to use O_NOATIME. 
Archive._open_rb = open_noatime_if_owner - return fo return os.fdopen(fd, 'rb') - o_noatime = getattr(os, 'O_NOATIME', None) - if o_noatime is not None: - flags_noatime = os.O_RDONLY | getattr(os, 'O_BINARY', 0) | o_noatime + if flags_noatime != flags_normal: # Always use O_NOATIME version. - Archive._open_rb = open_noatime + Archive._open_rb = open_noatime_with_fallback else: # Always use non-O_NOATIME version. Archive._open_rb = open_simple From 8b2b91fb0c708801fbd7db4c2e299b3b7b50d1a4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 26 Mar 2015 01:23:25 +0100 Subject: [PATCH 067/241] Fix test setup for 32bit platforms, partial fix for #196 --- attic/testsuite/archiver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 382fcc854..3cc1bbb40 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -120,8 +120,12 @@ class ArchiverTestCase(ArchiverTestCaseBase): """ # File self.create_regular_file('empty', size=0) + # next code line raises OverflowError on 32bit cpu (raspberry pi 2): # 2600-01-01 > 2**64 ns - os.utime('input/empty', (19880895600, 19880895600)) + #os.utime('input/empty', (19880895600, 19880895600)) + # thus, we better test with something not that far in future: + # 2038-01-19 (1970 + 2^31 - 1 seconds) is the 32bit "deadline": + os.utime('input/empty', (2**31 - 1, 2**31 - 1)) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('flagfile', size=1024) # Directory From 14d91a25fcfc68a16cfb29e8eb0ab203d4d236c0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 1 Apr 2015 23:12:06 +0200 Subject: [PATCH 068/241] show tracebacks in top-level exception handler for easier debugging sure it is "prettier" without, but a lot of useful information for debugging is lost if the traceback is not shown. 
even for KeyboardInterrupt: it may have some bad reason when one has to use Ctrl-C - if attic was stuck somewhere, we want to know where it was. --- attic/archiver.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/attic/archiver.py b/attic/archiver.py index 47650c2d4..d3c8bb511 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -8,6 +8,7 @@ import os import stat import sys import textwrap +import traceback from attic import __version__ from attic.archive import Archive, ArchiveChecker @@ -726,9 +727,11 @@ def main(): try: exit_code = archiver.run(sys.argv[1:]) except Error as e: + traceback.print_exc() archiver.print_error(e.get_message()) exit_code = e.exit_code except KeyboardInterrupt: + traceback.print_exc() archiver.print_error('Error: Keyboard interrupt') exit_code = 1 else: From 89e9528d0b2f94d2a7ac81c47d8d97d2c9815af8 Mon Sep 17 00:00:00 2001 From: Lauri Alanko Date: Fri, 3 Apr 2015 00:19:33 +0300 Subject: [PATCH 069/241] Escape ".." in path regexp properly. --- attic/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attic/helpers.py b/attic/helpers.py index ac5266980..0a9b9b44e 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -498,7 +498,7 @@ def remove_surrogates(s, errors='replace'): return s.encode('utf-8', errors).decode('utf-8') -_safe_re = re.compile('^((..)?/+)+') +_safe_re = re.compile('^((\.\.)?/+)+') def make_path_safe(path): From 7ad109395112a6a285b50e4588ea1c36c9f5741a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 8 Apr 2015 18:43:53 +0200 Subject: [PATCH 070/241] let chunker optionally work with os-level file descriptor this safes some back-and-forth between C and Python code and also some memory management overhead as we can always reuse the same read_buf instead of letting Python allocate and free a up to 10MB big buffer for each buffer filling read. 
we can't use os-level file descriptors all the time though, as chunkify gets also invoked on objects like BytesIO that are not backed by a os-level file. Note: this changeset is also a preparation for O_DIRECT support which can be implemented a lot easier on C level. --- attic/_chunker.c | 50 +++++++++++++++++++++++++++++++++++------------ attic/archive.py | 13 ++++++------ attic/chunker.pyx | 15 ++++++++++---- 3 files changed, 54 insertions(+), 24 deletions(-) diff --git a/attic/_chunker.c b/attic/_chunker.c index 94d4e47ae..b43bd759b 100644 --- a/attic/_chunker.c +++ b/attic/_chunker.c @@ -78,8 +78,9 @@ typedef struct { int window_size, chunk_mask, min_size; size_t buf_size; uint32_t *table; - uint8_t *data; + uint8_t *data, *read_buf; PyObject *fd; + int fh; int done, eof; size_t remaining, bytes_read, bytes_yielded, position, last; } Chunker; @@ -94,15 +95,17 @@ chunker_init(int window_size, int chunk_mask, int min_size, uint32_t seed) c->table = buzhash_init_table(seed); c->buf_size = 10 * 1024 * 1024; c->data = malloc(c->buf_size); + c->read_buf = malloc(c->buf_size); return c; } static void -chunker_set_fd(Chunker *c, PyObject *fd) +chunker_set_fd(Chunker *c, PyObject *fd, int fh) { Py_XDECREF(c->fd); c->fd = fd; Py_INCREF(fd); + c->fh = fh; c->done = 0; c->remaining = 0; c->bytes_read = 0; @@ -118,6 +121,7 @@ chunker_free(Chunker *c) Py_XDECREF(c->fd); free(c->table); free(c->data); + free(c->read_buf); free(c); } @@ -133,20 +137,40 @@ chunker_fill(Chunker *c) if(c->eof || n == 0) { return 1; } - data = PyObject_CallMethod(c->fd, "read", "i", n); - if(!data) { - return 0; - } - n = PyBytes_Size(data); - if(n) { - memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n); - c->remaining += n; - c->bytes_read += n; + if(c->fh >= 0) { + // if we have a os-level file descriptor, use os-level API + n = read(c->fh, c->read_buf, n); + if(n > 0) { + memcpy(c->data + c->position + c->remaining, c->read_buf, n); + c->remaining += n; + c->bytes_read += 
n; + } + else + if(n == 0) { + c->eof = 1; + } + else { + // some error happened + return 0; + } } else { - c->eof = 1; + // no os-level file descriptor, use Python file object API + data = PyObject_CallMethod(c->fd, "read", "i", n); + if(!data) { + return 0; + } + n = PyBytes_Size(data); + if(n) { + memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n); + c->remaining += n; + c->bytes_read += n; + } + else { + c->eof = 1; + } + Py_DECREF(data); } - Py_DECREF(data); return 1; } diff --git a/attic/archive.py b/attic/archive.py index 10eaa5c57..09ccb4fb2 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -405,9 +405,10 @@ class Archive: chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids] # Only chunkify the file if needed if chunks is None: - with Archive._open_rb(path, st) as fd: + fh = Archive._open_rb(path, st) + with os.fdopen(fh, 'rb') as fd: chunks = [] - for chunk in self.chunker.chunkify(fd): + for chunk in self.chunker.chunkify(fd, fh): chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats)) cache.memorize_file(path_hash, st, [c[0] for c in chunks]) item = {b'path': safe_path, b'chunks': chunks} @@ -427,12 +428,10 @@ class Archive: euid = None def open_simple(p, s): - fd = os.open(p, flags_normal) - return os.fdopen(fd, 'rb') + return os.open(p, flags_normal) def open_noatime(p, s): - fd = os.open(p, flags_noatime) - return os.fdopen(fd, 'rb') + return os.open(p, flags_noatime) def open_noatime_if_owner(p, s): if euid == 0 or s.st_uid == euid: @@ -454,7 +453,7 @@ class Archive: # So in future, let's check whether the file is owned by us # before attempting to use O_NOATIME. Archive._open_rb = open_noatime_if_owner - return os.fdopen(fd, 'rb') + return fd if flags_noatime != flags_normal: # Always use O_NOATIME version. 
diff --git a/attic/chunker.pyx b/attic/chunker.pyx index 44ec31fc7..3ca4b1a7e 100644 --- a/attic/chunker.pyx +++ b/attic/chunker.pyx @@ -9,7 +9,7 @@ cdef extern from "_chunker.c": ctypedef struct _Chunker "Chunker": pass _Chunker *chunker_init(int window_size, int chunk_mask, int min_size, uint32_t seed) - void chunker_set_fd(_Chunker *chunker, object fd) + void chunker_set_fd(_Chunker *chunker, object f, int fd) void chunker_free(_Chunker *chunker) object chunker_process(_Chunker *chunker) uint32_t *buzhash_init_table(uint32_t seed) @@ -23,8 +23,15 @@ cdef class Chunker: def __cinit__(self, window_size, chunk_mask, min_size, seed): self.chunker = chunker_init(window_size, chunk_mask, min_size, seed & 0xffffffff) - def chunkify(self, fd): - chunker_set_fd(self.chunker, fd) + def chunkify(self, fd, fh=-1): + """ + Cut a file into chunks. + + :param fd: Python file object + :param fh: OS-level file handle (if available), + defaults to -1 which means not to use OS-level fd. + """ + chunker_set_fd(self.chunker, fd, fh) return self def __dealloc__(self): @@ -52,4 +59,4 @@ def buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t table = buzhash_init_table(seed & 0xffffffff) sum = c_buzhash_update(sum, remove, add, len, table) free(table) - return sum \ No newline at end of file + return sum From c7d232c4cec6cf14424e8761135e627518b75ac5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 11 Apr 2015 01:09:03 +0200 Subject: [PATCH 071/241] use posix_fadvise to avoid spoiling the OS cache note: - we call this frequently AFTER re-filling the chunker buffer, so even big input files have little cache impact. - there is still some cache impact due to output files caching, if the repository is on a locally mounted filesystem. 
--- attic/_chunker.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/attic/_chunker.c b/attic/_chunker.c index b43bd759b..167f6f2c3 100644 --- a/attic/_chunker.c +++ b/attic/_chunker.c @@ -1,4 +1,5 @@ #include +#include /* Cyclic polynomial / buzhash: https://en.wikipedia.org/wiki/Rolling_hash */ @@ -153,6 +154,14 @@ chunker_fill(Chunker *c) // some error happened return 0; } + #if ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) + // We tell the OS that we do not need the data of this file any more + // that it maybe has in the cache. This avoids that we spoil the + // complete cache with data that we only read once and (due to cache + // size limit) kick out data from the cache that might be still useful + // for the OS or other processes. + posix_fadvise(c->fh, (off_t) 0, (off_t) 0, POSIX_FADV_DONTNEED); + #endif } else { // no os-level file descriptor, use Python file object API From 874f5c491b504a83dd9da94e8ca5a8c5c30b0298 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 11 Apr 2015 02:40:22 +0200 Subject: [PATCH 072/241] use posix_fadvise for repo writes also --- attic/repository.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/attic/repository.py b/attic/repository.py index eed85dc43..592e5fe5d 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -555,6 +555,10 @@ class LoggedIO(object): header = self.header_no_crc_fmt.pack(size, TAG_PUT) crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff) fd.write(b''.join((crc, header, id, data))) + if hasattr(os, 'posix_fadvise'): # python >= 3.3, only on UNIX + # tell the OS that it does not need to cache what we just wrote, + # avoids spoiling the cache for the OS and other processes. 
+ os.posix_fadvise(fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) self.offset += size return self.segment, offset From 57071ce6fdfdd67937de7c936397d68a255a8803 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 11 Apr 2015 17:04:10 +0200 Subject: [PATCH 073/241] repo writes: call posix_fadvise between fsync and close less calls to posix_fadvise (which seem to force a write-cache sync-to-disk and wait for that to complete) - if we call it after we synced anyway, we don't lose time. also: fixed a bug in the os.fsync call, it needs the fileno. --- attic/repository.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/attic/repository.py b/attic/repository.py index 592e5fe5d..f534eb5ab 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -555,10 +555,6 @@ class LoggedIO(object): header = self.header_no_crc_fmt.pack(size, TAG_PUT) crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff) fd.write(b''.join((crc, header, id, data))) - if hasattr(os, 'posix_fadvise'): # python >= 3.3, only on UNIX - # tell the OS that it does not need to cache what we just wrote, - # avoids spoiling the cache for the OS and other processes. - os.posix_fadvise(fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) self.offset += size return self.segment, offset @@ -581,6 +577,10 @@ class LoggedIO(object): if self._write_fd: self.segment += 1 self.offset = 0 - os.fsync(self._write_fd) + os.fsync(self._write_fd.fileno()) + if hasattr(os, 'posix_fadvise'): # python >= 3.3, only on UNIX + # tell the OS that it does not need to cache what we just wrote, + # avoids spoiling the cache for the OS and other processes. + os.posix_fadvise(self._write_fd.fileno(), 0, 0, os.POSIX_FADV_DONTNEED) self._write_fd.close() self._write_fd = None From 0295ef85634db53c61a45f1603a587b8f88ec567 Mon Sep 17 00:00:00 2001 From: Cam Hutchison Date: Wed, 8 Apr 2015 21:09:58 +1000 Subject: [PATCH 074/241] archive: Add testcases for microsecond handling. 
datetime.isoformat() has different output depending on whether microseconds are zero or not. Add test cases to ensure we handle both cases correctly in an archive. --- attic/testsuite/archive.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/attic/testsuite/archive.py b/attic/testsuite/archive.py index 8d478f5f2..a0af6c534 100644 --- a/attic/testsuite/archive.py +++ b/attic/testsuite/archive.py @@ -1,7 +1,10 @@ import msgpack from attic.testsuite import AtticTestCase -from attic.archive import CacheChunkBuffer, RobustUnpacker +from attic.testsuite.mock import Mock +from attic.archive import Archive, CacheChunkBuffer, RobustUnpacker from attic.key import PlaintextKey +from attic.helpers import Manifest +from datetime import datetime, timezone class MockCache: @@ -14,6 +17,27 @@ class MockCache: return id, len(data), len(data) +class ArchiveTimestampTestCase(AtticTestCase): + + def _test_timestamp_parsing(self, isoformat, expected): + repository = Mock() + key = PlaintextKey() + manifest = Manifest(repository, key) + a = Archive(repository, key, manifest, 'test', create=True) + a.metadata = {b'time': isoformat} + self.assert_equal(a.ts, expected) + + def test_with_microseconds(self): + self._test_timestamp_parsing( + '1970-01-01T00:00:01.000001', + datetime(1970, 1, 1, 0, 0, 1, 1, timezone.utc)) + + def test_without_microseconds(self): + self._test_timestamp_parsing( + '1970-01-01T00:00:01', + datetime(1970, 1, 1, 0, 0, 1, 0, timezone.utc)) + + class ChunkBufferTestCase(AtticTestCase): def test(self): From 9f99aa1abfa265b562f2874b92ca6d2269bed12b Mon Sep 17 00:00:00 2001 From: Cam Hutchison Date: Mon, 6 Apr 2015 22:17:38 +1000 Subject: [PATCH 075/241] archive: Fix parsing with missing microseconds. Archive timestamps are stored as the output of datetime.isoformat(). This function omits microseconds in the string output if the microseconds are zero (as documented and explained at https://bugs.python.org/issue7342). 
Parsing of timestamps assumes there are always microseconds present after a decimal point. This is not always true. Handle this case where it is not true by explicitly using '0' microseconds when not present. This commit fixes #282 --- attic/archive.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index d78ce4b85..df789ac24 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -163,8 +163,11 @@ class Archive: @property def ts(self): """Timestamp of archive creation in UTC""" - t, f = self.metadata[b'time'].split('.', 1) - return datetime.strptime(t, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc) + timedelta(seconds=float('.' + f)) + t = self.metadata[b'time'].split('.', 1) + dt = datetime.strptime(t[0], '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc) + if len(t) > 1: + dt += timedelta(seconds=float('.' + t[1])) + return dt def __repr__(self): return 'Archive(%r)' % self.name From a2bf2aea22c08301e55eafe06474f2c4b7712608 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 15 Apr 2015 16:29:18 +0200 Subject: [PATCH 076/241] simple sparse file support, made chunk buffer size flexible Implemented sparse file support to remove this blocker for people backing up lots of huge sparse files (like VM images). Attic could not support this use case yet as it would have restored all files to their fully expanded size, possibly running out of disk space if the total expanded size would be bigger than the available space. Please note that this is a very simple implementation of sparse file support - at backup time, it does not do anything special (it just reads all these zero bytes, chunks, compresses and encrypts them as usual). At restore time, it detects chunks that are completely filled with zeros and does a seek on the output file rather than a normal data write, so it creates a hole in a sparse file. 
The chunk size for these all-zero chunks is currently 10MiB, so it'll create holes of multiples of that size (depends also a bit on fs block size, alignment, previously written data). Special cases like sparse files starting and/or ending with a hole are supported. Please note that it will currently always create sparse files at restore time if it detects all-zero chunks. Also improved: I needed a constant for the max. chunk size, so I introduced CHUNK_MAX (see also existing CHUNK_MIN) for the maximum chunk size (which is the same as the chunk buffer size). Attic still always uses 10MiB chunk buffer size now, but it could be changed now more easily. --- attic/_chunker.c | 4 ++-- attic/archive.py | 15 ++++++++++++--- attic/chunker.pyx | 8 ++++---- attic/testsuite/archiver.py | 34 +++++++++++++++++++++++++++++++++- attic/testsuite/chunker.py | 25 +++++++++++++------------ 5 files changed, 64 insertions(+), 22 deletions(-) diff --git a/attic/_chunker.c b/attic/_chunker.c index 94d4e47ae..f384a56b6 100644 --- a/attic/_chunker.c +++ b/attic/_chunker.c @@ -85,14 +85,14 @@ typedef struct { } Chunker; static Chunker * -chunker_init(int window_size, int chunk_mask, int min_size, uint32_t seed) +chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed) { Chunker *c = calloc(sizeof(Chunker), 1); c->window_size = window_size; c->chunk_mask = chunk_mask; c->min_size = min_size; c->table = buzhash_init_table(seed); - c->buf_size = 10 * 1024 * 1024; + c->buf_size = max_size; c->data = malloc(c->buf_size); return c; } diff --git a/attic/archive.py b/attic/archive.py index d78a7fdb3..b637d7f1e 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -22,9 +22,12 @@ from attic.helpers import Error, uid2user, user2uid, gid2group, group2gid, \ ITEMS_BUFFER = 1024 * 1024 CHUNK_MIN = 1024 +CHUNK_MAX = 10 * 1024 * 1024 WINDOW_SIZE = 0xfff CHUNK_MASK = 0xffff +ZEROS = b'\0' * CHUNK_MAX + utime_supports_fd = os.utime in getattr(os, 'supports_fd', {}) 
utime_supports_follow_symlinks = os.utime in getattr(os, 'supports_follow_symlinks', {}) has_mtime_ns = sys.version >= '3.3' @@ -71,7 +74,7 @@ class ChunkBuffer: self.packer = msgpack.Packer(unicode_errors='surrogateescape') self.chunks = [] self.key = key - self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed) + self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX,self.key.chunk_seed) def add(self, item): self.buffer.write(self.packer.pack(StableDict(item))) @@ -134,7 +137,7 @@ class Archive: self.pipeline = DownloadPipeline(self.repository, self.key) if create: self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats) - self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed) + self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX, self.key.chunk_seed) if name in manifest.archives: raise self.AlreadyExists(name) self.last_checkpoint = time.time() @@ -269,7 +272,13 @@ class Archive: with open(path, 'wb') as fd: ids = [c[0] for c in item[b'chunks']] for data in self.pipeline.fetch_many(ids, is_preloaded=True): - fd.write(data) + if ZEROS.startswith(data): + # all-zero chunk: create a hole in a sparse file + fd.seek(len(data), 1) + else: + fd.write(data) + pos = fd.tell() + fd.truncate(pos) fd.flush() self.restore_attrs(path, item, fd=fd.fileno()) elif stat.S_ISFIFO(mode): diff --git a/attic/chunker.pyx b/attic/chunker.pyx index 44ec31fc7..10a6adae3 100644 --- a/attic/chunker.pyx +++ b/attic/chunker.pyx @@ -8,7 +8,7 @@ cdef extern from "_chunker.c": ctypedef int uint32_t ctypedef struct _Chunker "Chunker": pass - _Chunker *chunker_init(int window_size, int chunk_mask, int min_size, uint32_t seed) + _Chunker *chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed) void chunker_set_fd(_Chunker *chunker, object fd) void chunker_free(_Chunker *chunker) object chunker_process(_Chunker *chunker) @@ -20,8 +20,8 @@ cdef extern from "_chunker.c": cdef 
class Chunker: cdef _Chunker *chunker - def __cinit__(self, window_size, chunk_mask, min_size, seed): - self.chunker = chunker_init(window_size, chunk_mask, min_size, seed & 0xffffffff) + def __cinit__(self, window_size, chunk_mask, min_size, max_size, seed): + self.chunker = chunker_init(window_size, chunk_mask, min_size, max_size, seed & 0xffffffff) def chunkify(self, fd): chunker_set_fd(self.chunker, fd) @@ -52,4 +52,4 @@ def buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t table = buzhash_init_table(seed & 0xffffffff) sum = c_buzhash_update(sum, remove, add, len, table) free(table) - return sum \ No newline at end of file + return sum diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index c115b460f..b9743fd58 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -11,7 +11,7 @@ import time import unittest from hashlib import sha256 from attic import xattr -from attic.archive import Archive, ChunkBuffer +from attic.archive import Archive, ChunkBuffer, CHUNK_MAX from attic.archiver import Archiver from attic.cache import Cache from attic.crypto import bytes_to_long, num_aes_blocks @@ -197,6 +197,38 @@ class ArchiverTestCase(ArchiverTestCaseBase): config.write(fd) return Repository(self.repository_path).id + def test_sparse_file(self): + filename = os.path.join(self.input_path, 'sparse') + content = b'foobar' + hole_size = 5 * CHUNK_MAX # 5 full chunker buffers + with open(filename, 'wb') as fd: + # create a file that has a hole at the beginning and end + fd.seek(hole_size, 1) + fd.write(content) + fd.seek(hole_size, 1) + pos = fd.tell() + fd.truncate(pos) + total_len = hole_size + len(content) + hole_size + st = os.stat(filename) + self.assert_equal(st.st_size, total_len) + if hasattr(st, 'st_blocks'): + self.assert_true(st.st_blocks * 512 < total_len / 10) # is input sparse? 
+ self.attic('init', self.repository_location) + self.attic('create', self.repository_location + '::test', 'input') + with changedir('output'): + self.attic('extract', self.repository_location + '::test') + self.assert_dirs_equal('input', 'output/input') + filename = os.path.join(self.output_path, 'input', 'sparse') + with open(filename, 'rb') as fd: + # check if file contents are as expected + self.assert_equal(fd.read(hole_size), b'\0' * hole_size) + self.assert_equal(fd.read(len(content)), content) + self.assert_equal(fd.read(hole_size), b'\0' * hole_size) + st = os.stat(filename) + self.assert_equal(st.st_size, total_len) + if hasattr(st, 'st_blocks'): + self.assert_true(st.st_blocks * 512 < total_len / 10) # is output sparse? + def test_repository_swap_detection(self): self.create_test_files() os.environ['ATTIC_PASSPHRASE'] = 'passphrase' diff --git a/attic/testsuite/chunker.py b/attic/testsuite/chunker.py index 2e666265a..90c4a8c50 100644 --- a/attic/testsuite/chunker.py +++ b/attic/testsuite/chunker.py @@ -1,25 +1,26 @@ from attic.chunker import Chunker, buzhash, buzhash_update from attic.testsuite import AtticTestCase +from attic.archive import CHUNK_MAX from io import BytesIO class ChunkerTestCase(AtticTestCase): def test_chunkify(self): - data = b'0' * 1024 * 1024 * 15 + b'Y' - parts = [bytes(c) for c in Chunker(2, 0x3, 2, 0).chunkify(BytesIO(data))] + data = b'0' * int(1.5 * CHUNK_MAX) + b'Y' + parts = [bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 0).chunkify(BytesIO(data))] self.assert_equal(len(parts), 2) self.assert_equal(b''.join(parts), data) - self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 0).chunkify(BytesIO(b''))], []) - self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', 
b'oob', b'azf', b'oobarb', b'oobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boo', b'bazfo', b'obar', b'boo', b'bazfo', b'obar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foo', b'barboobaz', b'foo', b'barboobaz', b'foo', b'barboobaz']) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']) + self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 0).chunkify(BytesIO(b''))], []) + self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']) + self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz']) + self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']) + self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, CHUNK_MAX, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) + self.assert_equal([bytes(c) for c in 
Chunker(3, 0x3, 3, CHUNK_MAX, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boo', b'bazfo', b'obar', b'boo', b'bazfo', b'obar', b'boobaz']) + self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, CHUNK_MAX, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foo', b'barboobaz', b'foo', b'barboobaz', b'foo', b'barboobaz']) + self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, CHUNK_MAX, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) + self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, CHUNK_MAX, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']) + self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, CHUNK_MAX, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']) def test_buzhash(self): self.assert_equal(buzhash(b'abcdefghijklmnop', 0), 3795437769) From ee80db4eb28d881879aa87c76c944e17b455401c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 17 Apr 2015 22:28:40 +0200 Subject: [PATCH 077/241] add commandline argument --sparse default is to not create sparse files. if you give --sparse, it will create a hole in the sparse output file when a all-zero chunk is extracted. 
--- attic/archive.py | 4 ++-- attic/archiver.py | 6 +++++- attic/testsuite/archiver.py | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index b637d7f1e..781986309 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -233,7 +233,7 @@ class Archive: cache.rollback() return stats - def extract_item(self, item, restore_attrs=True, dry_run=False): + def extract_item(self, item, restore_attrs=True, dry_run=False, sparse=False): if dry_run: if b'chunks' in item: for _ in self.pipeline.fetch_many([c[0] for c in item[b'chunks']], is_preloaded=True): @@ -272,7 +272,7 @@ class Archive: with open(path, 'wb') as fd: ids = [c[0] for c in item[b'chunks']] for data in self.pipeline.fetch_many(ids, is_preloaded=True): - if ZEROS.startswith(data): + if sparse and ZEROS.startswith(data): # all-zero chunk: create a hole in a sparse file fd.seek(len(data), 1) else: diff --git a/attic/archiver.py b/attic/archiver.py index 7295ac06e..259552a4b 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -195,6 +195,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") numeric_owner=args.numeric_owner) patterns = adjust_patterns(args.paths, args.excludes) dry_run = args.dry_run + sparse = args.sparse strip_components = args.strip_components dirs = [] for item in archive.iter_items(lambda item: not exclude_path(item[b'path'], patterns), preload=True): @@ -215,7 +216,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") dirs.append(item) archive.extract_item(item, restore_attrs=False) else: - archive.extract_item(item) + archive.extract_item(item, sparse=sparse) except IOError as e: self.print_error('%s: %s', remove_surrogates(orig_path), e) @@ -585,6 +586,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('--strip-components', dest='strip_components', type=int, default=0, metavar='NUMBER', help='Remove the specified number of leading path 
elements. Pathnames with fewer elements will be silently skipped.') + subparser.add_argument('--sparse', dest='sparse', + action='store_true', default=False, + help='create holes in output sparse file from all-zero chunks') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to extract') diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index b9743fd58..3821a03db 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -216,7 +216,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.attic('init', self.repository_location) self.attic('create', self.repository_location + '::test', 'input') with changedir('output'): - self.attic('extract', self.repository_location + '::test') + self.attic('extract', '--sparse', self.repository_location + '::test') self.assert_dirs_equal('input', 'output/input') filename = os.path.join(self.output_path, 'input', 'sparse') with open(filename, 'rb') as fd: From 0ffee1f1ee1586d6beb0771885f61f1a3d4e2aa3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 18 Apr 2015 21:36:10 +0200 Subject: [PATCH 078/241] implement --timestamp, iso8601-like utc timestamp string or reference file/dir note: this needs bug #282 to be fixed first, because it will create timestamps with microseconds==0. 
--- attic/archive.py | 6 ++++-- attic/archiver.py | 9 +++++++-- attic/helpers.py | 20 ++++++++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index d78a7fdb3..b4f9121b2 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -184,11 +184,13 @@ class Archive: del self.manifest.archives[self.checkpoint_name] self.cache.chunk_decref(self.id, self.stats) - def save(self, name=None): + def save(self, name=None, timestamp=None): name = name or self.name if name in self.manifest.archives: raise self.AlreadyExists(name) self.items_buffer.flush(flush=True) + if timestamp is None: + timestamp = datetime.utcnow() metadata = StableDict({ 'version': 1, 'name': name, @@ -196,7 +198,7 @@ class Archive: 'cmdline': sys.argv, 'hostname': socket.gethostname(), 'username': getuser(), - 'time': datetime.utcnow().isoformat(), + 'time': timestamp.isoformat(), }) data = msgpack.packb(metadata, unicode_errors='surrogateescape') self.id = self.key.id_hash(data) diff --git a/attic/archiver.py b/attic/archiver.py index 7295ac06e..febffb988 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -15,7 +15,7 @@ from attic.repository import Repository from attic.cache import Cache from attic.key import key_creator from attic.helpers import Error, location_validator, format_time, \ - format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \ + format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ is_cachedir, bigint_to_int @@ -127,7 +127,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") else: restrict_dev = None self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev) - archive.save() + archive.save(timestamp=args.timestamp) if 
args.stats: t = datetime.now() diff = t - t0 @@ -551,6 +551,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', default=False, help='only store numeric user and group identifiers') + subparser.add_argument('--timestamp', dest='timestamp', + type=timestamp, default=None, + metavar='yyyy-mm-ddThh:mm:ss', + help='manually specify the archive creation date/time (UTC). ' + 'alternatively, give a reference file/directory.') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/attic/helpers.py b/attic/helpers.py index 646ba2571..c223dd049 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -257,6 +257,26 @@ class ExcludePattern(IncludePattern): return '%s(%s)' % (type(self), self.pattern) +def timestamp(s): + """Convert a --timestamp=s argument to a datetime object""" + try: + # is it pointing to a file / directory? + ts = os.stat(s).st_mtime + return datetime.utcfromtimestamp(ts) + except OSError: + # didn't work, try parsing as timestamp. UTC, no TZ, no microsecs support. + for format in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S+00:00', + '%Y-%m-%dT%H:%M:%S', '%Y-%m-%d %H:%M:%S', + '%Y-%m-%dT%H:%M', '%Y-%m-%d %H:%M', + '%Y-%m-%d', '%Y-%j', + ): + try: + return datetime.strptime(s, format) + except ValueError: + continue + raise ValueError + + def is_cachedir(path): """Determines whether the specified path is a cache directory (and therefore should potentially be excluded from the backup) according to From 212777d716a96a6a7ac76f751e1bcc86ee8511df Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 19 Apr 2015 18:50:10 +0200 Subject: [PATCH 079/241] fix "attic info" all archives stats, fixes #183 the problem was that calc_stats() dirties cache.chunks by decrementing the chunk reference counters (so it can compute the deduplicated size of the archive correctly). 
the fix is to create a local Cache instance inside calc_stats, so the dirty cache instance can not be used elsewhere. also: fix internal consistency of calc_stats function: always use "cache" (not "self.cache"). minor cosmetic pep8 fixes --- attic/archive.py | 14 +++++++++----- attic/archiver.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index d78a7fdb3..de9553db7 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -14,6 +14,7 @@ import sys import time from io import BytesIO from attic import xattr +from attic.cache import Cache from attic.platform import acl_get, acl_set from attic.chunker import Chunker from attic.hashindex import ChunkIndex @@ -206,19 +207,22 @@ class Archive: self.repository.commit() self.cache.commit() - def calc_stats(self, cache): + def calc_stats(self): def add(id): - count, size, csize = self.cache.chunks[id] + count, size, csize = cache.chunks[id] stats.update(size, csize, count == 1) - self.cache.chunks[id] = count - 1, size, csize + cache.chunks[id] = count - 1, size, csize # dirties cache.chunks! + def add_file_chunks(chunks): for id, _, _ in chunks: add(id) + # This function is a bit evil since it abuses the cache to calculate - # the stats. The cache transaction must be rolled back afterwards - unpacker = msgpack.Unpacker(use_list=False) + # the stats. The cache transaction must be rolled back afterwards. 
+ cache = Cache(self.repository, self.key, self.manifest) cache.begin_txn() stats = Statistics() + unpacker = msgpack.Unpacker(use_list=False) add(self.id) for id, chunk in zip(self.metadata[b'items'], self.repository.get_many(self.metadata[b'items'])): add(id) diff --git a/attic/archiver.py b/attic/archiver.py index 7295ac06e..95a9129cc 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -305,7 +305,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") manifest, key = Manifest.load(repository) cache = Cache(repository, key, manifest) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache) - stats = archive.calc_stats(cache) + stats = archive.calc_stats() print('Name:', archive.name) print('Fingerprint: %s' % hexlify(archive.id).decode('ascii')) print('Hostname:', archive.metadata[b'hostname']) From b2147c14d6c8de060b3ef09bdce7410b3c1709dd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 19 Apr 2015 19:56:48 +0200 Subject: [PATCH 080/241] updated CHANGES-merge.txt --- CHANGES-merge.txt | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/CHANGES-merge.txt b/CHANGES-merge.txt index 81ba70017..b9df3a2f6 100644 --- a/CHANGES-merge.txt +++ b/CHANGES-merge.txt @@ -1,16 +1,29 @@ -Stuff in "merge" that is not in "master" minus minor changes: +Important note about "merge" branch +=================================== + +Goal of the "merge" branch is to be conservative and only merge: +- bug fixes +- new features that do not impact compatibility +- docs updates +- etc. + +BUT: THERE IS NO GUARANTEE THAT IT IS ALWAYS COMPATIBLE WITH ORIGINAL ATTIC +nor THAT YOU CAN SWITCH BACK AND FORTH BETWEEN ORIGINAL CODE AND THIS CODE +WITHIN THE SAME REPOSITORY WITHOUT ENCOUNTERING ISSUES. + +Please also see the LICENSE for more informations. 
+ + +Stuff in "merge" that is not in "master" minus minor changes +============================================================ -dummy acl and xattr implementation for unsupported platforms give specific path to xattr.is_enabled(), disable symlink setattr call that always fails -less memory usage: do not create items_buffer and chunker unless we are creating new archive fix misleading hint the fuse ImportError handler gave, fixes #237 source: misc. cleanups, pep8, style -serve: use os.write(stdout_fd, ...), fixes #233 implement attic check --last N check: sort archives in reverse time order avoid defect python-msgpack releases, fixes #171, fixes #185 check unpacked data from RPC for tuple type and correct length, fixes #127 -Cache: do not try to release the lock twice less memory usage: add global option --no-cache-files fix traceback when trying to do unsupported passphrase change, fixes #189 datetime does not like the year 10.000, fixes issue #139 @@ -19,7 +32,22 @@ cleanup crypto.pyx, make it easier to adapt to other modes attic extract: if --stdout is given, write all extracted binary data to stdout attic create: if "-" is given as path, read binary from stdin do os.fsync like recommended in the python docs -Extra debug information for 'fread failed' +extra debug information for 'fread failed' FUSE: reflect deduplication in allocated blocks -Only allow whitelisted RPC calls in server mode -Normalize source/exclude paths before matching +only allow whitelisted RPC calls in server mode +normalize source/exclude paths before matching +fix "attic info" all archives stats, fixes #183 +implement attic create --timestamp, utc string or reference file/dir +simple sparse file support (attic extract --sparse) +fix parsing with missing microseconds, fixes #282 +use posix_fadvise to not spoil the OS cache, fixes #252 +source: Let chunker optionally work with os-level file descriptor. 
+source: Linux: remove duplicate os.fsencode calls +fix test setup for 32bit platforms, partial fix for #196 +source: refactor _open_rb code a bit, so it is more consistent / regular +implement attic rename repo::oldname newname +implement attic create --progress +source: refactor indicator (status) and item processing +implement attic delete (also deletes local cache) +better attic create -v output + From bbe200f41bf5a8463cb0c91e59683a4bea6d8771 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 21 Apr 2015 11:34:13 +0200 Subject: [PATCH 081/241] fix incomplete merge of delete_repo branch --- attic/repository.py | 1 + 1 file changed, 1 insertion(+) diff --git a/attic/repository.py b/attic/repository.py index 4338df082..9689c237f 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -3,6 +3,7 @@ from binascii import hexlify from itertools import islice import errno import os +import shutil import struct import sys from zlib import crc32 From 38dd086e309504eaa6fc4831f456e09388846df0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 21 Apr 2015 20:50:19 +0200 Subject: [PATCH 082/241] Revert "fix "attic info" all archives stats, fixes #183" Fixed by jborg in master in a different way. This reverts commit 212777d716a96a6a7ac76f751e1bcc86ee8511df. 
--- attic/archive.py | 14 +++++--------- attic/archiver.py | 2 +- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/attic/archive.py b/attic/archive.py index de9553db7..d78a7fdb3 100644 --- a/attic/archive.py +++ b/attic/archive.py @@ -14,7 +14,6 @@ import sys import time from io import BytesIO from attic import xattr -from attic.cache import Cache from attic.platform import acl_get, acl_set from attic.chunker import Chunker from attic.hashindex import ChunkIndex @@ -207,22 +206,19 @@ class Archive: self.repository.commit() self.cache.commit() - def calc_stats(self): + def calc_stats(self, cache): def add(id): - count, size, csize = cache.chunks[id] + count, size, csize = self.cache.chunks[id] stats.update(size, csize, count == 1) - cache.chunks[id] = count - 1, size, csize # dirties cache.chunks! - + self.cache.chunks[id] = count - 1, size, csize def add_file_chunks(chunks): for id, _, _ in chunks: add(id) - # This function is a bit evil since it abuses the cache to calculate - # the stats. The cache transaction must be rolled back afterwards. - cache = Cache(self.repository, self.key, self.manifest) + # the stats. 
The cache transaction must be rolled back afterwards + unpacker = msgpack.Unpacker(use_list=False) cache.begin_txn() stats = Statistics() - unpacker = msgpack.Unpacker(use_list=False) add(self.id) for id, chunk in zip(self.metadata[b'items'], self.repository.get_many(self.metadata[b'items'])): add(id) diff --git a/attic/archiver.py b/attic/archiver.py index 95a9129cc..7295ac06e 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -305,7 +305,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") manifest, key = Manifest.load(repository) cache = Cache(repository, key, manifest) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache) - stats = archive.calc_stats() + stats = archive.calc_stats(cache) print('Name:', archive.name) print('Fingerprint: %s' % hexlify(archive.id).decode('ascii')) print('Hostname:', archive.metadata[b'hostname']) From 5fcaac1b6f2072a0fb9580180ad45018a4a9a063 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 27 Apr 2015 00:48:09 +0200 Subject: [PATCH 083/241] do not test for sparse file on Mac OS X --- attic/testsuite/archiver.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 3821a03db..8eea4448b 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -198,11 +198,14 @@ class ArchiverTestCase(ArchiverTestCaseBase): return Repository(self.repository_path).id def test_sparse_file(self): + # no sparse file support on Mac OS X + sparse_support = sys.platform != 'darwin' filename = os.path.join(self.input_path, 'sparse') content = b'foobar' hole_size = 5 * CHUNK_MAX # 5 full chunker buffers with open(filename, 'wb') as fd: - # create a file that has a hole at the beginning and end + # create a file that has a hole at the beginning and end (if the + # OS and filesystem supports sparse files) fd.seek(hole_size, 1) fd.write(content) fd.seek(hole_size, 1) @@ -211,7 +214,7 @@ class 
ArchiverTestCase(ArchiverTestCaseBase): total_len = hole_size + len(content) + hole_size st = os.stat(filename) self.assert_equal(st.st_size, total_len) - if hasattr(st, 'st_blocks'): + if sparse_support and hasattr(st, 'st_blocks'): self.assert_true(st.st_blocks * 512 < total_len / 10) # is input sparse? self.attic('init', self.repository_location) self.attic('create', self.repository_location + '::test', 'input') @@ -226,7 +229,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_equal(fd.read(hole_size), b'\0' * hole_size) st = os.stat(filename) self.assert_equal(st.st_size, total_len) - if hasattr(st, 'st_blocks'): + if sparse_support and hasattr(st, 'st_blocks'): self.assert_true(st.st_blocks * 512 < total_len / 10) # is output sparse? def test_repository_swap_detection(self): From cb5c9b63b388d39cde7799b3e54d95a8a0be0fe6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 9 May 2015 18:40:55 +0200 Subject: [PATCH 084/241] fork: s/attic/borg/g, fix URLs, add new section to AUTHORS use borg instead attic except at the places where it was used: - as toplevel package name, directory name, file name - to refer to original attic remove sphinx upload make command, will be replaced by github.io site later remove references to binary downloads and linux packages for now remove some software name references, fix grammar use borgbackup rather than borg-backup (or borg) in URLs, less name collision issues, better search results, no validity issues with "-" --- AUTHORS | 10 +- LICENSE | 1 + README.rst | 36 ++++--- attic/_version.py | 2 +- attic/archiver.py | 28 +++--- attic/cache.py | 10 +- attic/fuse.py | 6 +- attic/helpers.py | 10 +- attic/key.py | 8 +- attic/remote.py | 4 +- attic/repository.py | 4 +- attic/testsuite/archiver.py | 16 ++-- attic/testsuite/helpers.py | 4 +- attic/testsuite/key.py | 10 +- docs/Makefile | 12 +-- docs/_themes/attic/sidebarlogo.html | 7 -- docs/_themes/attic/sidebarusefullinks.html | 10 -- docs/_themes/local/sidebarlogo.html 
| 5 + docs/_themes/local/sidebarusefullinks.html | 11 +++ .../attic.css_t => local/static/local.css_t} | 94 +++++++++++++------ docs/_themes/{attic => local}/theme.conf | 2 +- docs/conf.py | 23 +++-- docs/faq.rst | 10 +- docs/foreword.rst | 6 +- docs/global.rst.inc | 16 ++-- docs/index.rst | 28 +++--- docs/installation.rst | 20 +--- docs/internals.rst | 10 +- docs/quickstart.rst | 36 +++---- docs/update_usage.sh | 10 +- docs/usage.rst | 48 +++++----- scripts/{attic => borg} | 0 setup.py | 22 ++--- 33 files changed, 273 insertions(+), 246 deletions(-) delete mode 100644 docs/_themes/attic/sidebarlogo.html delete mode 100644 docs/_themes/attic/sidebarusefullinks.html create mode 100644 docs/_themes/local/sidebarlogo.html create mode 100644 docs/_themes/local/sidebarusefullinks.html rename docs/_themes/{attic/static/attic.css_t => local/static/local.css_t} (60%) rename docs/_themes/{attic => local}/theme.conf (71%) rename scripts/{attic => borg} (100%) diff --git a/AUTHORS b/AUTHORS index 7d18c0df1..be47591b2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,9 +1,13 @@ -Attic is written and maintained by Jonas Borgström and -various contributors: +Borg Developers / Contributors ("The Borg Collective") +`````````````````````````````````````````````````````` +- Thomas Waldmann + + +Borg is a fork of Attic. Attic is written and maintained +by Jonas Borgström and various contributors: Development Lead ```````````````` - - Jonas Borgström Patches and Suggestions diff --git a/LICENSE b/LICENSE index 2016c735c..ad958c543 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,4 @@ +Copyright (C) 2015 The Borg Collective (see AUTHORS file) Copyright (C) 2010-2014 Jonas Borgström All rights reserved. diff --git a/README.rst b/README.rst index 0903f04f0..9b21011bd 100644 --- a/README.rst +++ b/README.rst @@ -1,16 +1,20 @@ -What is Attic? --------------- -Attic is a deduplicating backup program. The main goal of Attic is to provide +|build| + +What is Borg? 
+------------- +Borg is a deduplicating backup program. The main goal of Borg is to provide an efficient and secure way to backup data. The data deduplication -technique used makes Attic suitable for daily backups since only changes +technique used makes Borg suitable for daily backups since only changes are stored. +Borg is a fork of Attic and maintained by "The Borg Collective" (see AUTHORS file). + Easy to use ~~~~~~~~~~~ Initialize backup repository and create a backup archive:: - $ attic init /usbdrive/my-backup.attic - $ attic create -v /usbdrive/my-backup.attic::documents ~/Documents + $ borg init /usbdrive/my-backup.borg + $ borg create -v /usbdrive/my-backup.borg::documents ~/Documents Main features ~~~~~~~~~~~~~ @@ -25,8 +29,8 @@ Optional data encryption and authenticity is verified using HMAC-SHA256. Off-site backups - Attic can store data on any remote host accessible over SSH. This is - most efficient if Attic is also installed on the remote host. + Borg can store data on any remote host accessible over SSH. This is + most efficient if Borg is also installed on the remote host. Backups mountable as filesystems Backup archives are mountable as userspace filesystems for easy backup @@ -34,24 +38,28 @@ Backups mountable as filesystems What do I need? --------------- -Attic requires Python 3.2 or above to work. Besides Python, Attic also requires -msgpack-python and sufficiently recent OpenSSL (>= 1.0.0). +Borg requires Python 3.2 or above to work. +Borg also requires a sufficiently recent OpenSSL (>= 1.0.0). In order to mount archives as filesystems, llfuse is required. How do I install it? -------------------- :: - $ pip install Attic + $ pip3 install borgbackup Where are the docs? ------------------- -Go to https://attic-backup.org/ for a prebuilt version of the documentation. +Go to https://borgbackup.github.io/ for a prebuilt version of the documentation. You can also build it yourself from the docs folder. Where are the tests? 
-------------------- -The tests are in the attic/testsuite package. To run the test suite use the +The tests are in the borg/testsuite package. To run the test suite use the following command:: - $ fakeroot -u python -m attic.testsuite.run + $ fakeroot -u tox # you need to have tox installed + +.. |build| image:: https://travis-ci.org/borgbackup/borg.svg + :alt: Build Status + :target: https://travis-ci.org/borgbackup/borg diff --git a/attic/_version.py b/attic/_version.py index 129c2f82e..b8a6306a2 100644 --- a/attic/_version.py +++ b/attic/_version.py @@ -183,7 +183,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False) return {"version": dirname[len(parentdir_prefix):], "full": ""} tag_prefix = "" -parentdir_prefix = "Attic-" +parentdir_prefix = "borgbackup-" versionfile_source = "attic/_version.py" diff --git a/attic/archiver.py b/attic/archiver.py index 66d8448a3..7a8311519 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -38,7 +38,7 @@ class Archiver: def print_error(self, msg, *args): msg = args and msg % args or msg self.exit_code = 1 - print('attic: ' + msg, file=sys.stderr) + print('borg: ' + msg, file=sys.stderr) def print_verbose(self, msg, *args, **kw): if self.verbose: @@ -49,7 +49,7 @@ class Archiver: print(msg, end=' ') def do_serve(self, args): - """Start Attic in server mode. This command is usually not used manually. + """Start in server mode. This command is usually not used manually. """ return RepositoryServer(restrict_to_paths=args.restrict_to_paths).serve() @@ -69,7 +69,7 @@ class Archiver: """Check repository consistency""" repository = self.open_repository(args.repository, exclusive=args.repair) if args.repair: - while not os.environ.get('ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'): + while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): self.print_error("""Warning: 'check --repair' is an experimental feature that might result in data loss. 
@@ -102,7 +102,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, numeric_owner=args.numeric_owner, progress=args.progress) - # Add Attic cache dir to inode_skip list + # Add cache dir to inode_skip list skip_inodes = set() try: st = os.stat(get_cache_dir()) @@ -198,7 +198,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") return # Status output # A lowercase character means a file type other than a regular file, - # attic usually just stores them. E.g. (d)irectory. + # borg usually just stores them. E.g. (d)irectory. # Hardlinks to already seen content are indicated by (h). # A uppercase character means a regular file that was (A)dded, # (M)odified or was (U)nchanged. @@ -435,17 +435,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") Examples: # Exclude '/home/user/file.o' but not '/home/user/file.odt': - $ attic create -e '*.o' repo.attic / + $ borg create -e '*.o' repo.borg / # Exclude '/home/user/junk' and '/home/user/subdir/junk' but # not '/home/user/importantjunk' or '/etc/junk': - $ attic create -e '/home/*/junk' repo.attic / + $ borg create -e '/home/*/junk' repo.borg / # Exclude the contents of '/home/user/cache' but not the directory itself: - $ attic create -e /home/user/cache/ repo.attic / + $ borg create -e /home/user/cache/ repo.borg / # The file '/home/user/cache/important' is *not* backed up: - $ attic create -e /home/user/cache/ repo.attic / /home/user/cache/important + $ borg create -e /home/user/cache/ repo.borg / /home/user/cache/important ''' def do_help(self, parser, commands, args): @@ -474,7 +474,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") ('--yearly', '--keep-yearly', 'Warning: "--yearly" has been deprecated. 
Use "--keep-yearly" instead.') ] if args and args[0] == 'verify': - print('Warning: "attic verify" has been deprecated. Use "attic extract --dry-run" instead.') + print('Warning: "borg verify" has been deprecated. Use "borg extract --dry-run" instead.') args = ['extract', '--dry-run'] + args[1:] for i, arg in enumerate(args[:]): for old_name, new_name, warning in deprecations: @@ -496,7 +496,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") with open(os.path.join(cache_dir, 'CACHEDIR.TAG'), 'w') as fd: fd.write(textwrap.dedent(""" Signature: 8a477f597d28d172789f06886806bc55 - # This file is a cache directory tag created by Attic. + # This file is a cache directory tag created by Borg. # For information about cache directory tags, see: # http://www.brynosaurus.com/cachedir/ """).lstrip()) @@ -510,7 +510,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") if args: args = self.preprocess_args(args) - parser = argparse.ArgumentParser(description='Attic %s - Deduplicated Backups' % __version__) + parser = argparse.ArgumentParser(description='Borg %s - Deduplicated Backups' % __version__) subparsers = parser.add_subparsers(title='Available commands') subparser = subparsers.add_parser('serve', parents=[common_parser], @@ -582,7 +582,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") traversing all paths specified. The archive will consume almost no disk space for files or parts of files that have already been stored in other archives. - See "attic help patterns" for more help on exclude patterns. + See "borg help patterns" for more help on exclude patterns. """) subparser = subparsers.add_parser('create', parents=[common_parser], @@ -631,7 +631,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") by passing a list of ``PATHs`` as arguments. The file selection can further be restricted by using the ``--exclude`` option. 
- See "attic help patterns" for more help on exclude patterns. + See "borg help patterns" for more help on exclude patterns. """) subparser = subparsers.add_parser('extract', parents=[common_parser], description=self.do_extract.__doc__, diff --git a/attic/cache.py b/attic/cache.py index 75212b4cc..97890b52a 100644 --- a/attic/cache.py +++ b/attic/cache.py @@ -43,14 +43,14 @@ class Cache: if not os.path.exists(self.path): if warn_if_unencrypted and isinstance(key, PlaintextKey): if not self._confirm('Warning: Attempting to access a previously unknown unencrypted repository', - 'ATTIC_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK'): + 'BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK'): raise self.CacheInitAbortedError() self.create() self.open() # Warn user before sending data to a relocated repository if self.previous_location and self.previous_location != repository._location.canonical_path(): msg = 'Warning: The repository at location {} was previously located at {}'.format(repository._location.canonical_path(), self.previous_location) - if not self._confirm(msg, 'ATTIC_RELOCATED_REPO_ACCESS_IS_OK'): + if not self._confirm(msg, 'BORG_RELOCATED_REPO_ACCESS_IS_OK'): raise self.RepositoryAccessAborted() if sync and self.manifest.id != self.manifest_id: @@ -84,7 +84,7 @@ class Cache: """ os.makedirs(self.path) with open(os.path.join(self.path, 'README'), 'w') as fd: - fd.write('This is an Attic cache') + fd.write('This is a Borg cache') config = RawConfigParser() config.add_section('cache') config.set('cache', 'version', '1') @@ -107,7 +107,7 @@ class Cache: self.config = RawConfigParser() self.config.read(os.path.join(self.path, 'config')) if self.config.getint('cache', 'version') != 1: - raise Exception('%s Does not look like an Attic cache') + raise Exception('%s Does not look like a Borg cache') self.id = self.config.get('cache', 'repository') self.manifest_id = unhexlify(self.config.get('cache', 'manifest')) self.timestamp = self.config.get('cache', 'timestamp', 
fallback=None) @@ -118,7 +118,7 @@ class Cache: def open(self): if not os.path.isdir(self.path): - raise Exception('%s Does not look like an Attic cache' % self.path) + raise Exception('%s Does not look like a Borg cache' % self.path) self.lock = UpgradableLock(os.path.join(self.path, 'config'), exclusive=True) self.rollback() diff --git a/attic/fuse.py b/attic/fuse.py index c91e2c80f..e5fe46151 100644 --- a/attic/fuse.py +++ b/attic/fuse.py @@ -30,8 +30,8 @@ class ItemCache: return next(msgpack.Unpacker(self.fd)) -class AtticOperations(llfuse.Operations): - """Export Attic archive as a fuse filesystem +class FuseOperations(llfuse.Operations): + """Export archive as a fuse filesystem """ def __init__(self, key, repository, manifest, archive): super(AtticOperations, self).__init__() @@ -226,7 +226,7 @@ class AtticOperations(llfuse.Operations): return os.fsencode(item[b'source']) def mount(self, mountpoint, extra_options, foreground=False): - options = ['fsname=atticfs', 'ro'] + options = ['fsname=borgfs', 'ro'] if extra_options: options.extend(extra_options.split(',')) llfuse.init(self, mountpoint, options) diff --git a/attic/helpers.py b/attic/helpers.py index 26418e54e..0ba0739f9 100644 --- a/attic/helpers.py +++ b/attic/helpers.py @@ -27,7 +27,7 @@ class Error(Exception): class ExtensionModuleError(Error): - """The Attic binary extension modules does not seem to be properly installed""" + """The Borg binary extension modules do not seem to be properly installed""" class UpgradableLock: @@ -182,14 +182,14 @@ class Statistics: def get_keys_dir(): """Determine where to repository keys and cache""" - return os.environ.get('ATTIC_KEYS_DIR', - os.path.join(os.path.expanduser('~'), '.attic', 'keys')) + return os.environ.get('BORG_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.borg', 'keys')) def get_cache_dir(): """Determine where to repository keys and cache""" - return os.environ.get('ATTIC_CACHE_DIR', - os.path.join(os.path.expanduser('~'), '.cache', 'attic')) 
+ return os.environ.get('BORG_CACHE_DIR', + os.path.join(os.path.expanduser('~'), '.cache', 'borg')) def to_localtime(ts): diff --git a/attic/key.py b/attic/key.py index 7065042a7..4b71d747b 100644 --- a/attic/key.py +++ b/attic/key.py @@ -160,7 +160,7 @@ class PassphraseKey(AESKeyBase): @classmethod def create(cls, repository, args): key = cls() - passphrase = os.environ.get('ATTIC_PASSPHRASE') + passphrase = os.environ.get('BORG_PASSPHRASE') if passphrase is not None: passphrase2 = passphrase else: @@ -182,7 +182,7 @@ class PassphraseKey(AESKeyBase): def detect(cls, repository, manifest_data): prompt = 'Enter passphrase for %s: ' % repository._location.orig key = cls() - passphrase = os.environ.get('ATTIC_PASSPHRASE') + passphrase = os.environ.get('BORG_PASSPHRASE') if passphrase is None: passphrase = getpass(prompt) while True: @@ -215,7 +215,7 @@ class KeyfileKey(AESKeyBase): key = cls() path = cls.find_key_file(repository) prompt = 'Enter passphrase for key file %s: ' % path - passphrase = os.environ.get('ATTIC_PASSPHRASE', '') + passphrase = os.environ.get('BORG_PASSPHRASE', '') while not key.load(path, passphrase): passphrase = getpass(prompt) num_blocks = num_aes_blocks(len(manifest_data) - 41) @@ -310,7 +310,7 @@ class KeyfileKey(AESKeyBase): while os.path.exists(path): i += 1 path = filename + '.%d' % i - passphrase = os.environ.get('ATTIC_PASSPHRASE') + passphrase = os.environ.get('BORG_PASSPHRASE') if passphrase is not None: passphrase2 = passphrase else: diff --git a/attic/remote.py b/attic/remote.py index c52d4d80e..9650f065d 100644 --- a/attic/remote.py +++ b/attic/remote.py @@ -79,7 +79,7 @@ class RepositoryServer: f = getattr(self.repository, method) res = f(*args) except BaseException as e: - exc = "Remote Traceback by Attic %s%s%s" % (__version__, os.linesep, traceback.format_exc()) + exc = "Remote Traceback by Borg %s%s%s" % (__version__, os.linesep, traceback.format_exc()) os.write(stdout_fd, msgpack.packb((1, msgid, e.__class__.__name__, 
exc))) else: os.write(stdout_fd, msgpack.packb((1, msgid, None, res))) @@ -132,7 +132,7 @@ class RemoteRepository: args.append('%s@%s' % (location.user, location.host)) else: args.append('%s' % location.host) - args += ['attic', 'serve'] + args += ['borg', 'serve'] self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE) self.stdin_fd = self.p.stdin.fileno() self.stdout_fd = self.p.stdout.fileno() diff --git a/attic/repository.py b/attic/repository.py index 9689c237f..893c04711 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -42,7 +42,7 @@ class Repository: """{} is not a valid repository.""" class CheckNeeded(Error): - """Inconsistency detected. Please run "attic check {}".""" + """Inconsistency detected. Please run "borg check {}".""" class ObjectNotFound(Error): """Object with key {} not found in repository {}.""" @@ -68,7 +68,7 @@ class Repository: if not os.path.exists(path): os.mkdir(path) with open(os.path.join(path, 'README'), 'w') as fd: - fd.write('This is an Attic repository\n') + fd.write('This is a Borg repository\n') os.mkdir(os.path.join(path, 'data')) config = RawConfigParser() config.add_section('repository') diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 4a962396a..534a46564 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -65,7 +65,7 @@ class ArchiverTestCaseBase(AtticTestCase): prefix = '' def setUp(self): - os.environ['ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'] = '1' + os.environ['BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'] = '1' self.archiver = Archiver() self.tmpdir = tempfile.mkdtemp() self.repository_path = os.path.join(self.tmpdir, 'repository') @@ -75,8 +75,8 @@ class ArchiverTestCaseBase(AtticTestCase): self.keys_path = os.path.join(self.tmpdir, 'keys') self.cache_path = os.path.join(self.tmpdir, 'cache') self.exclude_file_path = os.path.join(self.tmpdir, 'excludes') - os.environ['ATTIC_KEYS_DIR'] = self.keys_path - os.environ['ATTIC_CACHE_DIR'] = self.cache_path + 
os.environ['BORG_KEYS_DIR'] = self.keys_path + os.environ['BORG_CACHE_DIR'] = self.cache_path os.mkdir(self.input_path) os.mkdir(self.output_path) os.mkdir(self.keys_path) @@ -190,7 +190,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): info_output = self.attic('info', self.repository_location + '::test') self.assert_in('Number of files: 4', info_output) shutil.rmtree(self.cache_path) - with environment_variable(ATTIC_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'): + with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'): info_output2 = self.attic('info', self.repository_location + '::test') # info_output2 starts with some "initializing cache" text but should # end the same way as info_output @@ -244,7 +244,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_repository_swap_detection(self): self.create_test_files() - os.environ['ATTIC_PASSPHRASE'] = 'passphrase' + os.environ['BORG_PASSPHRASE'] = 'passphrase' self.attic('init', '--encryption=passphrase', self.repository_location) repository_id = self._extract_repository_id(self.repository_path) self.attic('create', self.repository_location + '::test', 'input') @@ -257,7 +257,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_repository_swap_detection2(self): self.create_test_files() self.attic('init', '--encryption=none', self.repository_location + '_unencrypted') - os.environ['ATTIC_PASSPHRASE'] = 'passphrase' + os.environ['BORG_PASSPHRASE'] = 'passphrase' self.attic('init', '--encryption=passphrase', self.repository_location + '_encrypted') self.attic('create', self.repository_location + '_encrypted::test', 'input') shutil.rmtree(self.repository_path + '_encrypted') @@ -419,7 +419,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.attic('init', self.repository_location) self.attic('create', self.repository_location + '::test', 'input') output = self.attic('verify', '-v', self.repository_location + '::test') - self.assert_in('"attic verify" has been deprecated', output) + 
self.assert_in('"borg verify" has been deprecated', output) output = self.attic('prune', self.repository_location, '--hourly=1') self.assert_in('"--hourly" has been deprecated. Use "--keep-hourly" instead', output) @@ -502,7 +502,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): used.add(counter) self.create_test_files() - os.environ['ATTIC_PASSPHRASE'] = 'passphrase' + os.environ['BORG_PASSPHRASE'] = 'passphrase' self.attic('init', '--encryption=' + method, self.repository_location) verify_uniqueness() self.attic('create', self.repository_location + '::test', 'input') diff --git a/attic/testsuite/helpers.py b/attic/testsuite/helpers.py index 9842390cb..9cbfb1629 100644 --- a/attic/testsuite/helpers.py +++ b/attic/testsuite/helpers.py @@ -38,8 +38,8 @@ class LocationTestCase(AtticTestCase): "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')" ) self.assert_equal( - repr(Location('mybackup.attic::archive')), - "Location(proto='file', user=None, host=None, port=None, path='mybackup.attic', archive='archive')" + repr(Location('mybackup.borg::archive')), + "Location(proto='file', user=None, host=None, port=None, path='mybackup.borg', archive='archive')" ) self.assert_equal( repr(Location('/some/absolute/path::archive')), diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index 7e0d235bd..35e588aa4 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -32,7 +32,7 @@ class KeyTestCase(AtticTestCase): def setUp(self): self.tmppath = tempfile.mkdtemp() - os.environ['ATTIC_KEYS_DIR'] = self.tmppath + os.environ['BORG_KEYS_DIR'] = self.tmppath def tearDown(self): shutil.rmtree(self.tmppath) @@ -51,7 +51,7 @@ class KeyTestCase(AtticTestCase): self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data))) def test_keyfile(self): - os.environ['ATTIC_PASSPHRASE'] = 'test' + os.environ['BORG_PASSPHRASE'] = 'test' key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) 
self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0) manifest = key.encrypt(b'XXX') @@ -70,14 +70,14 @@ class KeyTestCase(AtticTestCase): self.assert_equal(data, key2.decrypt(key.id_hash(data), key.encrypt(data))) def test_keyfile2(self): - with open(os.path.join(os.environ['ATTIC_KEYS_DIR'], 'keyfile'), 'w') as fd: + with open(os.path.join(os.environ['BORG_KEYS_DIR'], 'keyfile'), 'w') as fd: fd.write(self.keyfile2_key_file) - os.environ['ATTIC_PASSPHRASE'] = 'passphrase' + os.environ['BORG_PASSPHRASE'] = 'passphrase' key = KeyfileKey.detect(self.MockRepository(), self.keyfile2_cdata) self.assert_equal(key.decrypt(self.keyfile2_id, self.keyfile2_cdata), b'payload') def test_passphrase(self): - os.environ['ATTIC_PASSPHRASE'] = 'test' + os.environ['BORG_PASSPHRASE'] = 'test' key = PassphraseKey.create(self.MockRepository(), None) self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0) self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6') diff --git a/docs/Makefile b/docs/Makefile index 367e035d6..21d6d69c7 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -73,17 +73,17 @@ qthelp: @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/attic.qhcp" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/borg.qhcp" @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/attic.qhc" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/borg.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." 
@echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/attic" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/attic" + @echo "# mkdir -p $$HOME/.local/share/devhelp/borg" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/borg" @echo "# devhelp" epub: @@ -140,7 +140,3 @@ gh-pages: html inotify: html while inotifywait -r . --exclude usage.rst --exclude '_build/*' ; do make html ; done - -upload: html - rsync -va -e ssh _build/html/ sushi.edgewall.com:/srv/attic/www/ - diff --git a/docs/_themes/attic/sidebarlogo.html b/docs/_themes/attic/sidebarlogo.html deleted file mode 100644 index ceda28cbe..000000000 --- a/docs/_themes/attic/sidebarlogo.html +++ /dev/null @@ -1,7 +0,0 @@ - - \ No newline at end of file diff --git a/docs/_themes/attic/sidebarusefullinks.html b/docs/_themes/attic/sidebarusefullinks.html deleted file mode 100644 index d630501d8..000000000 --- a/docs/_themes/attic/sidebarusefullinks.html +++ /dev/null @@ -1,10 +0,0 @@ -Fork me on GitHub - -

Useful Links

- \ No newline at end of file diff --git a/docs/_themes/local/sidebarlogo.html b/docs/_themes/local/sidebarlogo.html new file mode 100644 index 000000000..20ecf4108 --- /dev/null +++ b/docs/_themes/local/sidebarlogo.html @@ -0,0 +1,5 @@ + diff --git a/docs/_themes/local/sidebarusefullinks.html b/docs/_themes/local/sidebarusefullinks.html new file mode 100644 index 000000000..5723043b2 --- /dev/null +++ b/docs/_themes/local/sidebarusefullinks.html @@ -0,0 +1,11 @@ +Fork me on GitHub + +

Useful Links

+ diff --git a/docs/_themes/attic/static/attic.css_t b/docs/_themes/local/static/local.css_t similarity index 60% rename from docs/_themes/attic/static/attic.css_t rename to docs/_themes/local/static/local.css_t index 34bbd971c..d3ae46596 100644 --- a/docs/_themes/attic/static/attic.css_t +++ b/docs/_themes/local/static/local.css_t @@ -1,12 +1,14 @@ @import url("basic.css"); +@import url(//fonts.googleapis.com/css?family=Black+Ops+One); body { - font-family: Helvetica; - background-color: white; + font-family: Arial, Helvetica, sans-serif; + background-color: black; margin: 0; padding: 0; position: relative; } + div.related { display: none; background-color: black; @@ -14,10 +16,12 @@ div.related { width: 800px; margin: 0 auto; } + div.related a { color: white; text-decoration: none; } + div.document { width: 1030px; margin: 0 auto; @@ -27,114 +31,144 @@ div.documentwrapper { float: right; width: 760px; padding: 0 20px 20px 20px; - background-color: #f3f3f3; + color: #00aa00; + background-color: #000000; margin-bottom: 2em; } + div.sphinxsidebar { margin-left: 0; - parring-right: 20px; + padding-right: 20px; width: 230px; - background: #e9e9e9; + background: #081008; position: absolute; top: 0; min-height: 100%; } h1, h2, h3 { - font-family: "Oswald"; font-weight: normal; - color: #333; + color: #33dd33; } + h1 { margin: .8em 0 .5em; + font-size: 200%; } -h2, h3 { + +h2 { margin: 1.2em 0 .6em; + font-size: 140%; } -h1 { font-size: 200%;} -h2 { font-size: 140%;} -h3 { font-size: 110%;} + +h3 { + margin: 1.2em 0 .6em; + font-size: 110%; +} + ul { padding-left: 1.2em; margin-bottom: .3em; } + ul ul { font-size: 95%; } + li { margin: .1em 0; } -a:link, a:visited { - color: #00608f; + +a:link { + color: #dddd00; text-decoration: none; } + +a:visited { + color: #990000; + text-decoration: none; +} + a:hover { - color: #00B0E4; - border-bottom: 1px dotted #00B0E4; + color: #dd0000; + border-bottom: 1px dotted #dd0000; } div.sphinxsidebar a:link, div.sphinxsidebar 
a:visited { - color: #555; border-bottom: 1px dotted #555; } +div.sphinxsidebar { + color: #00aa00; + background: 0000000; +} + div.sphinxsidebar input { - border: 1px solid #ccc; + color: #00cc00; + background: 0000000; + border: 1px solid #444444; } pre { padding: 10px 20px; - background: white; - color: #222; + background: #101010; + color: #22cc22; line-height: 1.5em; border-bottom: 2px solid black; - font-family: "Inconsolata"; } + pre a:link, pre a:visited { - color: #00B0E4; + color: #00b0e4; } div.sidebarlogo .title { - font-family: "Oswald"; + font-family: 'Black Ops One', cursive; font-size: 500%; } + +div.sidebarlogo a { + color: #00dd00; +} + div.sidebarlogo .subtitle { font-style: italic; color: #777; } + tt span.pre { font-size: 110%; } + dt { - font-family: "Oswald"; font-size: 95%; } div.admonition p.admonition-title + p { - display: inline; + display: inline; } div.admonition p { - margin-bottom: 5px; + margin-bottom: 5px; } p.admonition-title { - display: inline; + display: inline; } p.admonition-title:after { - content: ":"; + content: ":"; } div.note { - background-color: #ff5; + background-color: #0f5; border-bottom: 2px solid #d22; } div.seealso { - background-color: #ffe; - border: 1px solid #ff6; + background-color: #0fe; + border: 1px solid #0f6; border-radius: .4em; box-shadow: 2px 2px #dd6; } + diff --git a/docs/_themes/attic/theme.conf b/docs/_themes/local/theme.conf similarity index 71% rename from docs/_themes/attic/theme.conf rename to docs/_themes/local/theme.conf index 3f003dd29..03505a6ec 100644 --- a/docs/_themes/attic/theme.conf +++ b/docs/_themes/local/theme.conf @@ -1,6 +1,6 @@ [theme] inherit = basic -stylesheet = attic.css +stylesheet = local.css pygments_style = tango [options] diff --git a/docs/conf.py b/docs/conf.py index 3ed30c837..bd8b14bff 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Attic documentation build configuration file, created by +# documentation build configuration 
file, created by # sphinx-quickstart on Sat Sep 10 18:18:25 2011. # # This file is execfile()d with the current directory set to its containing dir. @@ -40,7 +40,7 @@ source_suffix = '.rst' master_doc = 'index' # General information about the project. -project = 'Attic - Deduplicating Archiver' +project = 'Borg - Deduplicating Archiver' copyright = '2010-2014, Jonas Borgström' # The version info for the project you're documenting, acts as replacement for @@ -91,7 +91,7 @@ pygments_style = 'sphinx' # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'attic' +html_theme = 'local' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -166,7 +166,7 @@ html_show_copyright = False #html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'atticdoc' +htmlhelp_basename = 'borgdoc' # -- Options for LaTeX output -------------------------------------------------- @@ -180,8 +180,8 @@ htmlhelp_basename = 'atticdoc' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'Attic.tex', 'Attic Documentation', - 'Jonas Borgström', 'manual'), + ('index', 'Borg.tex', 'Borg Documentation', + 'see "AUTHORS" file', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -213,14 +213,13 @@ latex_documents = [ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
#man_pages = [ -# ('man', 'attic', 'Attic', -# ['Jonas Borgström'], 1) +# ('man', 'borg', 'Borg', +# ['see "AUTHORS" file'], 1) #] extensions = ['sphinx.ext.extlinks'] extlinks = { - 'issue': ('https://github.com/jborg/attic/issues/%s', '#'), - 'targz_url': ('https://pypi.python.org/packages/source/A/Attic/%%s-%s.tar.gz' % version, None), - 'artifacts': ('https://attic-backup.org/downloads/releases/%s/%%s' % version, '') - } + 'issue': ('https://github.com/borgbackup/borg/issues/%s', '#'), + 'targz_url': ('https://pypi.python.org/packages/source/b/borgbackup/%%s-%s.tar.gz' % version, None), +} diff --git a/docs/faq.rst b/docs/faq.rst index a23cafb2c..d6ffe99ea 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -13,16 +13,16 @@ Can I backup VM disk images? makes sure only the modified parts of the file are stored. Can I backup from multiple servers into a single repository? - Yes, but in order for the deduplication used by Attic to work, it + Yes, but in order for the deduplication used by Borg to work, it needs to keep a local cache containing checksums of all file chunks already stored in the repository. This cache is stored in - ``~/.cache/attic/``. If Attic detects that a repository has been + ``~/.cache/borg/``. If Borg detects that a repository has been modified since the local cache was updated it will need to rebuild the cache. This rebuild can be quite time consuming. So, yes it's possible. But it will be most efficient if a single repository is only modified from one place. Also keep in mind that - Attic will keep an exclusive lock on the repository while creating + Borg will keep an exclusive lock on the repository while creating or deleting archives, which may make *simultaneous* backups fail. Which file attributes are preserved? @@ -41,7 +41,7 @@ Which file attributes are preserved? How can I specify the encryption passphrase programmatically? The encryption passphrase can be specified programmatically using the - `ATTIC_PASSPHRASE` environment variable. 
This is convenient when setting up + `BORG_PASSPHRASE` environment variable. This is convenient when setting up automated encrypted backups. Another option is to use key file based encryption with a blank passphrase. See :ref:`encrypted_repos` for more details. @@ -49,7 +49,7 @@ How can I specify the encryption passphrase programmatically? When backing up to remote servers, is data encrypted before leaving the local machine, or do I have to trust that the remote server isn't malicious? Yes, everything is encrypted before leaving the local machine. -If a backup stops mid-way, does the already-backed-up data stay there? I.e. does Attic resume backups? +If a backup stops mid-way, does the already-backed-up data stay there? I.e. does Borg resume backups? Yes, during a backup a special checkpoint archive named ``.checkpoint`` is saved every 5 minutes containing all the data backed-up until that point. This means that at most 5 minutes worth of data needs to be retransmitted if a backup needs to be restarted. diff --git a/docs/foreword.rst b/docs/foreword.rst index 9d625b2ad..dc81e493e 100644 --- a/docs/foreword.rst +++ b/docs/foreword.rst @@ -26,7 +26,7 @@ Off-site backups long as |project_name| is installed. Backups mountable as filesystems - Backup archives are :ref:`mountable <attic_mount>` as + Backup archives are :ref:`mountable <borg_mount>` as `userspace filesystems`_ for easy backup verification and restores. @@ -53,8 +53,8 @@ Repository A repository is a filesystem directory storing data from zero or more archives. The data in a repository is both deduplicated and optionally encrypted making it both efficient and safe. Repositories are - created using :ref:`attic_init` and the contents can be listed using - :ref:`attic_list`. + created using :ref:`borg_init` and the contents can be listed using + :ref:`borg_list`. 
Key file When a repository is initialized a key file containing a password diff --git a/docs/global.rst.inc b/docs/global.rst.inc index ac3f90a11..c0629a143 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -1,9 +1,11 @@ .. highlight:: bash -.. |project_name| replace:: ``Attic`` -.. |package_dirname| replace:: Attic-|version| +.. |project_name| replace:: ``Borg`` +.. |package_dirname| replace:: borgbackup-|version| .. |package_filename| replace:: |package_dirname|.tar.gz -.. |package_url| replace:: https://pypi.python.org/packages/source/A/Attic/|package_filename| -.. |git_url| replace:: https://github.com/jborg/attic.git +.. |package_url| replace:: https://pypi.python.org/packages/source/b/borgbackup/|package_filename| +.. |git_url| replace:: https://github.com/borgbackup/borg.git +.. _github: https://github.com/borgbackup/borg +.. _issue tracker: https://github.com/borgbackup/borg/issues .. _deduplication: https://en.wikipedia.org/wiki/Data_deduplication .. _AES: https://en.wikipedia.org/wiki/Advanced_Encryption_Standard .. _HMAC-SHA256: http://en.wikipedia.org/wiki/HMAC @@ -11,7 +13,6 @@ .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list .. _libacl: http://savannah.nongnu.org/projects/acl/ -.. _github: https://github.com/jborg/attic .. _OpenSSL: https://www.openssl.org/ .. _Python: http://www.python.org/ .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash @@ -19,13 +20,8 @@ .. _`msgpack-python`: https://pypi.python.org/pypi/msgpack-python/ .. _llfuse: https://pypi.python.org/pypi/llfuse/ .. _homebrew: http://mxcl.github.io/homebrew/ -.. _issue tracker: https://github.com/jborg/attic/issues .. _userspace filesystems: https://en.wikipedia.org/wiki/Filesystem_in_Userspace .. _librelist: http://librelist.com/ -.. _Debian: http://packages.debian.org/attic -.. _Ubuntu: http://packages.ubuntu.com/attic -.. _Arch Linux: https://aur.archlinux.org/packages/attic/ -.. 
_Slackware: http://slackbuilds.org/result/?search=Attic .. _Cython: http://cython.org/ .. _virtualenv: https://pypi.python.org/pypi/virtualenv/ .. _mailing list discussion about internals: http://librelist.com/browser/attic/2014/5/6/questions-and-suggestions-about-inner-working-of-attic> diff --git a/docs/index.rst b/docs/index.rst index 029215a0d..e8877ce1a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,6 +1,6 @@ .. include:: global.rst.inc -Welcome to Attic +Welcome to Borg ================ |project_name| is a deduplicating backup program written in Python. The main goal of |project_name| is to provide an efficient and secure way @@ -13,9 +13,9 @@ Easy to use Initialize a new backup :ref:`repository ` and create your first backup :ref:`archive ` in two lines:: - $ attic init /somewhere/my-repository.attic - $ attic create /somewhere/my-repository.attic::Monday ~/Documents - $ attic create --stats /somewhere/my-repository.attic::Tuesday ~/Documents + $ borg init /somewhere/my-repository.borg + $ borg create /somewhere/my-repository.borg::Monday ~/Documents + $ borg create --stats /somewhere/my-repository.borg::Tuesday ~/Documents Archive name: Tuesday Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a Start time: Tue Mar 25 12:00:10 2014 @@ -32,10 +32,7 @@ Easy installation ----------------- You can use pip to install |project_name| quickly and easily:: - $ pip3 install attic - -|project_name| is also part of the Debian_, Ubuntu_, `Arch Linux`_ and Slackware_ -distributions of GNU/Linux. + $ pip3 install borgbackup Need more help with installing? See :ref:`installation`. @@ -59,13 +56,18 @@ If you've found a bug or have a concrete feature request, you can add your bug report or feature request directly to the project's `issue tracker`_. For more general questions or discussions, a post to the mailing list is preferred. +IRC +--- +Join us on channel ##borgbackup on chat.freenode.net. 
As usual on IRC, just +ask or tell directly and then patiently wait for replies. Stay connected. + Mailing list ------------ -There is a mailing list for Attic on librelist_ that you can use for feature -requests and general discussions about Attic. A mailing list archive is -available `here <http://librelist.com/browser/attic/>`_. +There is a mailing list for Borg on librelist_ that you can use for feature +requests and general discussions about Borg. A mailing list archive is +available `here <http://librelist.com/browser/borgbackup/>`_. -To subscribe to the list, send an email to attic@librelist.com and reply +To subscribe to the list, send an email to borgbackup@librelist.com and reply to the confirmation mail. Likewise, to unsubscribe, send an email to -attic-unsubscribe@librelist.com and reply to the confirmation mail. +borgbackup-unsubscribe@librelist.com and reply to the confirmation mail. diff --git a/docs/installation.rst b/docs/installation.rst index fc380ecdd..c42c99473 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -36,11 +36,11 @@ Installing from PyPI using pip To install |project_name| system-wide:: - $ sudo pip3 install Attic + $ sudo pip3 install borgbackup To install it in a user-specific account:: - $ pip3 install --user Attic + $ pip3 install --user borgbackup Then add ``$HOME/.library/bin`` to your ``$PATH``. @@ -48,7 +48,7 @@ Installing from source tarballs ------------------------------- .. parsed-literal:: - $ curl -O :targz_url:`Attic` + $ curl -O :targz_url:`Borg` $ tar -xvzf |package_filename| $ cd |package_dirname| $ sudo python3 setup.py install @@ -58,20 +58,8 @@ Installing from git .. parsed-literal:: $ git clone |git_url| - $ cd attic + $ cd borg $ sudo python3 setup.py install Please note that when installing from git, Cython_ is required to generate some files that are normally bundled with the release tarball. - -Packages --------- - -|project_name| is also part of the Debian_, Ubuntu_, `Arch Linux`_ and Slackware_ -distributions of GNU/Linux. 
- -Standalone binaries -------------------- - -Prebuilt standalone binaries that work on -most Linux systems can be found :artifacts:`here <>`. diff --git a/docs/internals.rst b/docs/internals.rst index 52e2938a9..ead22158b 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -251,14 +251,14 @@ security but limits the maximum repository capacity to only 295 exabytes (2**64 * 16 bytes). Encryption keys are either a passphrase, passed through the -``ATTIC_PASSPHRASE`` environment or prompted on the commandline, or +``BORG_PASSPHRASE`` environment or prompted on the commandline, or stored in automatically generated key files. Key files --------- When initialized with the ``init -e keyfile`` command, |project_name| -needs an associated file in ``$HOME/.attic/keys`` to read and write +needs an associated file in ``$HOME/.borg/keys`` to read and write the repository. The format is based on msgpack_, base64 encoding and PBKDF2_ SHA256 hashing, which is then encoded again in a msgpack_. @@ -312,6 +312,6 @@ data described above The resulting msgpack_ is then encoded using base64 and written to the -key file, wrapped using the standard ``textwrap`` module with a -header. The header is a single line with the string ``ATTIC_KEY``, a -space and a hexadecimal representation of the repository id. +key file, wrapped using the standard ``textwrap`` module with a header. +The header is a single line with a MAGIC string, a space and a hexadecimal +representation of the repository id. diff --git a/docs/quickstart.rst b/docs/quickstart.rst index e02b5ca17..48cf13a60 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -13,16 +13,16 @@ A step by step example 1. Before a backup can be made a repository has to be initialized:: - $ attic init /somewhere/my-repository.attic + $ borg init /somewhere/my-repository.borg 2. 
Backup the ``~/src`` and ``~/Documents`` directories into an archive called *Monday*:: - $ attic create /somewhere/my-repository.attic::Monday ~/src ~/Documents + $ borg create /somewhere/my-repository.borg::Monday ~/src ~/Documents 3. The next day create a new archive called *Tuesday*:: - $ attic create --stats /somewhere/my-repository.attic::Tuesday ~/src ~/Documents + $ borg create --stats /somewhere/my-repository.borg::Tuesday ~/src ~/Documents This backup will be a lot quicker and a lot smaller since only new never before seen data is stored. The ``--stats`` option causes |project_name| to @@ -42,27 +42,27 @@ A step by step example 4. List all archives in the repository:: - $ attic list /somewhere/my-repository.attic + $ borg list /somewhere/my-repository.borg Monday Mon Mar 24 11:59:35 2014 Tuesday Tue Mar 25 12:00:10 2014 5. List the contents of the *Monday* archive:: - $ attic list /somewhere/my-repository.attic::Monday + $ borg list /somewhere/my-repository.borg::Monday drwxr-xr-x user group 0 Jan 06 15:22 home/user/Documents -rw-r--r-- user group 7961 Nov 17 2012 home/user/Documents/Important.doc ... 6. Restore the *Monday* archive:: - $ attic extract /somwhere/my-repository.attic::Monday + $ borg extract /somewhere/my-repository.borg::Monday 7. Recover disk space by manually deleting the *Monday* archive:: - $ attic delete /somwhere/my-backup.attic::Monday + $ borg delete /somewhere/my-backup.borg::Monday .. Note:: - Attic is quiet by default. Add the ``-v`` or ``--verbose`` option to + Borg is quiet by default. Add the ``-v`` or ``--verbose`` option to get progress reporting during command execution. Automating backups @@ -70,15 +70,15 @@ Automating backups The following example script backs up ``/home`` and ``/var/www`` to a remote server. 
The script also uses the -:ref:`attic_prune` subcommand to maintain a certain number +:ref:`borg_prune` subcommand to maintain a certain number of old archives:: #!/bin/sh - REPOSITORY=username@remoteserver.com:repository.attic + REPOSITORY=username@remoteserver.com:repository.borg # Backup all of /home and /var/www except a few # excluded directories - attic create --stats \ + borg create --stats \ $REPOSITORY::hostname-`date +%Y-%m-%d` \ /home \ /var/www \ @@ -88,7 +88,7 @@ of old archives:: # Use the `prune` subcommand to maintain 7 daily, 4 weekly # and 6 monthly archives. - attic prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6 + borg prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6 .. _encrypted_repos: @@ -97,7 +97,7 @@ Repository encryption Repository encryption is enabled at repository creation time:: - $ attic init --encryption=passphrase|keyfile PATH + $ borg init --encryption=passphrase|keyfile PATH When repository encryption is enabled all data is encrypted using 256-bit AES_ encryption and the integrity and authenticity is verified using `HMAC-SHA256`_. @@ -116,11 +116,11 @@ Passphrase based encryption .. Note:: For automated backups the passphrase can be specified using the - `ATTIC_PASSPHRASE` environment variable. + `BORG_PASSPHRASE` environment variable. Key file based encryption This method generates random keys at repository initialization time that - are stored in a password protected file in the ``~/.attic/keys/`` directory. + are stored in a password protected file in the ``~/.borg/keys/`` directory. The key file is a printable text file. This method is secure and suitable for automated backups. @@ -138,18 +138,18 @@ Remote repositories host is accessible using SSH. 
This is fastest and easiest when |project_name| is installed on the remote host, in which case the following syntax is used:: - $ attic init user@hostname:repository.attic + $ borg init user@hostname:repository.borg or:: - $ attic init ssh://user@hostname:port/repository.attic + $ borg init ssh://user@hostname:port/repository.borg If it is not possible to install |project_name| on the remote host, it is still possible to use the remote host to store a repository by mounting the remote filesystem, for example, using sshfs:: $ sshfs user@hostname:/path/to/folder /tmp/mymountpoint - $ attic init /tmp/mymountpoint/repository.attic + $ borg init /tmp/mymountpoint/repository.borg $ fusermount -u /tmp/mymountpoint However, be aware that sshfs doesn't fully implement POSIX locks, so diff --git a/docs/update_usage.sh b/docs/update_usage.sh index 69498decb..3089d6297 100755 --- a/docs/update_usage.sh +++ b/docs/update_usage.sh @@ -4,10 +4,10 @@ if [ ! -d usage ]; then fi for cmd in change-passphrase check create delete extract info init list mount prune; do FILENAME="usage/$cmd.rst.inc" - LINE=`echo -n attic $cmd | tr 'a-z- ' '-'` - echo -e ".. _attic_$cmd:\n" > $FILENAME - echo -e "attic $cmd\n$LINE\n::\n\n" >> $FILENAME - attic help $cmd --usage-only | sed -e 's/^/ /' >> $FILENAME + LINE=`echo -n borg $cmd | tr 'a-z- ' '-'` + echo -e ".. 
_borg_$cmd:\n" > $FILENAME + echo -e "borg $cmd\n$LINE\n::\n\n" >> $FILENAME + borg help $cmd --usage-only | sed -e 's/^/ /' >> $FILENAME echo -e "\nDescription\n~~~~~~~~~~~\n" >> $FILENAME - attic help $cmd --epilog-only >> $FILENAME + borg help $cmd --epilog-only >> $FILENAME done diff --git a/docs/usage.rst b/docs/usage.rst index d04b8e13c..d0e192b69 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -22,13 +22,13 @@ Examples :: # Local repository - $ attic init /data/mybackuprepo.attic + $ borg init /data/mybackuprepo.borg # Remote repository - $ attic init user@hostname:mybackuprepo.attic + $ borg init user@hostname:mybackuprepo.borg # Encrypted remote repository - $ attic init --encryption=passphrase user@hostname:mybackuprepo.attic + $ borg init --encryption=passphrase user@hostname:mybackuprepo.borg .. include:: usage/create.rst.inc @@ -38,17 +38,17 @@ Examples :: # Backup ~/Documents into an archive named "my-documents" - $ attic create /data/myrepo.attic::my-documents ~/Documents + $ borg create /data/myrepo.borg::my-documents ~/Documents # Backup ~/Documents and ~/src but exclude pyc files - $ attic create /data/myrepo.attic::my-files \ + $ borg create /data/myrepo.borg::my-files \ ~/Documents \ ~/src \ --exclude '*.pyc' # Backup the root filesystem into an archive named "root-YYYY-MM-DD" NAME="root-`date +%Y-%m-%d`" - $ attic create /data/myrepo.attic::$NAME / --do-not-cross-mountpoints + $ borg create /data/myrepo.borg::$NAME / --do-not-cross-mountpoints .. 
include:: usage/extract.rst.inc @@ -58,16 +58,16 @@ Examples :: # Extract entire archive - $ attic extract /data/myrepo::my-files + $ borg extract /data/myrepo::my-files # Extract entire archive and list files while processing - $ attic extract -v /data/myrepo::my-files + $ borg extract -v /data/myrepo::my-files # Extract the "src" directory - $ attic extract /data/myrepo::my-files home/USERNAME/src + $ borg extract /data/myrepo::my-files home/USERNAME/src # Extract the "src" directory but exclude object files - $ attic extract /data/myrepo::my-files home/USERNAME/src --exclude '*.o' + $ borg extract /data/myrepo::my-files home/USERNAME/src --exclude '*.o' .. include:: usage/check.rst.inc @@ -79,14 +79,14 @@ Examples ~~~~~~~~ :: - $ attic list /data/myrepo + $ borg list /data/myrepo my-files Thu Aug 1 23:33:22 2013 my-documents Thu Aug 1 23:35:43 2013 root-2013-08-01 Thu Aug 1 23:43:55 2013 root-2013-08-02 Fri Aug 2 15:18:17 2013 ... - $ attic list /data/myrepo::root-2013-08-02 + $ borg list /data/myrepo::root-2013-08-02 drwxr-xr-x root root 0 Jun 05 12:06 . 
lrwxrwxrwx root root 0 May 31 20:40 bin -> usr/bin drwxr-xr-x root root 0 Aug 01 22:08 etc @@ -102,18 +102,18 @@ Examples :: # Keep 7 end of day and 4 additional end of week archives: - $ attic prune /data/myrepo --keep-daily=7 --keep-weekly=4 + $ borg prune /data/myrepo --keep-daily=7 --keep-weekly=4 # Same as above but only apply to archive names starting with "foo": - $ attic prune /data/myrepo --keep-daily=7 --keep-weekly=4 --prefix=foo + $ borg prune /data/myrepo --keep-daily=7 --keep-weekly=4 --prefix=foo # Keep 7 end of day, 4 additional end of week archives, # and an end of month archive for every month: - $ attic prune /data/myrepo --keep-daily=7 --keep-weekly=4 --monthly=-1 + $ borg prune /data/myrepo --keep-daily=7 --keep-weekly=4 --monthly=-1 # Keep all backups in the last 10 days, 4 additional end of week archives, # and an end of month archive for every month: - $ attic prune /data/myrepo --keep-within=10d --keep-weekly=4 --monthly=-1 + $ borg prune /data/myrepo --keep-within=10d --keep-weekly=4 --monthly=-1 .. 
include:: usage/info.rst.inc @@ -122,13 +122,13 @@ Examples ~~~~~~~~ :: - $ attic info /data/myrepo::root-2013-08-02 + $ borg info /data/myrepo::root-2013-08-02 Name: root-2013-08-02 Fingerprint: bc3902e2c79b6d25f5d769b335c5c49331e6537f324d8d3badcb9a0917536dbb Hostname: myhostname Username: root Time: Fri Aug 2 15:18:17 2013 - Command line: /usr/bin/attic create --stats /data/myrepo::root-2013-08-02 / --do-not-cross-mountpoints + Command line: /usr/bin/borg create --stats /data/myrepo::root-2013-08-02 / --do-not-cross-mountpoints Number of files: 147429 Original size: 5344169493 (4.98 GB) Compressed size: 1748189642 (1.63 GB) @@ -141,7 +141,7 @@ Examples ~~~~~~~~ :: - $ attic mount /data/myrepo::root-2013-08-02 /tmp/mymountpoint + $ borg mount /data/myrepo::root-2013-08-02 /tmp/mymountpoint $ ls /tmp/mymountpoint bin boot etc lib lib64 mnt opt root sbin srv usr var $ fusermount -u /tmp/mymountpoint @@ -154,16 +154,16 @@ Examples :: # Create a key file protected repository - $ attic init --encryption=keyfile /tmp/encrypted-repo + $ borg init --encryption=keyfile /tmp/encrypted-repo Initializing repository at "/tmp/encrypted-repo" Enter passphrase (empty for no passphrase): Enter same passphrase again: - Key file "/home/USER/.attic/keys/tmp_encrypted_repo" created. + Key file "/home/USER/.borg/keys/tmp_encrypted_repo" created. Keep this file safe. Your data will be inaccessible without it. 
# Change key file passphrase - $ attic change-passphrase /tmp/encrypted-repo - Enter passphrase for key file /home/USER/.attic/keys/tmp_encrypted_repo: + $ borg change-passphrase /tmp/encrypted-repo + Enter passphrase for key file /home/USER/.borg/keys/tmp_encrypted_repo: New passphrase: Enter same passphrase again: - Key file "/home/USER/.attic/keys/tmp_encrypted_repo" updated + Key file "/home/USER/.borg/keys/tmp_encrypted_repo" updated diff --git a/scripts/attic b/scripts/borg similarity index 100% rename from scripts/attic rename to scripts/borg diff --git a/setup.py b/setup.py index 6a166805d..1c6da086b 100644 --- a/setup.py +++ b/setup.py @@ -7,11 +7,11 @@ import versioneer versioneer.versionfile_source = 'attic/_version.py' versioneer.versionfile_build = 'attic/_version.py' versioneer.tag_prefix = '' -versioneer.parentdir_prefix = 'Attic-' # dirname like 'myproject-1.2.0' +versioneer.parentdir_prefix = 'borgbackup-' # dirname like 'myproject-1.2.0' min_python = (3, 2) if sys.version_info < min_python: - print("Attic requires Python %d.%d or later" % min_python) + print("Borg requires Python %d.%d or later" % min_python) sys.exit(1) try: @@ -54,7 +54,7 @@ except ImportError: platform_darwin_source = platform_darwin_source.replace('.pyx', '.c') from distutils.command.build_ext import build_ext if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]): - raise ImportError('The GIT version of Attic needs Cython. Install Cython or use a released version') + raise ImportError('The GIT version of Borg needs Cython. 
Install Cython or use a released version') def detect_openssl(prefixes): @@ -67,8 +67,8 @@ def detect_openssl(prefixes): possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/attic', '/opt/local'] -if os.environ.get('ATTIC_OPENSSL_PREFIX'): - possible_openssl_prefixes.insert(0, os.environ.get('ATTIC_OPENSSL_PREFIX')) +if os.environ.get('BORG_OPENSSL_PREFIX'): + possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX')) ssl_prefix = detect_openssl(possible_openssl_prefixes) if not ssl_prefix: raise Exception('Unable to find OpenSSL >= 1.0 headers. (Looked here: {})'.format(', '.join(possible_openssl_prefixes))) @@ -95,12 +95,12 @@ elif sys.platform == 'darwin': ext_modules.append(Extension('attic.platform_darwin', [platform_darwin_source])) setup( - name='Attic', + name='borgbackup', version=versioneer.get_version(), - author='Jonas Borgstrom', - author_email='jonas@borgstrom.se', - url='https://attic-backup.org/', - description='Deduplicated backups', + author='The Borg Collective (see AUTHORS file)', + author_email='borgbackup@librelist.com', + url='https://borgbackup.github.io/', + description='Deduplicated, encrypted, authenticated and compressed backups', long_description=long_description, license='BSD', platforms=['Linux', 'MacOS X'], @@ -117,7 +117,7 @@ setup( 'Topic :: System :: Archiving :: Backup', ], packages=['attic', 'attic.testsuite'], - scripts=['scripts/attic'], + scripts=['scripts/borg'], cmdclass=cmdclass, ext_modules=ext_modules, # msgpack pure python data corruption was fixed in 0.4.6. From 35d06afd6b0dd5d96bbd47933f36996febf51c10 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 9 May 2015 19:41:03 +0200 Subject: [PATCH 085/241] get rid of some .borg, be consistent with the backup repo name and path it's pretty useless to have .borg as a directory extension, especially since there is a README in there stating that this is a borg repo. 
consistency: "backup" is always used as relative backup repository path "/mnt/backup" is always used as absolute repository path --- README.rst | 4 +-- attic/archiver.py | 8 +++--- attic/testsuite/helpers.py | 4 +-- docs/index.rst | 6 ++--- docs/quickstart.rst | 26 +++++++++--------- docs/usage.rst | 55 +++++++++++++++++++------------------- 6 files changed, 52 insertions(+), 51 deletions(-) diff --git a/README.rst b/README.rst index 9b21011bd..07421f280 100644 --- a/README.rst +++ b/README.rst @@ -13,8 +13,8 @@ Easy to use ~~~~~~~~~~~ Initialize backup repository and create a backup archive:: - $ borg init /usbdrive/my-backup.borg - $ borg create -v /usbdrive/my-backup.borg::documents ~/Documents + $ borg init /mnt/backup + $ borg create -v /mnt/backup::documents ~/Documents Main features ~~~~~~~~~~~~~ diff --git a/attic/archiver.py b/attic/archiver.py index 7a8311519..315bbffd9 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -435,17 +435,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") Examples: # Exclude '/home/user/file.o' but not '/home/user/file.odt': - $ borg create -e '*.o' repo.borg / + $ borg create -e '*.o' backup / # Exclude '/home/user/junk' and '/home/user/subdir/junk' but # not '/home/user/importantjunk' or '/etc/junk': - $ borg create -e '/home/*/junk' repo.borg / + $ borg create -e '/home/*/junk' backup / # Exclude the contents of '/home/user/cache' but not the directory itself: - $ borg create -e /home/user/cache/ repo.borg / + $ borg create -e /home/user/cache/ backup / # The file '/home/user/cache/important' is *not* backed up: - $ borg create -e /home/user/cache/ repo.borg / /home/user/cache/important + $ borg create -e /home/user/cache/ backup / /home/user/cache/important ''' def do_help(self, parser, commands, args): diff --git a/attic/testsuite/helpers.py b/attic/testsuite/helpers.py index 9cbfb1629..b509b67e8 100644 --- a/attic/testsuite/helpers.py +++ b/attic/testsuite/helpers.py @@ -38,8 +38,8 @@
class LocationTestCase(AtticTestCase): "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')" ) self.assert_equal( - repr(Location('mybackup.borg::archive')), - "Location(proto='file', user=None, host=None, port=None, path='mybackup.borg', archive='archive')" + repr(Location('path::archive')), + "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')" ) self.assert_equal( repr(Location('/some/absolute/path::archive')), diff --git a/docs/index.rst b/docs/index.rst index e8877ce1a..723ad3d7f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,9 +13,9 @@ Easy to use Initialize a new backup :ref:`repository ` and create your first backup :ref:`archive ` in two lines:: - $ borg init /somewhere/my-repository.borg - $ borg create /somewhere/my-repository.borg::Monday ~/Documents - $ borg create --stats /somewhere/my-repository.borg::Tuesday ~/Documents + $ borg init /mnt/backup + $ borg create /mnt/backup::Monday ~/Documents + $ borg create --stats /mnt/backup::Tuesday ~/Documents Archive name: Tuesday Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a Start time: Tue Mar 25 12:00:10 2014 diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 48cf13a60..a6f131c8d 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -13,16 +13,16 @@ A step by step example 1. Before a backup can be made a repository has to be initialized:: - $ borg init /somewhere/my-repository.borg + $ borg init /mnt/backup 2. Backup the ``~/src`` and ``~/Documents`` directories into an archive called *Monday*:: - $ borg create /somewhere/my-repository.borg::Monday ~/src ~/Documents + $ borg create /mnt/backup::Monday ~/src ~/Documents 3. 
The next day create a new archive called *Tuesday*:: - $ borg create --stats /somewhere/my-repository.borg::Tuesday ~/src ~/Documents + $ borg create --stats /mnt/backup::Tuesday ~/src ~/Documents This backup will be a lot quicker and a lot smaller since only new never before seen data is stored. The ``--stats`` option causes |project_name| to @@ -42,24 +42,24 @@ A step by step example 4. List all archives in the repository:: - $ borg list /somewhere/my-repository.borg + $ borg list /mnt/backup Monday Mon Mar 24 11:59:35 2014 Tuesday Tue Mar 25 12:00:10 2014 5. List the contents of the *Monday* archive:: - $ borg list /somewhere/my-repository.borg::Monday + $ borg list /mnt/backup::Monday drwxr-xr-x user group 0 Jan 06 15:22 home/user/Documents -rw-r--r-- user group 7961 Nov 17 2012 home/user/Documents/Important.doc ... 6. Restore the *Monday* archive:: - $ borg extract /somwhere/my-repository.borg::Monday + $ borg extract /mnt/backup::Monday 7. Recover disk space by manually deleting the *Monday* archive:: - $ borg delete /somwhere/my-backup.borg::Monday + $ borg delete /mnt/backup::Monday .. Note:: Borg is quiet by default. Add the ``-v`` or ``--verbose`` option to @@ -74,7 +74,7 @@ The following example script backs up ``/home`` and of old archives:: #!/bin/sh - REPOSITORY=username@remoteserver.com:repository.borg + REPOSITORY=username@remoteserver.com:backup # Backup all of /home and /var/www except a few # excluded directories @@ -138,19 +138,19 @@ Remote repositories host is accessible using SSH. 
This is fastest and easiest when |project_name| is installed on the remote host, in which case the following syntax is used:: - $ borg init user@hostname:repository.borg + $ borg init user@hostname:backup or:: - $ borg init ssh://user@hostname:port/repository.borg + $ borg init ssh://user@hostname:port/backup If it is not possible to install |project_name| on the remote host, it is still possible to use the remote host to store a repository by mounting the remote filesystem, for example, using sshfs:: - $ sshfs user@hostname:/path/to/folder /tmp/mymountpoint - $ borg init /tmp/mymountpoint/repository.borg - $ fusermount -u /tmp/mymountpoint + $ sshfs user@hostname:/path/to/folder /mnt + $ borg init /mnt/backup + $ fusermount -u /mnt However, be aware that sshfs doesn't fully implement POSIX locks, so you must be sure to not have two processes trying to access the same diff --git a/docs/usage.rst b/docs/usage.rst index d0e192b69..2f391b043 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -22,13 +22,13 @@ Examples :: # Local repository - $ borg init /data/mybackuprepo.borg + $ borg init /mnt/backup # Remote repository - $ borg init user@hostname:mybackuprepo.borg + $ borg init user@hostname:backup # Encrypted remote repository - $ borg init --encryption=passphrase user@hostname:mybackuprepo.borg + $ borg init --encryption=passphrase user@hostname:backup .. 
include:: usage/create.rst.inc @@ -38,17 +38,17 @@ Examples :: # Backup ~/Documents into an archive named "my-documents" - $ borg create /data/myrepo.borg::my-documents ~/Documents + $ borg create /mnt/backup::my-documents ~/Documents # Backup ~/Documents and ~/src but exclude pyc files - $ borg create /data/myrepo.borg::my-files \ - ~/Documents \ - ~/src \ + $ borg create /mnt/backup::my-files \ + ~/Documents \ + ~/src \ --exclude '*.pyc' # Backup the root filesystem into an archive named "root-YYYY-MM-DD" NAME="root-`date +%Y-%m-%d`" - $ borg create /data/myrepo.borg::$NAME / --do-not-cross-mountpoints + $ borg create /mnt/backup::$NAME / --do-not-cross-mountpoints .. include:: usage/extract.rst.inc @@ -58,16 +58,16 @@ Examples :: # Extract entire archive - $ borg extract /data/myrepo::my-files + $ borg extract /mnt/backup::my-files # Extract entire archive and list files while processing - $ borg extract -v /data/myrepo::my-files + $ borg extract -v /mnt/backup::my-files # Extract the "src" directory - $ borg extract /data/myrepo::my-files home/USERNAME/src + $ borg extract /mnt/backup::my-files home/USERNAME/src # Extract the "src" directory but exclude object files - $ borg extract /data/myrepo::my-files home/USERNAME/src --exclude '*.o' + $ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o' .. include:: usage/check.rst.inc @@ -79,14 +79,14 @@ Examples ~~~~~~~~ :: - $ borg list /data/myrepo + $ borg list /mnt/backup my-files Thu Aug 1 23:33:22 2013 my-documents Thu Aug 1 23:35:43 2013 root-2013-08-01 Thu Aug 1 23:43:55 2013 root-2013-08-02 Fri Aug 2 15:18:17 2013 ... - $ borg list /data/myrepo::root-2013-08-02 + $ borg list /mnt/backup::root-2013-08-02 drwxr-xr-x root root 0 Jun 05 12:06 . 
lrwxrwxrwx root root 0 May 31 20:40 bin -> usr/bin drwxr-xr-x root root 0 Aug 01 22:08 etc @@ -102,18 +102,18 @@ Examples :: # Keep 7 end of day and 4 additional end of week archives: - $ borg prune /data/myrepo --keep-daily=7 --keep-weekly=4 + $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 # Same as above but only apply to archive names starting with "foo": - $ borg prune /data/myrepo --keep-daily=7 --keep-weekly=4 --prefix=foo + $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 --prefix=foo # Keep 7 end of day, 4 additional end of week archives, # and an end of month archive for every month: - $ borg prune /data/myrepo --keep-daily=7 --keep-weekly=4 --monthly=-1 + $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 --monthly=-1 # Keep all backups in the last 10 days, 4 additional end of week archives, # and an end of month archive for every month: - $ borg prune /data/myrepo --keep-within=10d --keep-weekly=4 --monthly=-1 + $ borg prune /mnt/backup --keep-within=10d --keep-weekly=4 --monthly=-1 .. 
include:: usage/info.rst.inc @@ -122,13 +122,13 @@ Examples ~~~~~~~~ :: - $ borg info /data/myrepo::root-2013-08-02 + $ borg info /mnt/backup::root-2013-08-02 Name: root-2013-08-02 Fingerprint: bc3902e2c79b6d25f5d769b335c5c49331e6537f324d8d3badcb9a0917536dbb Hostname: myhostname Username: root Time: Fri Aug 2 15:18:17 2013 - Command line: /usr/bin/borg create --stats /data/myrepo::root-2013-08-02 / --do-not-cross-mountpoints + Command line: /usr/bin/borg create --stats /mnt/backup::root-2013-08-02 / --do-not-cross-mountpoints Number of files: 147429 Original size: 5344169493 (4.98 GB) Compressed size: 1748189642 (1.63 GB) @@ -141,7 +141,7 @@ Examples ~~~~~~~~ :: - $ borg mount /data/myrepo::root-2013-08-02 /tmp/mymountpoint + $ borg mount /mnt/backup::root-2013-08-02 /tmp/mymountpoint $ ls /tmp/mymountpoint bin boot etc lib lib64 mnt opt root sbin srv usr var $ fusermount -u /tmp/mymountpoint @@ -154,16 +154,17 @@ Examples :: # Create a key file protected repository - $ borg init --encryption=keyfile /tmp/encrypted-repo - Initializing repository at "/tmp/encrypted-repo" + $ borg init --encryption=keyfile /mnt/backup + Initializing repository at "/mnt/backup" Enter passphrase (empty for no passphrase): Enter same passphrase again: - Key file "/home/USER/.borg/keys/tmp_encrypted_repo" created. + Key file "/home/USER/.borg/keys/mnt_backup" created. Keep this file safe. Your data will be inaccessible without it. 
# Change key file passphrase - $ borg change-passphrase /tmp/encrypted-repo - Enter passphrase for key file /home/USER/.borg/keys/tmp_encrypted_repo: + $ borg change-passphrase /mnt/backup + Enter passphrase for key file /home/USER/.borg/keys/mnt_backup: New passphrase: Enter same passphrase again: - Key file "/home/USER/.borg/keys/tmp_encrypted_repo" updated + Key file "/home/USER/.borg/keys/mnt_backup" updated + From 159315ef5dbe3491f5f1a53fb76b95675c7a4ace Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 9 May 2015 20:12:23 +0200 Subject: [PATCH 086/241] use BORG_* as file MAGIC, adapt tests --- attic/_hashindex.c | 2 +- attic/key.py | 5 +++-- attic/repository.py | 2 +- attic/testsuite/hashindex.py | 6 ++++-- attic/testsuite/key.py | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/attic/_hashindex.c b/attic/_hashindex.c index a1f70699d..2c3ed01bf 100644 --- a/attic/_hashindex.c +++ b/attic/_hashindex.c @@ -39,7 +39,7 @@ typedef struct { off_t data_len; } HashIndex; -#define MAGIC "ATTICIDX" +#define MAGIC "BORG_IDX" #define EMPTY _htole32(0xffffffff) #define DELETED _htole32(0xfffffffe) #define MAX_BUCKET_SIZE 512 diff --git a/attic/key.py b/attic/key.py index 4b71d747b..f61798b9b 100644 --- a/attic/key.py +++ b/attic/key.py @@ -207,7 +207,7 @@ class PassphraseKey(AESKeyBase): class KeyfileKey(AESKeyBase): - FILE_ID = 'ATTIC KEY' + FILE_ID = 'BORG_KEY' TYPE = 0x00 @classmethod @@ -230,7 +230,8 @@ class KeyfileKey(AESKeyBase): filename = os.path.join(keys_dir, name) with open(filename, 'r') as fd: line = fd.readline().strip() - if line and line.startswith(cls.FILE_ID) and line[10:] == id: + if (line and line.startswith(cls.FILE_ID) and + line[len(cls.FILE_ID)+1:] == id): return filename raise KeyfileNotFoundError(repository._location.canonical_path(), get_keys_dir()) diff --git a/attic/repository.py b/attic/repository.py index 893c04711..83744ce66 100644 --- a/attic/repository.py +++ b/attic/repository.py @@ -13,7 +13,7 @@ from 
.helpers import Error, IntegrityError, read_msgpack, write_msgpack, unhexli from .lrucache import LRUCache MAX_OBJECT_SIZE = 20 * 1024 * 1024 -MAGIC = b'ATTICSEG' +MAGIC = b'BORG_SEG' TAG_PUT = 0 TAG_DELETE = 1 TAG_COMMIT = 2 diff --git a/attic/testsuite/hashindex.py b/attic/testsuite/hashindex.py index 1f6aa22aa..853fe844c 100644 --- a/attic/testsuite/hashindex.py +++ b/attic/testsuite/hashindex.py @@ -44,10 +44,12 @@ class HashIndexTestCase(AtticTestCase): self.assert_equal(len(cls.read(idx_name.name)), 0) def test_nsindex(self): - self._generic_test(NSIndex, lambda x: (x, x), '369a18ae6a52524eb2884a3c0fdc2824947edd017a2688c5d4d7b3510c245ab9') + self._generic_test(NSIndex, lambda x: (x, x), + '861d6d60069ea45e39d36bed2bdc1d0c07981e0641955f897ac6848be429abac') def test_chunkindex(self): - self._generic_test(ChunkIndex, lambda x: (x, x, x), 'ed22e8a883400453c0ee79a06c54df72c994a54eeefdc6c0989efdc5ee6d07b7') + self._generic_test(ChunkIndex, lambda x: (x, x, x), + '69464bd0ebbc5866b9f95d838bc48617d21bfe3dcf294682a5c21a2ef6b9dc0b') def test_resize(self): n = 2000 # Must be >= MIN_BUCKETS diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index 35e588aa4..dcc5295bd 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -15,7 +15,7 @@ class KeyTestCase(AtticTestCase): repository = Location(tempfile.mkstemp()[1]) keyfile2_key_file = """ - ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000 + BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000 hqppdGVyYXRpb25zzgABhqCkaGFzaNoAIMyonNI+7Cjv0qHi0AOBM6bLGxACJhfgzVD2oq bIS9SFqWFsZ29yaXRobaZzaGEyNTakc2FsdNoAINNK5qqJc1JWSUjACwFEWGTdM7Nd0a5l 1uBGPEb+9XM9p3ZlcnNpb24BpGRhdGHaANAYDT5yfPpU099oBJwMomsxouKyx/OG4QIXK2 From 0b3710be225e86d2138586da7c386d1ad2ec2585 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 9 May 2015 20:47:50 +0200 Subject: [PATCH 087/241] rename test base class --- attic/testsuite/__init__.py | 2 +- attic/testsuite/archive.py | 8 ++++---- 
attic/testsuite/archiver.py | 4 ++-- attic/testsuite/chunker.py | 4 ++-- attic/testsuite/crypto.py | 4 ++-- attic/testsuite/hashindex.py | 4 ++-- attic/testsuite/helpers.py | 22 +++++++++++----------- attic/testsuite/key.py | 4 ++-- attic/testsuite/lrucache.py | 4 ++-- attic/testsuite/platform.py | 6 +++--- attic/testsuite/repository.py | 5 +++-- attic/testsuite/xattr.py | 4 ++-- 12 files changed, 36 insertions(+), 35 deletions(-) diff --git a/attic/testsuite/__init__.py b/attic/testsuite/__init__.py index 6b9d4fe91..421a0c329 100644 --- a/attic/testsuite/__init__.py +++ b/attic/testsuite/__init__.py @@ -32,7 +32,7 @@ has_mtime_ns = sys.version >= '3.3' utime_supports_fd = os.utime in getattr(os, 'supports_fd', {}) -class AtticTestCase(unittest.TestCase): +class BaseTestCase(unittest.TestCase): """ """ assert_in = unittest.TestCase.assertIn diff --git a/attic/testsuite/archive.py b/attic/testsuite/archive.py index a0af6c534..1d9b7004d 100644 --- a/attic/testsuite/archive.py +++ b/attic/testsuite/archive.py @@ -1,5 +1,5 @@ import msgpack -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase from attic.testsuite.mock import Mock from attic.archive import Archive, CacheChunkBuffer, RobustUnpacker from attic.key import PlaintextKey @@ -17,7 +17,7 @@ class MockCache: return id, len(data), len(data) -class ArchiveTimestampTestCase(AtticTestCase): +class ArchiveTimestampTestCase(BaseTestCase): def _test_timestamp_parsing(self, isoformat, expected): repository = Mock() @@ -38,7 +38,7 @@ class ArchiveTimestampTestCase(AtticTestCase): datetime(1970, 1, 1, 0, 0, 1, 0, timezone.utc)) -class ChunkBufferTestCase(AtticTestCase): +class ChunkBufferTestCase(BaseTestCase): def test(self): data = [{b'foo': 1}, {b'bar': 2}] @@ -56,7 +56,7 @@ class ChunkBufferTestCase(AtticTestCase): self.assert_equal(data, list(unpacker)) -class RobustUnpackerTestCase(AtticTestCase): +class RobustUnpackerTestCase(BaseTestCase): def make_chunks(self, items): return 
b''.join(msgpack.packb({'path': item}) for item in items) diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index 534a46564..d0625a843 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -18,7 +18,7 @@ from attic.crypto import bytes_to_long, num_aes_blocks from attic.helpers import Manifest from attic.remote import RemoteRepository, PathNotAllowed from attic.repository import Repository -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase from attic.testsuite.mock import patch try: @@ -60,7 +60,7 @@ class environment_variable: os.environ[k] = v -class ArchiverTestCaseBase(AtticTestCase): +class ArchiverTestCaseBase(BaseTestCase): prefix = '' diff --git a/attic/testsuite/chunker.py b/attic/testsuite/chunker.py index 90c4a8c50..cb5bb55ec 100644 --- a/attic/testsuite/chunker.py +++ b/attic/testsuite/chunker.py @@ -1,10 +1,10 @@ from attic.chunker import Chunker, buzhash, buzhash_update -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase from attic.archive import CHUNK_MAX from io import BytesIO -class ChunkerTestCase(AtticTestCase): +class ChunkerTestCase(BaseTestCase): def test_chunkify(self): data = b'0' * int(1.5 * CHUNK_MAX) + b'Y' diff --git a/attic/testsuite/crypto.py b/attic/testsuite/crypto.py index f6ee3a4c5..066ba1815 100644 --- a/attic/testsuite/crypto.py +++ b/attic/testsuite/crypto.py @@ -1,9 +1,9 @@ from binascii import hexlify -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase from attic.crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, pbkdf2_sha256, get_random_bytes -class CryptoTestCase(AtticTestCase): +class CryptoTestCase(BaseTestCase): def test_bytes_to_int(self): self.assert_equal(bytes_to_int(b'\0\0\0\1'), 1) diff --git a/attic/testsuite/hashindex.py b/attic/testsuite/hashindex.py index 853fe844c..de2a3eaae 100644 --- a/attic/testsuite/hashindex.py +++ b/attic/testsuite/hashindex.py @@ 
-2,10 +2,10 @@ import hashlib import os import tempfile from attic.hashindex import NSIndex, ChunkIndex -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase -class HashIndexTestCase(AtticTestCase): +class HashIndexTestCase(BaseTestCase): def _generic_test(self, cls, make_value, sha): idx = cls() diff --git a/attic/testsuite/helpers.py b/attic/testsuite/helpers.py index b509b67e8..23c604f43 100644 --- a/attic/testsuite/helpers.py +++ b/attic/testsuite/helpers.py @@ -6,11 +6,11 @@ import tempfile import unittest from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase import msgpack -class BigIntTestCase(AtticTestCase): +class BigIntTestCase(BaseTestCase): def test_bigint(self): self.assert_equal(int_to_bigint(0), 0) @@ -22,7 +22,7 @@ class BigIntTestCase(AtticTestCase): self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70) -class LocationTestCase(AtticTestCase): +class LocationTestCase(BaseTestCase): def test(self): self.assert_equal( @@ -60,7 +60,7 @@ class LocationTestCase(AtticTestCase): Location(Location(location).canonical_path()).canonical_path()) -class FormatTimedeltaTestCase(AtticTestCase): +class FormatTimedeltaTestCase(BaseTestCase): def test(self): t0 = datetime(2001, 1, 1, 10, 20, 3, 0) @@ -71,7 +71,7 @@ class FormatTimedeltaTestCase(AtticTestCase): ) -class PatternTestCase(AtticTestCase): +class PatternTestCase(BaseTestCase): files = [ '/etc/passwd', '/etc/hosts', '/home', @@ -104,7 +104,7 @@ class PatternTestCase(AtticTestCase): ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) -class MakePathSafeTestCase(AtticTestCase): +class MakePathSafeTestCase(BaseTestCase): def test(self): 
self.assert_equal(make_path_safe('/foo/bar'), 'foo/bar') @@ -116,7 +116,7 @@ class MakePathSafeTestCase(AtticTestCase): self.assert_equal(make_path_safe('/'), '.') self.assert_equal(make_path_safe('/'), '.') -class UpgradableLockTestCase(AtticTestCase): +class UpgradableLockTestCase(BaseTestCase): def test(self): file = tempfile.NamedTemporaryFile() @@ -143,7 +143,7 @@ class MockArchive: return repr(self.ts) -class PruneSplitTestCase(AtticTestCase): +class PruneSplitTestCase(BaseTestCase): def test(self): @@ -172,7 +172,7 @@ class PruneSplitTestCase(AtticTestCase): dotest(test_archives, 0, [], []) -class PruneWithinTestCase(AtticTestCase): +class PruneWithinTestCase(BaseTestCase): def test(self): @@ -203,7 +203,7 @@ class PruneWithinTestCase(AtticTestCase): dotest(test_archives, '1y', [0, 1, 2, 3, 4, 5]) -class StableDictTestCase(AtticTestCase): +class StableDictTestCase(BaseTestCase): def test(self): d = StableDict(foo=1, bar=2, boo=3, baz=4) @@ -211,7 +211,7 @@ class StableDictTestCase(AtticTestCase): self.assert_equal(hashlib.md5(msgpack.packb(d)).hexdigest(), 'fc78df42cd60691b3ac3dd2a2b39903f') -class TestParseTimestamp(AtticTestCase): +class TestParseTimestamp(BaseTestCase): def test(self): self.assert_equal(parse_timestamp('2015-04-19T20:25:00.226410'), datetime(2015, 4, 19, 20, 25, 0, 226410, timezone.utc)) diff --git a/attic/testsuite/key.py b/attic/testsuite/key.py index dcc5295bd..32ac64974 100644 --- a/attic/testsuite/key.py +++ b/attic/testsuite/key.py @@ -4,12 +4,12 @@ import shutil import tempfile from binascii import hexlify from attic.crypto import bytes_to_long, num_aes_blocks -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase from attic.key import PlaintextKey, PassphraseKey, KeyfileKey from attic.helpers import Location, unhexlify -class KeyTestCase(AtticTestCase): +class KeyTestCase(BaseTestCase): class MockArgs: repository = Location(tempfile.mkstemp()[1]) diff --git a/attic/testsuite/lrucache.py 
b/attic/testsuite/lrucache.py index 9b51a7aab..83d23cfc5 100644 --- a/attic/testsuite/lrucache.py +++ b/attic/testsuite/lrucache.py @@ -1,8 +1,8 @@ from attic.lrucache import LRUCache -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase -class LRUCacheTestCase(AtticTestCase): +class LRUCacheTestCase(BaseTestCase): def test(self): c = LRUCache(2) diff --git a/attic/testsuite/platform.py b/attic/testsuite/platform.py index 90abfd85e..bb3ab32b7 100644 --- a/attic/testsuite/platform.py +++ b/attic/testsuite/platform.py @@ -4,7 +4,7 @@ import sys import tempfile import unittest from attic.platform import acl_get, acl_set -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase ACCESS_ACL = """ @@ -36,7 +36,7 @@ def fakeroot_detected(): @unittest.skipUnless(sys.platform.startswith('linux'), 'linux only test') @unittest.skipIf(fakeroot_detected(), 'not compatible with fakeroot') -class PlatformLinuxTestCase(AtticTestCase): +class PlatformLinuxTestCase(BaseTestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() @@ -74,7 +74,7 @@ class PlatformLinuxTestCase(AtticTestCase): @unittest.skipUnless(sys.platform.startswith('darwin'), 'OS X only test') @unittest.skipIf(fakeroot_detected(), 'not compatible with fakeroot') -class PlatformDarwinTestCase(AtticTestCase): +class PlatformDarwinTestCase(BaseTestCase): def setUp(self): self.tmpdir = tempfile.mkdtemp() diff --git a/attic/testsuite/repository.py b/attic/testsuite/repository.py index 18946a3ed..7be454e5b 100644 --- a/attic/testsuite/repository.py +++ b/attic/testsuite/repository.py @@ -6,10 +6,11 @@ from attic.hashindex import NSIndex from attic.helpers import Location, IntegrityError, UpgradableLock from attic.remote import RemoteRepository, InvalidRPCMethod from attic.repository import Repository -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase -class RepositoryTestCaseBase(AtticTestCase): +class 
RepositoryTestCaseBase(BaseTestCase): + key_size = 32 def open(self, create=False): return Repository(os.path.join(self.tmppath, 'repository'), create=create) diff --git a/attic/testsuite/xattr.py b/attic/testsuite/xattr.py index 7d6e5939a..952b70d80 100644 --- a/attic/testsuite/xattr.py +++ b/attic/testsuite/xattr.py @@ -1,12 +1,12 @@ import os import tempfile import unittest -from attic.testsuite import AtticTestCase +from attic.testsuite import BaseTestCase from attic.xattr import is_enabled, getxattr, setxattr, listxattr @unittest.skipUnless(is_enabled(), 'xattr not enabled on filesystem') -class XattrTestCase(AtticTestCase): +class XattrTestCase(BaseTestCase): def setUp(self): self.tmpfile = tempfile.NamedTemporaryFile() From d84bbb72ecccfdeb05e966d8a531878784fa8326 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 9 May 2015 20:52:04 +0200 Subject: [PATCH 088/241] rename fuse base class --- attic/archiver.py | 4 ++-- attic/fuse.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/attic/archiver.py b/attic/archiver.py index 315bbffd9..78864be00 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -293,7 +293,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") def do_mount(self, args): """Mount archive or an entire repository as a FUSE fileystem""" try: - from attic.fuse import AtticOperations + from attic.fuse import FuseOperations except ImportError as e: self.print_error('loading fuse support failed [ImportError: %s]' % str(e)) return self.exit_code @@ -308,7 +308,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") archive = Archive(repository, key, manifest, args.src.archive) else: archive = None - operations = AtticOperations(key, repository, manifest, archive) + operations = FuseOperations(key, repository, manifest, archive) self.print_verbose("Mounting filesystem") try: operations.mount(args.mountpoint, args.options, args.foreground) diff --git a/attic/fuse.py 
b/attic/fuse.py index e5fe46151..1e93eb951 100644 --- a/attic/fuse.py +++ b/attic/fuse.py @@ -34,7 +34,7 @@ class FuseOperations(llfuse.Operations): """Export archive as a fuse filesystem """ def __init__(self, key, repository, manifest, archive): - super(AtticOperations, self).__init__() + super(FuseOperations, self).__init__() self._inode_count = 0 self.key = key self.repository = cache_if_remote(repository) From 31fce54f671c20fa0f12fb8c0b1346e2e9e7d43f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 9 May 2015 22:31:21 +0200 Subject: [PATCH 089/241] rename cli level test method --- attic/testsuite/archiver.py | 250 ++++++++++++++++++------------------ 1 file changed, 125 insertions(+), 125 deletions(-) diff --git a/attic/testsuite/archiver.py b/attic/testsuite/archiver.py index d0625a843..85de28cd3 100644 --- a/attic/testsuite/archiver.py +++ b/attic/testsuite/archiver.py @@ -90,7 +90,7 @@ class ArchiverTestCaseBase(BaseTestCase): shutil.rmtree(self.tmpdir) os.chdir(self._old_wd) - def attic(self, *args, **kw): + def cmd(self, *args, **kw): exit_code = kw.get('exit_code', 0) fork = kw.get('fork', False) if fork: @@ -121,7 +121,7 @@ class ArchiverTestCaseBase(BaseTestCase): sys.stdin, sys.stdout, sys.stderr = stdin, stdout, stderr def create_src_archive(self, name): - self.attic('create', self.repository_location + '::' + name, src_dir) + self.cmd('create', self.repository_location + '::' + name, src_dir) class ArchiverTestCase(ArchiverTestCaseBase): @@ -179,19 +179,19 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_basic_functionality(self): self.create_test_files() - self.attic('init', self.repository_location) - self.attic('create', self.repository_location + '::test', 'input') - self.attic('create', self.repository_location + '::test.2', 'input') + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + self.cmd('create', self.repository_location + '::test.2', 'input') with 
changedir('output'): - self.attic('extract', self.repository_location + '::test') - self.assert_equal(len(self.attic('list', self.repository_location).splitlines()), 2) - self.assert_equal(len(self.attic('list', self.repository_location + '::test').splitlines()), 11) + self.cmd('extract', self.repository_location + '::test') + self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) + self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), 11) self.assert_dirs_equal('input', 'output/input') - info_output = self.attic('info', self.repository_location + '::test') + info_output = self.cmd('info', self.repository_location + '::test') self.assert_in('Number of files: 4', info_output) shutil.rmtree(self.cache_path) with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'): - info_output2 = self.attic('info', self.repository_location + '::test') + info_output2 = self.cmd('info', self.repository_location + '::test') # info_output2 starts with some "initializing cache" text but should # end the same way as info_output assert info_output2.endswith(info_output) @@ -226,10 +226,10 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_equal(st.st_size, total_len) if sparse_support and hasattr(st, 'st_blocks'): self.assert_true(st.st_blocks * 512 < total_len / 10) # is input sparse? 
- self.attic('init', self.repository_location) - self.attic('create', self.repository_location + '::test', 'input') + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') with changedir('output'): - self.attic('extract', '--sparse', self.repository_location + '::test') + self.cmd('extract', '--sparse', self.repository_location + '::test') self.assert_dirs_equal('input', 'output/input') filename = os.path.join(self.output_path, 'input', 'sparse') with open(filename, 'rb') as fd: @@ -245,132 +245,132 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_repository_swap_detection(self): self.create_test_files() os.environ['BORG_PASSPHRASE'] = 'passphrase' - self.attic('init', '--encryption=passphrase', self.repository_location) + self.cmd('init', '--encryption=passphrase', self.repository_location) repository_id = self._extract_repository_id(self.repository_path) - self.attic('create', self.repository_location + '::test', 'input') + self.cmd('create', self.repository_location + '::test', 'input') shutil.rmtree(self.repository_path) - self.attic('init', '--encryption=none', self.repository_location) + self.cmd('init', '--encryption=none', self.repository_location) self._set_repository_id(self.repository_path, repository_id) self.assert_equal(repository_id, self._extract_repository_id(self.repository_path)) - self.assert_raises(Cache.EncryptionMethodMismatch, lambda :self.attic('create', self.repository_location + '::test.2', 'input')) + self.assert_raises(Cache.EncryptionMethodMismatch, lambda :self.cmd('create', self.repository_location + '::test.2', 'input')) def test_repository_swap_detection2(self): self.create_test_files() - self.attic('init', '--encryption=none', self.repository_location + '_unencrypted') + self.cmd('init', '--encryption=none', self.repository_location + '_unencrypted') os.environ['BORG_PASSPHRASE'] = 'passphrase' - self.attic('init', '--encryption=passphrase', self.repository_location 
+ '_encrypted') - self.attic('create', self.repository_location + '_encrypted::test', 'input') + self.cmd('init', '--encryption=passphrase', self.repository_location + '_encrypted') + self.cmd('create', self.repository_location + '_encrypted::test', 'input') shutil.rmtree(self.repository_path + '_encrypted') os.rename(self.repository_path + '_unencrypted', self.repository_path + '_encrypted') - self.assert_raises(Cache.RepositoryAccessAborted, lambda :self.attic('create', self.repository_location + '_encrypted::test.2', 'input')) + self.assert_raises(Cache.RepositoryAccessAborted, lambda :self.cmd('create', self.repository_location + '_encrypted::test.2', 'input')) def test_strip_components(self): - self.attic('init', self.repository_location) + self.cmd('init', self.repository_location) self.create_regular_file('dir/file') - self.attic('create', self.repository_location + '::test', 'input') + self.cmd('create', self.repository_location + '::test', 'input') with changedir('output'): - self.attic('extract', self.repository_location + '::test', '--strip-components', '3') + self.cmd('extract', self.repository_location + '::test', '--strip-components', '3') self.assert_true(not os.path.exists('file')) with self.assert_creates_file('file'): - self.attic('extract', self.repository_location + '::test', '--strip-components', '2') + self.cmd('extract', self.repository_location + '::test', '--strip-components', '2') with self.assert_creates_file('dir/file'): - self.attic('extract', self.repository_location + '::test', '--strip-components', '1') + self.cmd('extract', self.repository_location + '::test', '--strip-components', '1') with self.assert_creates_file('input/dir/file'): - self.attic('extract', self.repository_location + '::test', '--strip-components', '0') + self.cmd('extract', self.repository_location + '::test', '--strip-components', '0') def test_extract_include_exclude(self): - self.attic('init', self.repository_location) + self.cmd('init', 
self.repository_location) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('file2', size=1024 * 80) self.create_regular_file('file3', size=1024 * 80) self.create_regular_file('file4', size=1024 * 80) - self.attic('create', '--exclude=input/file4', self.repository_location + '::test', 'input') + self.cmd('create', '--exclude=input/file4', self.repository_location + '::test', 'input') with changedir('output'): - self.attic('extract', self.repository_location + '::test', 'input/file1', ) + self.cmd('extract', self.repository_location + '::test', 'input/file1', ) self.assert_equal(sorted(os.listdir('output/input')), ['file1']) with changedir('output'): - self.attic('extract', '--exclude=input/file2', self.repository_location + '::test') + self.cmd('extract', '--exclude=input/file2', self.repository_location + '::test') self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3']) with changedir('output'): - self.attic('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test') + self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test') self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3']) def test_exclude_caches(self): - self.attic('init', self.repository_location) + self.cmd('init', self.repository_location) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('cache1/CACHEDIR.TAG', contents=b'Signature: 8a477f597d28d172789f06886806bc55 extra stuff') self.create_regular_file('cache2/CACHEDIR.TAG', contents=b'invalid signature') - self.attic('create', '--exclude-caches', self.repository_location + '::test', 'input') + self.cmd('create', '--exclude-caches', self.repository_location + '::test', 'input') with changedir('output'): - self.attic('extract', self.repository_location + '::test') + self.cmd('extract', self.repository_location + '::test') self.assert_equal(sorted(os.listdir('output/input')), ['cache2', 
'file1']) self.assert_equal(sorted(os.listdir('output/input/cache2')), ['CACHEDIR.TAG']) def test_path_normalization(self): - self.attic('init', self.repository_location) + self.cmd('init', self.repository_location) self.create_regular_file('dir1/dir2/file', size=1024 * 80) with changedir('input/dir1/dir2'): - self.attic('create', self.repository_location + '::test', '../../../input/dir1/../dir1/dir2/..') - output = self.attic('list', self.repository_location + '::test') + self.cmd('create', self.repository_location + '::test', '../../../input/dir1/../dir1/dir2/..') + output = self.cmd('list', self.repository_location + '::test') self.assert_not_in('..', output) self.assert_in(' input/dir1/dir2/file', output) def test_exclude_normalization(self): - self.attic('init', self.repository_location) + self.cmd('init', self.repository_location) self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('file2', size=1024 * 80) with changedir('input'): - self.attic('create', '--exclude=file1', self.repository_location + '::test1', '.') + self.cmd('create', '--exclude=file1', self.repository_location + '::test1', '.') with changedir('output'): - self.attic('extract', self.repository_location + '::test1') + self.cmd('extract', self.repository_location + '::test1') self.assert_equal(sorted(os.listdir('output')), ['file2']) with changedir('input'): - self.attic('create', '--exclude=./file1', self.repository_location + '::test2', '.') + self.cmd('create', '--exclude=./file1', self.repository_location + '::test2', '.') with changedir('output'): - self.attic('extract', self.repository_location + '::test2') + self.cmd('extract', self.repository_location + '::test2') self.assert_equal(sorted(os.listdir('output')), ['file2']) - self.attic('create', '--exclude=input/./file1', self.repository_location + '::test3', 'input') + self.cmd('create', '--exclude=input/./file1', self.repository_location + '::test3', 'input') with changedir('output'): - self.attic('extract', 
self.repository_location + '::test3') + self.cmd('extract', self.repository_location + '::test3') self.assert_equal(sorted(os.listdir('output/input')), ['file2']) def test_repeated_files(self): self.create_regular_file('file1', size=1024 * 80) - self.attic('init', self.repository_location) - self.attic('create', self.repository_location + '::test', 'input', 'input') + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input', 'input') def test_overwrite(self): self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('dir2/file2', size=1024 * 80) - self.attic('init', self.repository_location) - self.attic('create', self.repository_location + '::test', 'input') + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') # Overwriting regular files and directories should be supported os.mkdir('output/input') os.mkdir('output/input/file1') os.mkdir('output/input/dir2') with changedir('output'): - self.attic('extract', self.repository_location + '::test') + self.cmd('extract', self.repository_location + '::test') self.assert_dirs_equal('input', 'output/input') # But non-empty dirs should fail os.unlink('output/input/file1') os.mkdir('output/input/file1') os.mkdir('output/input/file1/dir') with changedir('output'): - self.attic('extract', self.repository_location + '::test', exit_code=1) + self.cmd('extract', self.repository_location + '::test', exit_code=1) def test_rename(self): self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('dir2/file2', size=1024 * 80) - self.attic('init', self.repository_location) - self.attic('create', self.repository_location + '::test', 'input') - self.attic('create', self.repository_location + '::test.2', 'input') - self.attic('extract', '--dry-run', self.repository_location + '::test') - self.attic('extract', '--dry-run', self.repository_location + '::test.2') - self.attic('rename', 
self.repository_location + '::test', 'test.3') - self.attic('extract', '--dry-run', self.repository_location + '::test.2') - self.attic('rename', self.repository_location + '::test.2', 'test.4') - self.attic('extract', '--dry-run', self.repository_location + '::test.3') - self.attic('extract', '--dry-run', self.repository_location + '::test.4') + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + self.cmd('create', self.repository_location + '::test.2', 'input') + self.cmd('extract', '--dry-run', self.repository_location + '::test') + self.cmd('extract', '--dry-run', self.repository_location + '::test.2') + self.cmd('rename', self.repository_location + '::test', 'test.3') + self.cmd('extract', '--dry-run', self.repository_location + '::test.2') + self.cmd('rename', self.repository_location + '::test.2', 'test.4') + self.cmd('extract', '--dry-run', self.repository_location + '::test.3') + self.cmd('extract', '--dry-run', self.repository_location + '::test.4') # Make sure both archives have been renamed repository = Repository(self.repository_path) manifest, key = Manifest.load(repository) @@ -381,77 +381,77 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_delete(self): self.create_regular_file('file1', size=1024 * 80) self.create_regular_file('dir2/file2', size=1024 * 80) - self.attic('init', self.repository_location) - self.attic('create', self.repository_location + '::test', 'input') - self.attic('create', self.repository_location + '::test.2', 'input') - self.attic('extract', '--dry-run', self.repository_location + '::test') - self.attic('extract', '--dry-run', self.repository_location + '::test.2') - self.attic('delete', self.repository_location + '::test') - self.attic('extract', '--dry-run', self.repository_location + '::test.2') - self.attic('delete', self.repository_location + '::test.2') + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 
'input') + self.cmd('create', self.repository_location + '::test.2', 'input') + self.cmd('extract', '--dry-run', self.repository_location + '::test') + self.cmd('extract', '--dry-run', self.repository_location + '::test.2') + self.cmd('delete', self.repository_location + '::test') + self.cmd('extract', '--dry-run', self.repository_location + '::test.2') + self.cmd('delete', self.repository_location + '::test.2') # Make sure all data except the manifest has been deleted repository = Repository(self.repository_path) self.assert_equal(len(repository), 1) def test_corrupted_repository(self): - self.attic('init', self.repository_location) + self.cmd('init', self.repository_location) self.create_src_archive('test') - self.attic('extract', '--dry-run', self.repository_location + '::test') - self.attic('check', self.repository_location) + self.cmd('extract', '--dry-run', self.repository_location + '::test') + self.cmd('check', self.repository_location) name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0] with open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+') as fd: fd.seek(100) fd.write('XXXX') - self.attic('check', self.repository_location, exit_code=1) + self.cmd('check', self.repository_location, exit_code=1) def test_readonly_repository(self): - self.attic('init', self.repository_location) + self.cmd('init', self.repository_location) self.create_src_archive('test') os.system('chmod -R ugo-w ' + self.repository_path) try: - self.attic('extract', '--dry-run', self.repository_location + '::test') + self.cmd('extract', '--dry-run', self.repository_location + '::test') finally: # Restore permissions so shutil.rmtree is able to delete it os.system('chmod -R u+w ' + self.repository_path) def test_cmdline_compatibility(self): self.create_regular_file('file1', size=1024 * 80) - self.attic('init', self.repository_location) - self.attic('create', self.repository_location + '::test', 'input') - output = 
self.attic('verify', '-v', self.repository_location + '::test') + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + output = self.cmd('verify', '-v', self.repository_location + '::test') self.assert_in('"borg verify" has been deprecated', output) - output = self.attic('prune', self.repository_location, '--hourly=1') + output = self.cmd('prune', self.repository_location, '--hourly=1') self.assert_in('"--hourly" has been deprecated. Use "--keep-hourly" instead', output) def test_prune_repository(self): - self.attic('init', self.repository_location) - self.attic('create', self.repository_location + '::test1', src_dir) - self.attic('create', self.repository_location + '::test2', src_dir) - output = self.attic('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2') + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test1', src_dir) + self.cmd('create', self.repository_location + '::test2', src_dir) + output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2') self.assert_in('Keeping archive: test2', output) self.assert_in('Would prune: test1', output) - output = self.attic('list', self.repository_location) + output = self.cmd('list', self.repository_location) self.assert_in('test1', output) self.assert_in('test2', output) - self.attic('prune', self.repository_location, '--keep-daily=2') - output = self.attic('list', self.repository_location) + self.cmd('prune', self.repository_location, '--keep-daily=2') + output = self.cmd('list', self.repository_location) self.assert_not_in('test1', output) self.assert_in('test2', output) def test_usage(self): - self.assert_raises(SystemExit, lambda: self.attic()) - self.assert_raises(SystemExit, lambda: self.attic('-h')) + self.assert_raises(SystemExit, lambda: self.cmd()) + self.assert_raises(SystemExit, lambda: self.cmd('-h')) @unittest.skipUnless(has_llfuse, 'llfuse not 
installed') def test_fuse_mount_repository(self): mountpoint = os.path.join(self.tmpdir, 'mountpoint') os.mkdir(mountpoint) - self.attic('init', self.repository_location) + self.cmd('init', self.repository_location) self.create_test_files() - self.attic('create', self.repository_location + '::archive', 'input') - self.attic('create', self.repository_location + '::archive2', 'input') + self.cmd('create', self.repository_location + '::archive', 'input') + self.cmd('create', self.repository_location + '::archive2', 'input') try: - self.attic('mount', self.repository_location, mountpoint, fork=True) + self.cmd('mount', self.repository_location, mountpoint, fork=True) self.wait_for_mount(mountpoint) self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input')) self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input')) @@ -468,11 +468,11 @@ class ArchiverTestCase(ArchiverTestCaseBase): def test_fuse_mount_archive(self): mountpoint = os.path.join(self.tmpdir, 'mountpoint') os.mkdir(mountpoint) - self.attic('init', self.repository_location) + self.cmd('init', self.repository_location) self.create_test_files() - self.attic('create', self.repository_location + '::archive', 'input') + self.cmd('create', self.repository_location + '::archive', 'input') try: - self.attic('mount', self.repository_location + '::archive', mountpoint, fork=True) + self.cmd('mount', self.repository_location + '::archive', mountpoint, fork=True) self.wait_for_mount(mountpoint) self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input')) finally: @@ -503,13 +503,13 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.create_test_files() os.environ['BORG_PASSPHRASE'] = 'passphrase' - self.attic('init', '--encryption=' + method, self.repository_location) + self.cmd('init', '--encryption=' + method, self.repository_location) verify_uniqueness() - self.attic('create', self.repository_location + '::test', 'input') + self.cmd('create', 
self.repository_location + '::test', 'input') verify_uniqueness() - self.attic('create', self.repository_location + '::test.2', 'input') + self.cmd('create', self.repository_location + '::test.2', 'input') verify_uniqueness() - self.attic('delete', self.repository_location + '::test.2') + self.cmd('delete', self.repository_location + '::test.2') verify_uniqueness() self.assert_equal(used, set(range(len(used)))) @@ -525,7 +525,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): def setUp(self): super(ArchiverCheckTestCase, self).setUp() with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10): - self.attic('init', self.repository_location) + self.cmd('init', self.repository_location) self.create_src_archive('archive1') self.create_src_archive('archive2') @@ -536,13 +536,13 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): return archive, repository def test_check_usage(self): - output = self.attic('check', self.repository_location, exit_code=0) + output = self.cmd('check', self.repository_location, exit_code=0) self.assert_in('Starting repository check', output) self.assert_in('Starting archive consistency check', output) - output = self.attic('check', '--repository-only', self.repository_location, exit_code=0) + output = self.cmd('check', '--repository-only', self.repository_location, exit_code=0) self.assert_in('Starting repository check', output) self.assert_not_in('Starting archive consistency check', output) - output = self.attic('check', '--archives-only', self.repository_location, exit_code=0) + output = self.cmd('check', '--archives-only', self.repository_location, exit_code=0) self.assert_not_in('Starting repository check', output) self.assert_in('Starting archive consistency check', output) @@ -553,58 +553,58 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): repository.delete(item[b'chunks'][-1][0]) break repository.commit() - self.attic('check', self.repository_location, exit_code=1) - self.attic('check', '--repair', self.repository_location, 
exit_code=0) - self.attic('check', self.repository_location, exit_code=0) + self.cmd('check', self.repository_location, exit_code=1) + self.cmd('check', '--repair', self.repository_location, exit_code=0) + self.cmd('check', self.repository_location, exit_code=0) def test_missing_archive_item_chunk(self): archive, repository = self.open_archive('archive1') repository.delete(archive.metadata[b'items'][-5]) repository.commit() - self.attic('check', self.repository_location, exit_code=1) - self.attic('check', '--repair', self.repository_location, exit_code=0) - self.attic('check', self.repository_location, exit_code=0) + self.cmd('check', self.repository_location, exit_code=1) + self.cmd('check', '--repair', self.repository_location, exit_code=0) + self.cmd('check', self.repository_location, exit_code=0) def test_missing_archive_metadata(self): archive, repository = self.open_archive('archive1') repository.delete(archive.id) repository.commit() - self.attic('check', self.repository_location, exit_code=1) - self.attic('check', '--repair', self.repository_location, exit_code=0) - self.attic('check', self.repository_location, exit_code=0) + self.cmd('check', self.repository_location, exit_code=1) + self.cmd('check', '--repair', self.repository_location, exit_code=0) + self.cmd('check', self.repository_location, exit_code=0) def test_missing_manifest(self): archive, repository = self.open_archive('archive1') repository.delete(Manifest.MANIFEST_ID) repository.commit() - self.attic('check', self.repository_location, exit_code=1) - output = self.attic('check', '--repair', self.repository_location, exit_code=0) + self.cmd('check', self.repository_location, exit_code=1) + output = self.cmd('check', '--repair', self.repository_location, exit_code=0) self.assert_in('archive1', output) self.assert_in('archive2', output) - self.attic('check', self.repository_location, exit_code=0) + self.cmd('check', self.repository_location, exit_code=0) def test_extra_chunks(self): - 
self.attic('check', self.repository_location, exit_code=0) + self.cmd('check', self.repository_location, exit_code=0) repository = Repository(self.repository_location) repository.put(b'01234567890123456789012345678901', b'xxxx') repository.commit() repository.close() - self.attic('check', self.repository_location, exit_code=1) - self.attic('check', self.repository_location, exit_code=1) - self.attic('check', '--repair', self.repository_location, exit_code=0) - self.attic('check', self.repository_location, exit_code=0) - self.attic('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0) + self.cmd('check', self.repository_location, exit_code=1) + self.cmd('check', self.repository_location, exit_code=1) + self.cmd('check', '--repair', self.repository_location, exit_code=0) + self.cmd('check', self.repository_location, exit_code=0) + self.cmd('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0) class RemoteArchiverTestCase(ArchiverTestCase): prefix = '__testsuite__:' def test_remote_repo_restrict_to_path(self): - self.attic('init', self.repository_location) + self.cmd('init', self.repository_location) path_prefix = os.path.dirname(self.repository_path) with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo']): - self.assert_raises(PathNotAllowed, lambda: self.attic('init', self.repository_location + '_1')) + self.assert_raises(PathNotAllowed, lambda: self.cmd('init', self.repository_location + '_1')) with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', path_prefix]): - self.attic('init', self.repository_location + '_2') + self.cmd('init', self.repository_location + '_2') with patch.object(RemoteRepository, 'extra_test_args', ['--restrict-to-path', '/foo', '--restrict-to-path', path_prefix]): - self.attic('init', self.repository_location + '_3') + self.cmd('init', self.repository_location + '_3') From a962c19a09e385419f163f4b9282963ca1d7b024 Mon Sep 17 00:00:00 2001 
From: Thomas Waldmann Date: Wed, 13 May 2015 18:45:25 +0200 Subject: [PATCH 090/241] renamed CHANGES-merge.txt to -borg --- CHANGES-merge.txt => CHANGES-borg.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename CHANGES-merge.txt => CHANGES-borg.txt (100%) diff --git a/CHANGES-merge.txt b/CHANGES-borg.txt similarity index 100% rename from CHANGES-merge.txt rename to CHANGES-borg.txt From 247677dd3d1ed0e8c3f005c63b499d6e5e13f935 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 13 May 2015 18:51:01 +0200 Subject: [PATCH 091/241] updated CHANGES-borg.txt --- CHANGES-borg.txt | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/CHANGES-borg.txt b/CHANGES-borg.txt index b9df3a2f6..80906931f 100644 --- a/CHANGES-borg.txt +++ b/CHANGES-borg.txt @@ -1,21 +1,17 @@ -Important note about "merge" branch -=================================== +Important note about Borg +========================= -Goal of the "merge" branch is to be conservative and only merge: -- bug fixes -- new features that do not impact compatibility -- docs updates -- etc. +BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. +UNTIL FURTHER NOTICE, EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY. +THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF IF IT FITS YOUR NEEDS. -BUT: THERE IS NO GUARANTEE THAT IT IS ALWAYS COMPATIBLE WITH ORIGINAL ATTIC -nor THAT YOU CAN SWITCH BACK AND FORTH BETWEEN ORIGINAL CODE AND THIS CODE -WITHIN THE SAME REPOSITORY WITHOUT ENCOUNTERING ISSUES. +Read issue #1 on the issue tracker, goals are being defined there. Please also see the LICENSE for more informations. 
-Stuff in "merge" that is not in "master" minus minor changes -============================================================ +Stuff in Borg that is not in original Attic minus minor changes +=============================================================== give specific path to xattr.is_enabled(), disable symlink setattr call that always fails fix misleading hint the fuse ImportError handler gave, fixes #237 From c32f4b64b1743b27c5bcbd404c3f866df52c9264 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 14 May 2015 00:37:09 +0200 Subject: [PATCH 092/241] integrate CHANGES-borg.txt into CHANGES and README.rst --- CHANGES | 45 +++++++++++++++++++++++++++++++++++++++++++- CHANGES-borg.txt | 49 ------------------------------------------------ README.rst | 8 ++++++++ 3 files changed, 52 insertions(+), 50 deletions(-) delete mode 100644 CHANGES-borg.txt diff --git a/CHANGES b/CHANGES index 7b312a69a..e0effdf8d 100644 --- a/CHANGES +++ b/CHANGES @@ -1,7 +1,50 @@ +Borg Changelog +============== + +Version +------------- + +- give specific path to xattr.is_enabled(), disable symlink setattr call that + always fails +- fix misleading hint the fuse ImportError handler gave, fixes #237 +- source: misc. 
cleanups, pep8, style +- implement check --last N +- check: sort archives in reverse time order +- avoid defect python-msgpack releases, fixes #171, fixes #185 +- check unpacked data from RPC for tuple type and correct length, fixes #127 +- less memory usage: add global option --no-cache-files +- fix traceback when trying to do unsupported passphrase change, fixes #189 +- datetime does not like the year 10.000, fixes issue #139 +- docs improvements, fixes, updates +- cleanup crypto.pyx, make it easier to adapt to other modes +- extract: if --stdout is given, write all extracted binary data to stdout +- create: if "-" is given as path, read binary from stdin +- do os.fsync like recommended in the python docs +- extra debug information for 'fread failed' +- FUSE: reflect deduplication in allocated blocks +- only allow whitelisted RPC calls in server mode +- normalize source/exclude paths before matching +- fix "info" all archives stats, fixes #183 +- implement create --timestamp, utc string or reference file/dir +- simple sparse file support (extract --sparse) +- fix parsing with missing microseconds, fixes #282 +- use posix_fadvise to not spoil the OS cache, fixes #252 +- source: Let chunker optionally work with os-level file descriptor. +- source: Linux: remove duplicate os.fsencode calls +- fix test setup for 32bit platforms, partial fix for #196 +- source: refactor _open_rb code a bit, so it is more consistent / regular +- implement rename repo::oldname newname +- implement create --progress +- source: refactor indicator (status) and item processing +- implement delete (also deletes local cache) +- better create -v output + + Attic Changelog =============== -Here you can see the full list of changes between each Attic release. 
+Here you can see the full list of changes between each Attic release until Borg +forked from Attic: Version 0.16 ------------ diff --git a/CHANGES-borg.txt b/CHANGES-borg.txt deleted file mode 100644 index 80906931f..000000000 --- a/CHANGES-borg.txt +++ /dev/null @@ -1,49 +0,0 @@ -Important note about Borg -========================= - -BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. -UNTIL FURTHER NOTICE, EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY. -THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF IF IT FITS YOUR NEEDS. - -Read issue #1 on the issue tracker, goals are being defined there. - -Please also see the LICENSE for more informations. - - -Stuff in Borg that is not in original Attic minus minor changes -=============================================================== - -give specific path to xattr.is_enabled(), disable symlink setattr call that always fails -fix misleading hint the fuse ImportError handler gave, fixes #237 -source: misc. cleanups, pep8, style -implement attic check --last N -check: sort archives in reverse time order -avoid defect python-msgpack releases, fixes #171, fixes #185 -check unpacked data from RPC for tuple type and correct length, fixes #127 -less memory usage: add global option --no-cache-files -fix traceback when trying to do unsupported passphrase change, fixes #189 -datetime does not like the year 10.000, fixes issue #139 -docs improvements, fixes, updates -cleanup crypto.pyx, make it easier to adapt to other modes -attic extract: if --stdout is given, write all extracted binary data to stdout -attic create: if "-" is given as path, read binary from stdin -do os.fsync like recommended in the python docs -extra debug information for 'fread failed' -FUSE: reflect deduplication in allocated blocks -only allow whitelisted RPC calls in server mode -normalize source/exclude paths before matching -fix "attic info" all archives stats, fixes #183 -implement attic create --timestamp, utc string or reference file/dir -simple sparse 
file support (attic extract --sparse) -fix parsing with missing microseconds, fixes #282 -use posix_fadvise to not spoil the OS cache, fixes #252 -source: Let chunker optionally work with os-level file descriptor. -source: Linux: remove duplicate os.fsencode calls -fix test setup for 32bit platforms, partial fix for #196 -source: refactor _open_rb code a bit, so it is more consistent / regular -implement attic rename repo::oldname newname -implement attic create --progress -source: refactor indicator (status) and item processing -implement attic delete (also deletes local cache) -better attic create -v output - diff --git a/README.rst b/README.rst index 07421f280..214b7fb04 100644 --- a/README.rst +++ b/README.rst @@ -9,6 +9,14 @@ are stored. Borg is a fork of Attic and maintained by "The Borg Collective" (see AUTHORS file). +BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. +UNTIL FURTHER NOTICE, EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY. +THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF IF IT FITS YOUR NEEDS. + +Read issue #1 on the issue tracker, goals are being defined there. + +Please also see the LICENSE for more informations. 
+ Easy to use ~~~~~~~~~~~ Initialize backup repository and create a backup archive:: From 673fda39c1b4ffc6a92ead193020d9edaaea0995 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 14 May 2015 01:25:18 +0200 Subject: [PATCH 093/241] upgrade versioneer to 0.14, makes PEP440 compliant version numbers now --- MANIFEST.in | 1 + attic/_version.py | 302 ++++++----- setup.py | 2 + versioneer.py | 1232 +++++++++++++++++++++++++++++---------------- 4 files changed, 985 insertions(+), 552 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 596768190..7584a33c2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,3 +4,4 @@ recursive-include docs * recursive-exclude docs *.pyc recursive-exclude docs *.pyo prune docs/_build +include attic/_version.py diff --git a/attic/_version.py b/attic/_version.py index b8a6306a2..a7103c1a6 100644 --- a/attic/_version.py +++ b/attic/_version.py @@ -1,34 +1,53 @@ -IN_LONG_VERSION_PY = True # This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by github's download-from-tag -# feature). Distribution tarballs (build by setup.py sdist) and build +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. 
Generated by -# versioneer-0.7+ (https://github.com/warner/python-versioneer) +# versioneer-0.14 (https://github.com/warner/python-versioneer) + +import errno +import os +import re +import subprocess +import sys # these strings will be replaced by git during git-archive git_refnames = "$Format:%d$" git_full = "$Format:%H$" - -import subprocess +# these strings are filled in when 'setup.py versioneer' creates _version.py +tag_prefix = "" +parentdir_prefix = "borgbackup-" +versionfile_source = "attic/_version.py" -def run_command(args, cwd=None, verbose=False): - try: - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd) - except EnvironmentError: - e = sys.exc_info()[1] +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): + assert isinstance(commands, list) + p = None + for c in commands: + try: + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % args[0]) + print(e) + return None + else: if verbose: - print("unable to run %s" % args[0]) - print(e) + print("unable to find command, tried %s" % (commands,)) return None stdout = p.communicate()[0].strip() - if sys.version >= '3': + if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: @@ -37,165 +56,184 @@ def run_command(args, cwd=None, verbose=False): return stdout -import sys -import re -import os.path +def versions_from_parentdir(parentdir_prefix, root, verbose=False): + # Source tarballs conventionally unpack into a directory that includes + # both the project name and a version string. 
+ dirname = os.path.basename(root) + if not dirname.startswith(parentdir_prefix): + if verbose: + print("guessing rootdir is '%s', but '%s' doesn't start with " + "prefix '%s'" % (root, dirname, parentdir_prefix)) + return None + return {"version": dirname[len(parentdir_prefix):], "full": ""} -def get_expanded_variables(versionfile_source): +def git_get_keywords(versionfile_abs): # the code embedded in _version.py can just fetch the value of these - # variables. When used from setup.py, we don't want to import - # _version.py, so we do it with a regexp instead. This function is not - # used from _version.py. - variables = {} + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} try: - for line in open(versionfile_source, "r").readlines(): + f = open(versionfile_abs, "r") + for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: - variables["refnames"] = mo.group(1) + keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: - variables["full"] = mo.group(1) + keywords["full"] = mo.group(1) + f.close() except EnvironmentError: pass - return variables + return keywords -def versions_from_expanded_variables(variables, tag_prefix, verbose=False): - refnames = variables["refnames"].strip() +def git_versions_from_keywords(keywords, tag_prefix, verbose=False): + if not keywords: + return {} # keyword-finding function failed to find keywords + refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: - print("variables are unexpanded, not using") + print("keywords are unexpanded, not using") return {} # unexpanded, so not in an unpacked git-archive tarball refs = set([r.strip() for r in refnames.strip("()").split(",")]) - for ref in list(refs): - if not re.search(r'\d', ref): - if verbose: - 
print("discarding '%s', no digits" % ref) - refs.discard(ref) - # Assume all version tags have a digit. git's %d expansion - # behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us - # distinguish between branches and tags. By ignoring refnames - # without digits, we filter out many common branch names like - # "release" and "stabilization", as well as "HEAD" and "master". + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs-tags)) if verbose: - print("remaining refs: %s" % ",".join(sorted(refs))) - for ref in sorted(refs): + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) return {"version": r, - "full": variables["full"].strip()} - # no suitable tags, so we use the full revision id + "full": keywords["full"].strip()} + # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: - print("no suitable tags, using full revision id") - return {"version": variables["full"].strip(), - "full": variables["full"].strip()} + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full": keywords["full"].strip()} -def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): - # this runs 'git' from the root of the source tree. That either means - # someone ran a setup.py command (and this code is in versioneer.py, so - # IN_LONG_VERSION_PY=False, thus the containing directory is the root of - # the source tree), or someone ran a project-specific entry point (and - # this code is in _version.py, so IN_LONG_VERSION_PY=True, thus the - # containing directory is somewhere deeper in the source tree). This only - # gets called if the git-archive 'subst' variables were *not* expanded, - # and _version.py hasn't already been rewritten with a short version - # string, meaning we're inside a checked out source tree. +def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): + # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. 
- try: - here = os.path.abspath(__file__) - except NameError: - # some py2exe/bbfreeze/non-CPython implementations don't do __file__ - return {} # not always correct + # dirty + dirty = git_describe.endswith("-dirty") + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + dirty_suffix = ".dirty" if dirty else "" + + # now we have TAG-NUM-gHEX or HEX + + if "-" not in git_describe: # just HEX + return "0+untagged.g"+git_describe+dirty_suffix, dirty + + # just TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + return "0+unparseable"+dirty_suffix, dirty + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + return None, dirty + tag = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + distance = int(mo.group(2)) + + # commit: short hex revision ID + commit = mo.group(3) + + # now build up version string, with post-release "local version + # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a + # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you + # can always test version.endswith(".dirty"). + version = tag + if distance or dirty: + version += "+%d.g%s" % (distance, commit) + dirty_suffix + + return version, dirty + + +def git_versions_from_vcs(tag_prefix, root, verbose=False): + # this runs 'git' from the root of the source tree. This only gets called + # if the git-archive 'subst' keywords were *not* expanded, and + # _version.py hasn't already been rewritten with a short version string, + # meaning we're inside a checked out source tree. - # versionfile_source is the relative path from the top of the source tree - # (where the .git directory might live) to this file. Invert this to find - # the root from __file__. 
- root = here - if IN_LONG_VERSION_PY: - for i in range(len(versionfile_source.split("/"))): - root = os.path.dirname(root) - else: - root = os.path.dirname(here) if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %s" % root) - return {} + return {} # get_versions() will try next method - GIT = "git" + GITS = ["git"] if sys.platform == "win32": - GIT = "git.cmd" - stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"], + GITS = ["git.cmd", "git.exe"] + # if there is a tag, this yields TAG-NUM-gHEX[-dirty] + # if there are no tags, this yields HEX[-dirty] (no NUM) + stdout = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long"], cwd=root) + # --long was added in git-1.5.5 if stdout is None: - return {} - if not stdout.startswith(tag_prefix): - if verbose: - print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix)) - return {} - tag = stdout[len(tag_prefix):] - stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root) + return {} # try next method + version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) + + # build "full", which is FULLHEX[.dirty] + stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if stdout is None: return {} full = stdout.strip() - if tag.endswith("-dirty"): - full += "-dirty" - return {"version": tag, "full": full} + if dirty: + full += ".dirty" + + return {"version": version, "full": full} -def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False): - if IN_LONG_VERSION_PY: - # We're running from _version.py. If it's from a source tree - # (execute-in-place), we can work upwards to find the root of the - # tree, and then check the parent directory for a version string. If - # it's in an installed application, there's no hope. 
- try: - here = os.path.abspath(__file__) - except NameError: - # py2exe/bbfreeze/non-CPython don't have __file__ - return {} # without __file__, we have no hope +def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False): + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + keywords = {"refnames": git_refnames, "full": git_full} + ver = git_versions_from_keywords(keywords, tag_prefix, verbose) + if ver: + return ver + + try: + root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source - # tree to _version.py. Invert this to find the root from __file__. - root = here - for i in range(len(versionfile_source.split("/"))): + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for i in versionfile_source.split('/'): root = os.path.dirname(root) - else: - # we're running from versioneer.py, which means we're running from - # the setup.py in a source tree. sys.argv[0] is setup.py in the root. - here = os.path.abspath(sys.argv[0]) - root = os.path.dirname(here) - - # Source tarballs conventionally unpack into a directory that includes - # both the project name and a version string. 
- dirname = os.path.basename(root) - if not dirname.startswith(parentdir_prefix): - if verbose: - print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" % - (root, dirname, parentdir_prefix)) - return None - return {"version": dirname[len(parentdir_prefix):], "full": ""} - -tag_prefix = "" -parentdir_prefix = "borgbackup-" -versionfile_source = "attic/_version.py" - - -def get_versions(default={"version": "unknown", "full": ""}, verbose=False): - variables = {"refnames": git_refnames, "full": git_full} - ver = versions_from_expanded_variables(variables, tag_prefix, verbose) - if not ver: - ver = versions_from_vcs(tag_prefix, versionfile_source, verbose) - if not ver: - ver = versions_from_parentdir(parentdir_prefix, versionfile_source, - verbose) - if not ver: - ver = default - return ver + except NameError: + return default + return (git_versions_from_vcs(tag_prefix, root, verbose) + or versions_from_parentdir(parentdir_prefix, root, verbose) + or default) diff --git a/setup.py b/setup.py index 1c6da086b..89ba31873 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,8 @@ import sys from glob import glob import versioneer +versioneer.VCS = 'git' +versioneer.style = 'pep440' versioneer.versionfile_source = 'attic/_version.py' versioneer.versionfile_build = 'attic/_version.py' versioneer.tag_prefix = '' diff --git a/versioneer.py b/versioneer.py index 1614e46a8..c00770fe4 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,128 +1,387 @@ -"""versioneer.py -(like a rocketeer, but for versions) +# Version: 0.14 +""" +The Versioneer +============== + +* like a rocketeer, but for versions! 
* https://github.com/warner/python-versioneer * Brian Warner * License: Public Domain -* Version: 0.7+ +* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, and pypy +* [![Latest Version] +(https://pypip.in/version/versioneer/badge.svg?style=flat) +](https://pypi.python.org/pypi/versioneer/) +* [![Build Status] +(https://travis-ci.org/warner/python-versioneer.png?branch=master) +](https://travis-ci.org/warner/python-versioneer) -This file helps distutils-based projects manage their version number by just -creating version-control tags. +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. -For developers who work from a VCS-generated tree (e.g. 'git clone' etc), -each 'setup.py version', 'setup.py build', 'setup.py sdist' will compute a -version number by asking your version-control tool about the current -checkout. The version number will be written into a generated _version.py -file of your choosing, where it can be included by your __init__.py -For users who work from a VCS-generated tarball (e.g. 'git archive'), it will -compute a version number by looking at the name of the directory created when -te tarball is unpacked. This conventionally includes both the name of the -project and a version number. +## Quick Install -For users who work from a tarball built by 'setup.py sdist', it will get a -version number from a previously-generated _version.py file. +* `pip install versioneer` to somewhere to your $PATH +* run `versioneer-installer` in your source tree: this installs `versioneer.py` +* follow the instructions below (also in the `versioneer.py` docstring) -As a result, loading code directly from the source tree will not result in a -real version. 
If you want real versions from VCS trees (where you frequently -update from the upstream repository, or do new development), you will need to -do a 'setup.py version' after each update, and load code from the build/ -directory. +## Version Identifiers -You need to provide this code with a few configuration values: +Source trees come from a variety of places: - versionfile_source: - A project-relative pathname into which the generated version strings - should be written. This is usually a _version.py next to your project's - main __init__.py file. If your project uses src/myproject/__init__.py, - this should be 'src/myproject/_version.py'. This file should be checked - in to your VCS as usual: the copy created below by 'setup.py - update_files' will include code that parses expanded VCS keywords in - generated tarballs. The 'build' and 'sdist' commands will replace it with - a copy that has just the calculated version string. +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI - versionfile_build: - Like versionfile_source, but relative to the build directory instead of - the source directory. These will differ when your setup.py uses - 'package_dir='. If you have package_dir={'myproject': 'src/myproject'}, - then you will probably have versionfile_build='myproject/_version.py' and - versionfile_source='src/myproject/_version.py'. +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: - tag_prefix: a string, like 'PROJECTNAME-', which appears at the start of all - VCS tags. If your tags look like 'myproject-1.2.0', then you - should use tag_prefix='myproject-'. If you use unprefixed tags - like '1.2.0', this should be an empty string. 
+* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step - parentdir_prefix: a string, frequently the same as tag_prefix, which - appears at the start of all unpacked tarball filenames. If - your tarball unpacks into 'myproject-1.2.0', this should - be 'myproject-'. +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example 'git describe --tags --dirty --always' reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes. -To use it: +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. However, +when you use "setup.py build" or "setup.py sdist", `_version.py` in the new +copy is replaced by a small static file that contains just the generated +version data. 
+ +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the "git archive" command. As a result, generated tarballs will +contain enough information to get the proper version. + + +## Installation + +First, decide on values for the following configuration variables: + +* `VCS`: the version control system you use. Currently accepts "git". + +* `versionfile_source`: + + A project-relative pathname into which the generated version strings should + be written. This is usually a `_version.py` next to your project's main + `__init__.py` file, so it can be imported at runtime. If your project uses + `src/myproject/__init__.py`, this should be `src/myproject/_version.py`. + This file should be checked in to your VCS as usual: the copy created below + by `setup.py versioneer` will include code that parses expanded VCS + keywords in generated tarballs. The 'build' and 'sdist' commands will + replace it with a copy that has just the calculated version string. + + This must be set even if your project does not have any modules (and will + therefore never import `_version.py`), since "setup.py sdist" -based trees + still need somewhere to record the pre-calculated version strings. Anywhere + in the source tree should do. If there is a `__init__.py` next to your + `_version.py`, the `setup.py versioneer` command (described below) will + append some `__version__`-setting assignments, if they aren't already + present. + +* `versionfile_build`: + + Like `versionfile_source`, but relative to the build directory instead of + the source directory. These will differ when your setup.py uses + 'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`, + then you will probably have `versionfile_build='myproject/_version.py'` and + `versionfile_source='src/myproject/_version.py'`. 
+ + If this is set to None, then `setup.py build` will not attempt to rewrite + any `_version.py` in the built tree. If your project does not have any + libraries (e.g. if it only builds a script), then you should use + `versionfile_build = None` and override `distutils.command.build_scripts` + to explicitly insert a copy of `versioneer.get_version()` into your + generated script. + +* `tag_prefix`: + + a string, like 'PROJECTNAME-', which appears at the start of all VCS tags. + If your tags look like 'myproject-1.2.0', then you should use + tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this + should be an empty string. + +* `parentdir_prefix`: + + a string, frequently the same as tag_prefix, which appears at the start of + all unpacked tarball filenames. If your tarball unpacks into + 'myproject-1.2.0', this should be 'myproject-'. + +This tool provides one script, named `versioneer-installer`. That script does +one thing: write a copy of `versioneer.py` into the current directory. + +To versioneer-enable your project: + +* 1: Run `versioneer-installer` to copy `versioneer.py` into the top of your + source tree. + +* 2: add the following lines to the top of your `setup.py`, with the + configuration values you decided earlier: + + ```` + import versioneer + versioneer.VCS = 'git' + versioneer.versionfile_source = 'src/myproject/_version.py' + versioneer.versionfile_build = 'myproject/_version.py' + versioneer.tag_prefix = '' # tags are like 1.2.0 + versioneer.parentdir_prefix = 'myproject-' # dirname like 'myproject-1.2.0' + ```` + +* 3: add the following arguments to the setup() call in your setup.py: + + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), + +* 4: now run `setup.py versioneer`, which will create `_version.py`, and will + modify your `__init__.py` (if one exists next to `_version.py`) to define + `__version__` (by calling a function from `_version.py`). 
It will also + modify your `MANIFEST.in` to include both `versioneer.py` and the generated + `_version.py` in sdist tarballs. + +* 5: commit these changes to your VCS. To make sure you won't forget, + `setup.py versioneer` will mark everything it touched for addition. + +## Post-Installation Usage + +Once established, all uses of your tree from a VCS checkout should get the +current version string. All generated tarballs should include an embedded +version string (so users who unpack them will not need a VCS tool installed). + +If you distribute your project through PyPI, then the release process should +boil down to two steps: + +* 1: git tag 1.0 +* 2: python setup.py register sdist upload + +If you distribute it through github (i.e. users use github to generate +tarballs with `git archive`), the process is: + +* 1: git tag 1.0 +* 2: git push; git push --tags + +Currently, all version strings must be based upon a tag. Versioneer will +report "unknown" until your tree has at least one tag in its history. This +restriction will be fixed eventually (see issue #12). + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different keys for different flavors +of the version string: + +* `['version']`: A condensed PEP440-compliant string, equal to the + un-prefixed tag name for actual releases, and containing an additional + "local version" section with more detail for in-between builds. For Git, + this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe + --tags --dirty --always`. 
For example "0.11+2.g1076c97.dirty" indicates + that the tree is like the "1076c97" commit but has uncommitted changes + (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" + tag. For released software (exactly equal to a known tag), the identifier + will only contain the stripped tag, e.g. "0.11". + +* `['full']`: detailed revision identifier. For Git, this is the full SHA1 + commit id, followed by ".dirty" if the tree contains uncommitted changes, + e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac.dirty". + +Some variants are more useful than others. Including `full` in a bug report +should allow developers to reconstruct the exact code being tested (or +indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. + +The `setup.py versioneer` command adds the following text to your +`__init__.py` to place a basic version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Updating Versioneer + +To upgrade your project to a new release of Versioneer, do the following: + +* install the new Versioneer (`pip install -U versioneer` or equivalent) +* re-run `versioneer-installer` in your source tree to replace your copy of + `versioneer.py` +* edit `setup.py`, if necessary, to include any new configuration settings + indicated by the release notes +* re-run `setup.py versioneer` to replace `SRC/_version.py` +* commit any changed files + +### Upgrading from 0.10 to 0.11 + +You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running +`setup.py versioneer`. This will enable the use of additional version-control +systems (SVN, etc) in the future. + +### Upgrading from 0.11 to 0.12 + +Nothing special. 
+ +## Upgrading to 0.14 + +0.14 changes the format of the version string. 0.13 and earlier used +hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a +plus-separated "local version" section strings, with dot-separated +components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old +format, but should be ok with the new one. + +## Future Directions + +This tool is designed to make it easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. + + +## License + +To make Versioneer easier to embed, all its code is hereby released into the +public domain. The `_version.py` that it creates is also in the public +domain. 
- 1: include this file in the top level of your project - 2: make the following changes to the top of your setup.py: - import versioneer - versioneer.versionfile_source = 'src/myproject/_version.py' - versioneer.versionfile_build = 'myproject/_version.py' - versioneer.tag_prefix = '' # tags are like 1.2.0 - versioneer.parentdir_prefix = 'myproject-' # dirname like 'myproject-1.2.0' - 3: add the following arguments to the setup() call in your setup.py: - version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), - 4: run 'setup.py update_files', which will create _version.py, and will - append the following to your __init__.py: - from _version import __version__ - 5: modify your MANIFEST.in to include versioneer.py - 6: add both versioneer.py and the generated _version.py to your VCS """ -import os, sys, re -from distutils.core import Command -from distutils.command.sdist import sdist as _sdist +import errno +import os +import re +import subprocess +import sys from distutils.command.build import build as _build +from distutils.command.sdist import sdist as _sdist +from distutils.core import Command +# these configuration settings will be overridden by setup.py after it +# imports us versionfile_source = None versionfile_build = None tag_prefix = None parentdir_prefix = None +VCS = None -VCS = "git" -IN_LONG_VERSION_PY = False +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY = {} -LONG_VERSION_PY = ''' -IN_LONG_VERSION_PY = True +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): + assert isinstance(commands, list) + p = None + for c in commands: + try: + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % args[0]) + print(e) + return None 
+ else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None + stdout = p.communicate()[0].strip() + if sys.version_info[0] >= 3: + stdout = stdout.decode() + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % args[0]) + return None + return stdout +LONG_VERSION_PY['git'] = ''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (build by setup.py sdist) and build +# feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by -# versioneer-0.7+ (https://github.com/warner/python-versioneer) +# versioneer-0.14 (https://github.com/warner/python-versioneer) + +import errno +import os +import re +import subprocess +import sys # these strings will be replaced by git during git-archive git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" +# these strings are filled in when 'setup.py versioneer' creates _version.py +tag_prefix = "%(TAG_PREFIX)s" +parentdir_prefix = "%(PARENTDIR_PREFIX)s" +versionfile_source = "%(VERSIONFILE_SOURCE)s" -import subprocess -import sys -def run_command(args, cwd=None, verbose=False): - try: - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd) - except EnvironmentError: - e = sys.exc_info()[1] +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): + assert isinstance(commands, list) + p = None + for c in commands: + try: + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except 
EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% args[0]) + print(e) + return None + else: if verbose: - print("unable to run %%s" %% args[0]) - print(e) + print("unable to find command, tried %%s" %% (commands,)) return None stdout = p.communicate()[0].strip() - if sys.version >= '3': + if sys.version_info[0] >= 3: stdout = stdout.decode() if p.returncode != 0: if verbose: @@ -131,341 +390,350 @@ def run_command(args, cwd=None, verbose=False): return stdout -import sys -import re -import os.path +def versions_from_parentdir(parentdir_prefix, root, verbose=False): + # Source tarballs conventionally unpack into a directory that includes + # both the project name and a version string. + dirname = os.path.basename(root) + if not dirname.startswith(parentdir_prefix): + if verbose: + print("guessing rootdir is '%%s', but '%%s' doesn't start with " + "prefix '%%s'" %% (root, dirname, parentdir_prefix)) + return None + return {"version": dirname[len(parentdir_prefix):], "full": ""} -def get_expanded_variables(versionfile_source): + +def git_get_keywords(versionfile_abs): # the code embedded in _version.py can just fetch the value of these - # variables. When used from setup.py, we don't want to import - # _version.py, so we do it with a regexp instead. This function is not - # used from _version.py. - variables = {} + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} try: - for line in open(versionfile_source,"r").readlines(): + f = open(versionfile_abs, "r") + for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: - variables["refnames"] = mo.group(1) + keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: - variables["full"] = mo.group(1) + keywords["full"] = mo.group(1) + f.close() except EnvironmentError: pass - return variables + return keywords -def versions_from_expanded_variables(variables, tag_prefix, verbose=False): - refnames = variables["refnames"].strip() + +def git_versions_from_keywords(keywords, tag_prefix, verbose=False): + if not keywords: + return {} # keyword-finding function failed to find keywords + refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: - print("variables are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball + print("keywords are unexpanded, not using") + return {} # unexpanded, so not in an unpacked git-archive tarball refs = set([r.strip() for r in refnames.strip("()").split(",")]) - for ref in list(refs): - if not re.search(r'\d', ref): - if verbose: - print("discarding '%%s', no digits" %% ref) - refs.discard(ref) - # Assume all version tags have a digit. git's %%d expansion - # behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us - # distinguish between branches and tags. By ignoring refnames - # without digits, we filter out many common branch names like - # "release" and "stabilization", as well as "HEAD" and "master". + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. 
We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs-tags)) if verbose: - print("remaining refs: %%s" %% ",".join(sorted(refs))) - for ref in sorted(refs): + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %%s" %% r) - return { "version": r, - "full": variables["full"].strip() } - # no suitable tags, so we use the full revision id + return {"version": r, + "full": keywords["full"].strip()} + # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: - print("no suitable tags, using full revision id") - return { "version": variables["full"].strip(), - "full": variables["full"].strip() } + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full": keywords["full"].strip()} -def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): - # this runs 'git' from the root of the source tree. That either means - # someone ran a setup.py command (and this code is in versioneer.py, so - # IN_LONG_VERSION_PY=False, thus the containing directory is the root of - # the source tree), or someone ran a project-specific entry point (and - # this code is in _version.py, so IN_LONG_VERSION_PY=True, thus the - # containing directory is somewhere deeper in the source tree). 
This only - # gets called if the git-archive 'subst' variables were *not* expanded, - # and _version.py hasn't already been rewritten with a short version - # string, meaning we're inside a checked out source tree. - try: - here = os.path.abspath(__file__) - except NameError: - # some py2exe/bbfreeze/non-CPython implementations don't do __file__ - return {} # not always correct +def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): + # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. + + # dirty + dirty = git_describe.endswith("-dirty") + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + dirty_suffix = ".dirty" if dirty else "" + + # now we have TAG-NUM-gHEX or HEX + + if "-" not in git_describe: # just HEX + return "0+untagged.g"+git_describe+dirty_suffix, dirty + + # just TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + return "0+unparseable"+dirty_suffix, dirty + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + return None, dirty + tag = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + distance = int(mo.group(2)) + + # commit: short hex revision ID + commit = mo.group(3) + + # now build up version string, with post-release "local version + # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a + # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you + # can always test version.endswith(".dirty"). + version = tag + if distance or dirty: + version += "+%%d.g%%s" %% (distance, commit) + dirty_suffix + + return version, dirty + + +def git_versions_from_vcs(tag_prefix, root, verbose=False): + # this runs 'git' from the root of the source tree. 
This only gets called + # if the git-archive 'subst' keywords were *not* expanded, and + # _version.py hasn't already been rewritten with a short version string, + # meaning we're inside a checked out source tree. - # versionfile_source is the relative path from the top of the source tree - # (where the .git directory might live) to this file. Invert this to find - # the root from __file__. - root = here - if IN_LONG_VERSION_PY: - for i in range(len(versionfile_source.split("/"))): - root = os.path.dirname(root) - else: - root = os.path.dirname(here) if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %%s" %% root) - return {} + return {} # get_versions() will try next method - GIT = "git" + GITS = ["git"] if sys.platform == "win32": - GIT = "git.cmd" - stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"], + GITS = ["git.cmd", "git.exe"] + # if there is a tag, this yields TAG-NUM-gHEX[-dirty] + # if there are no tags, this yields HEX[-dirty] (no NUM) + stdout = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long"], cwd=root) + # --long was added in git-1.5.5 if stdout is None: - return {} - if not stdout.startswith(tag_prefix): - if verbose: - print("tag '%%s' doesn't start with prefix '%%s'" %% (stdout, tag_prefix)) - return {} - tag = stdout[len(tag_prefix):] - stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root) + return {} # try next method + version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) + + # build "full", which is FULLHEX[.dirty] + stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if stdout is None: return {} full = stdout.strip() - if tag.endswith("-dirty"): - full += "-dirty" - return {"version": tag, "full": full} + if dirty: + full += ".dirty" + + return {"version": version, "full": full} -def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False): - if IN_LONG_VERSION_PY: - # We're running from _version.py. 
If it's from a source tree - # (execute-in-place), we can work upwards to find the root of the - # tree, and then check the parent directory for a version string. If - # it's in an installed application, there's no hope. - try: - here = os.path.abspath(__file__) - except NameError: - # py2exe/bbfreeze/non-CPython don't have __file__ - return {} # without __file__, we have no hope +def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False): + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + keywords = {"refnames": git_refnames, "full": git_full} + ver = git_versions_from_keywords(keywords, tag_prefix, verbose) + if ver: + return ver + + try: + root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source - # tree to _version.py. Invert this to find the root from __file__. - root = here - for i in range(len(versionfile_source.split("/"))): + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for i in versionfile_source.split('/'): root = os.path.dirname(root) - else: - # we're running from versioneer.py, which means we're running from - # the setup.py in a source tree. sys.argv[0] is setup.py in the root. - here = os.path.abspath(sys.argv[0]) - root = os.path.dirname(here) - - # Source tarballs conventionally unpack into a directory that includes - # both the project name and a version string. 
- dirname = os.path.basename(root) - if not dirname.startswith(parentdir_prefix): - if verbose: - print("guessing rootdir is '%%s', but '%%s' doesn't start with prefix '%%s'" %% - (root, dirname, parentdir_prefix)) - return None - return {"version": dirname[len(parentdir_prefix):], "full": ""} - -tag_prefix = "%(TAG_PREFIX)s" -parentdir_prefix = "%(PARENTDIR_PREFIX)s" -versionfile_source = "%(VERSIONFILE_SOURCE)s" - -def get_versions(default={"version": "unknown", "full": ""}, verbose=False): - variables = { "refnames": git_refnames, "full": git_full } - ver = versions_from_expanded_variables(variables, tag_prefix, verbose) - if not ver: - ver = versions_from_vcs(tag_prefix, versionfile_source, verbose) - if not ver: - ver = versions_from_parentdir(parentdir_prefix, versionfile_source, - verbose) - if not ver: - ver = default - return ver + except NameError: + return default + return (git_versions_from_vcs(tag_prefix, root, verbose) + or versions_from_parentdir(parentdir_prefix, root, verbose) + or default) ''' -import subprocess -import sys - -def run_command(args, cwd=None, verbose=False): - try: - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd) - except EnvironmentError: - e = sys.exc_info()[1] - if verbose: - print("unable to run %s" % args[0]) - print(e) - return None - stdout = p.communicate()[0].strip() - if sys.version >= '3': - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % args[0]) - return None - return stdout - - -import sys -import re -import os.path - -def get_expanded_variables(versionfile_source): +def git_get_keywords(versionfile_abs): # the code embedded in _version.py can just fetch the value of these - # variables. When used from setup.py, we don't want to import - # _version.py, so we do it with a regexp instead. This function is not - # used from _version.py. - variables = {} + # keywords. 
When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} try: - for line in open(versionfile_source,"r").readlines(): + f = open(versionfile_abs, "r") + for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: - variables["refnames"] = mo.group(1) + keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: - variables["full"] = mo.group(1) + keywords["full"] = mo.group(1) + f.close() except EnvironmentError: pass - return variables + return keywords -def versions_from_expanded_variables(variables, tag_prefix, verbose=False): - refnames = variables["refnames"].strip() + +def git_versions_from_keywords(keywords, tag_prefix, verbose=False): + if not keywords: + return {} # keyword-finding function failed to find keywords + refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: - print("variables are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball + print("keywords are unexpanded, not using") + return {} # unexpanded, so not in an unpacked git-archive tarball refs = set([r.strip() for r in refnames.strip("()").split(",")]) - for ref in list(refs): - if not re.search(r'\d', ref): - if verbose: - print("discarding '%s', no digits" % ref) - refs.discard(ref) - # Assume all version tags have a digit. git's %d expansion - # behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us - # distinguish between branches and tags. By ignoring refnames - # without digits, we filter out many common branch names like - # "release" and "stabilization", as well as "HEAD" and "master". + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
+ TAG = "tag: " + tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = set([r for r in refs if re.search(r'\d', r)]) + if verbose: + print("discarding '%s', no digits" % ",".join(refs-tags)) if verbose: - print("remaining refs: %s" % ",".join(sorted(refs))) - for ref in sorted(refs): + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %s" % r) - return { "version": r, - "full": variables["full"].strip() } - # no suitable tags, so we use the full revision id + return {"version": r, + "full": keywords["full"].strip()} + # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: - print("no suitable tags, using full revision id") - return { "version": variables["full"].strip(), - "full": variables["full"].strip() } + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full": keywords["full"].strip()} -def versions_from_vcs(tag_prefix, versionfile_source, verbose=False): - # this runs 'git' from the root of the source tree. 
That either means - # someone ran a setup.py command (and this code is in versioneer.py, so - # IN_LONG_VERSION_PY=False, thus the containing directory is the root of - # the source tree), or someone ran a project-specific entry point (and - # this code is in _version.py, so IN_LONG_VERSION_PY=True, thus the - # containing directory is somewhere deeper in the source tree). This only - # gets called if the git-archive 'subst' variables were *not* expanded, - # and _version.py hasn't already been rewritten with a short version - # string, meaning we're inside a checked out source tree. - try: - here = os.path.abspath(__file__) - except NameError: - # some py2exe/bbfreeze/non-CPython implementations don't do __file__ - return {} # not always correct +def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): + # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. + + # dirty + dirty = git_describe.endswith("-dirty") + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + dirty_suffix = ".dirty" if dirty else "" + + # now we have TAG-NUM-gHEX or HEX + + if "-" not in git_describe: # just HEX + return "0+untagged.g"+git_describe+dirty_suffix, dirty + + # just TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + return "0+unparseable"+dirty_suffix, dirty + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + return None, dirty + tag = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + distance = int(mo.group(2)) + + # commit: short hex revision ID + commit = mo.group(3) + + # now build up version string, with post-release "local version + # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a + # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . 
So you + # can always test version.endswith(".dirty"). + version = tag + if distance or dirty: + version += "+%d.g%s" % (distance, commit) + dirty_suffix + + return version, dirty + + +def git_versions_from_vcs(tag_prefix, root, verbose=False): + # this runs 'git' from the root of the source tree. This only gets called + # if the git-archive 'subst' keywords were *not* expanded, and + # _version.py hasn't already been rewritten with a short version string, + # meaning we're inside a checked out source tree. - # versionfile_source is the relative path from the top of the source tree - # (where the .git directory might live) to this file. Invert this to find - # the root from __file__. - root = here - if IN_LONG_VERSION_PY: - for i in range(len(versionfile_source.split("/"))): - root = os.path.dirname(root) - else: - root = os.path.dirname(here) if not os.path.exists(os.path.join(root, ".git")): if verbose: print("no .git in %s" % root) - return {} + return {} # get_versions() will try next method - GIT = "git" + GITS = ["git"] if sys.platform == "win32": - GIT = "git.cmd" - stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"], + GITS = ["git.cmd", "git.exe"] + # if there is a tag, this yields TAG-NUM-gHEX[-dirty] + # if there are no tags, this yields HEX[-dirty] (no NUM) + stdout = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long"], cwd=root) + # --long was added in git-1.5.5 if stdout is None: - return {} - if not stdout.startswith(tag_prefix): - if verbose: - print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix)) - return {} - tag = stdout[len(tag_prefix):] - stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root) + return {} # try next method + version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) + + # build "full", which is FULLHEX[.dirty] + stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if stdout is None: return {} full = stdout.strip() - if tag.endswith("-dirty"): - 
full += "-dirty" - return {"version": tag, "full": full} + if dirty: + full += ".dirty" + + return {"version": version, "full": full} -def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False): - if IN_LONG_VERSION_PY: - # We're running from _version.py. If it's from a source tree - # (execute-in-place), we can work upwards to find the root of the - # tree, and then check the parent directory for a version string. If - # it's in an installed application, there's no hope. - try: - here = os.path.abspath(__file__) - except NameError: - # py2exe/bbfreeze/non-CPython don't have __file__ - return {} # without __file__, we have no hope - # versionfile_source is the relative path from the top of the source - # tree to _version.py. Invert this to find the root from __file__. - root = here - for i in range(len(versionfile_source.split("/"))): - root = os.path.dirname(root) - else: - # we're running from versioneer.py, which means we're running from - # the setup.py in a source tree. sys.argv[0] is setup.py in the root. - here = os.path.abspath(sys.argv[0]) - root = os.path.dirname(here) - - # Source tarballs conventionally unpack into a directory that includes - # both the project name and a version string. 
- dirname = os.path.basename(root) - if not dirname.startswith(parentdir_prefix): - if verbose: - print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" % - (root, dirname, parentdir_prefix)) - return None - return {"version": dirname[len(parentdir_prefix):], "full": ""} - -import sys - -def do_vcs_install(versionfile_source, ipy): - GIT = "git" +def do_vcs_install(manifest_in, versionfile_source, ipy): + GITS = ["git"] if sys.platform == "win32": - GIT = "git.cmd" - run_command([GIT, "add", "versioneer.py"]) - run_command([GIT, "add", versionfile_source]) - run_command([GIT, "add", ipy]) + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + me = __file__ + if me.endswith(".pyc") or me.endswith(".pyo"): + me = os.path.splitext(me)[0] + ".py" + versioneer_file = os.path.relpath(me) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) present = False try: f = open(".gitattributes", "r") @@ -475,16 +743,28 @@ def do_vcs_install(versionfile_source, ipy): present = True f.close() except EnvironmentError: - pass + pass if not present: f = open(".gitattributes", "a+") f.write("%s export-subst\n" % versionfile_source) f.close() - run_command([GIT, "add", ".gitattributes"]) - + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose=False): + # Source tarballs conventionally unpack into a directory that includes + # both the project name and a version string. 
+ dirname = os.path.basename(root) + if not dirname.startswith(parentdir_prefix): + if verbose: + print("guessing rootdir is '%s', but '%s' doesn't start with " + "prefix '%s'" % (root, dirname, parentdir_prefix)) + return None + return {"version": dirname[len(parentdir_prefix):], "full": ""} SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.7+) from +# This file was generated by 'versioneer.py' (0.14) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. @@ -496,99 +776,159 @@ def get_versions(default={}, verbose=False): """ -DEFAULT = {"version": "unknown", "full": "unknown"} +DEFAULT = {"version": "0+unknown", "full": "unknown"} + def versions_from_file(filename): versions = {} try: - f = open(filename) + with open(filename) as f: + for line in f.readlines(): + mo = re.match("version_version = '([^']+)'", line) + if mo: + versions["version"] = mo.group(1) + mo = re.match("version_full = '([^']+)'", line) + if mo: + versions["full"] = mo.group(1) except EnvironmentError: - return versions - for line in f.readlines(): - mo = re.match("version_version = '([^']+)'", line) - if mo: - versions["version"] = mo.group(1) - mo = re.match("version_full = '([^']+)'", line) - if mo: - versions["full"] = mo.group(1) + return {} + return versions + def write_to_version_file(filename, versions): - f = open(filename, "w") - f.write(SHORT_VERSION_PY % versions) - f.close() + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % versions) + print("set %s to '%s'" % (filename, versions["version"])) -def get_best_versions(versionfile, tag_prefix, parentdir_prefix, - default=DEFAULT, verbose=False): - # returns dict with two keys: 'version' and 'full' - # - # extract version from first of _version.py, 'git describe', parentdir. 
- # This is meant to work for developers using a source checkout, for users - # of a tarball created by 'setup.py sdist', and for users of a - # tarball/zipball created by 'git archive' or github's download-from-tag - # feature. +def get_root(): + try: + return os.path.dirname(os.path.abspath(__file__)) + except NameError: + return os.path.dirname(os.path.abspath(sys.argv[0])) - variables = get_expanded_variables(versionfile_source) - if variables: - ver = versions_from_expanded_variables(variables, tag_prefix) - if ver: - if verbose: print("got version from expanded variable %s" % ver) - return ver - ver = versions_from_file(versionfile) - if ver: - if verbose: print("got version from file %s %s" % (versionfile, ver)) - return ver +def vcs_function(vcs, suffix): + return getattr(sys.modules[__name__], '%s_%s' % (vcs, suffix), None) - ver = versions_from_vcs(tag_prefix, versionfile_source, verbose) - if ver: - if verbose: print("got version from git %s" % ver) - return ver - - ver = versions_from_parentdir(parentdir_prefix, versionfile_source, verbose) - if ver: - if verbose: print("got version from parentdir %s" % ver) - return ver - - if verbose: print("got version from default %s" % ver) - return default def get_versions(default=DEFAULT, verbose=False): - assert versionfile_source is not None, "please set versioneer.versionfile_source" + # returns dict with two keys: 'version' and 'full' + assert versionfile_source is not None, \ + "please set versioneer.versionfile_source" assert tag_prefix is not None, "please set versioneer.tag_prefix" - assert parentdir_prefix is not None, "please set versioneer.parentdir_prefix" - return get_best_versions(versionfile_source, tag_prefix, parentdir_prefix, - default=default, verbose=verbose) + assert parentdir_prefix is not None, \ + "please set versioneer.parentdir_prefix" + assert VCS is not None, "please set versioneer.VCS" + + # I am in versioneer.py, which must live at the top of the source tree, + # which we use to 
compute the root directory. py2exe/bbfreeze/non-CPython + # don't have __file__, in which case we fall back to sys.argv[0] (which + # ought to be the setup.py script). We prefer __file__ since that's more + # robust in cases where setup.py was invoked in some weird way (e.g. pip) + root = get_root() + versionfile_abs = os.path.join(root, versionfile_source) + + # extract version from first of _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. + + get_keywords_f = vcs_function(VCS, "get_keywords") + versions_from_keywords_f = vcs_function(VCS, "versions_from_keywords") + if get_keywords_f and versions_from_keywords_f: + vcs_keywords = get_keywords_f(versionfile_abs) + ver = versions_from_keywords_f(vcs_keywords, tag_prefix) + if ver: + if verbose: + print("got version from expanded keyword %s" % ver) + return ver + + ver = versions_from_file(versionfile_abs) + if ver: + if verbose: + print("got version from file %s %s" % (versionfile_abs, ver)) + return ver + + versions_from_vcs_f = vcs_function(VCS, "versions_from_vcs") + if versions_from_vcs_f: + ver = versions_from_vcs_f(tag_prefix, root, verbose) + if ver: + if verbose: + print("got version from VCS %s" % ver) + return ver + + ver = versions_from_parentdir(parentdir_prefix, root, verbose) + if ver: + if verbose: + print("got version from parentdir %s" % ver) + return ver + + if verbose: + print("got version from default %s" % default) + return default + + def get_version(verbose=False): return get_versions(verbose=verbose)["version"] + class cmd_version(Command): description = "report generated version string" user_options = [] boolean_options = [] + def initialize_options(self): pass + def finalize_options(self): pass + def run(self): ver = 
get_version(verbose=True) print("Version is currently: %s" % ver) class cmd_build(_build): - def run2(self): + def run(self): versions = get_versions(verbose=True) _build.run(self) # now locate _version.py in the new build/ directory and replace it # with an updated value - target_versionfile = os.path.join(self.build_lib, versionfile_build) - print("UPDATING %s" % target_versionfile) - os.unlink(target_versionfile) - f = open(target_versionfile, "w") - f.write(SHORT_VERSION_PY % versions) - f.close() + if versionfile_build: + target_versionfile = os.path.join(self.build_lib, + versionfile_build) + print("UPDATING %s" % target_versionfile) + os.unlink(target_versionfile) + with open(target_versionfile, "w") as f: + f.write(SHORT_VERSION_PY % versions) + +if 'cx_Freeze' in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + + class cmd_build_exe(_build_exe): + def run(self): + versions = get_versions(verbose=True) + target_versionfile = versionfile_source + print("UPDATING %s" % target_versionfile) + os.unlink(target_versionfile) + with open(target_versionfile, "w") as f: + f.write(SHORT_VERSION_PY % versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(versionfile_source, "w") as f: + assert VCS is not None, "please set versioneer.VCS" + LONG = LONG_VERSION_PY[VCS] + f.write(LONG % {"DOLLAR": "$", + "TAG_PREFIX": tag_prefix, + "PARENTDIR_PREFIX": parentdir_prefix, + "VERSIONFILE_SOURCE": versionfile_source, + }) + class cmd_sdist(_sdist): def run(self): @@ -605,9 +945,8 @@ class cmd_sdist(_sdist): target_versionfile = os.path.join(base_dir, versionfile_source) print("UPDATING %s" % target_versionfile) os.unlink(target_versionfile) - f = open(target_versionfile, "w") - f.write(SHORT_VERSION_PY % self._versioneer_generated_versions) - f.close() + with open(target_versionfile, "w") as f: + f.write(SHORT_VERSION_PY % self._versioneer_generated_versions) INIT_PY_SNIPPET = """ from ._version import 
get_versions @@ -615,40 +954,93 @@ __version__ = get_versions()['version'] del get_versions """ + class cmd_update_files(Command): - description = "modify __init__.py and create _version.py" + description = ("install/upgrade Versioneer files: " + "__init__.py SRC/_version.py") user_options = [] boolean_options = [] + def initialize_options(self): pass + def finalize_options(self): pass + def run(self): - ipy = os.path.join(os.path.dirname(versionfile_source), "__init__.py") print(" creating %s" % versionfile_source) - f = open(versionfile_source, "w") - f.write(LONG_VERSION_PY % {"DOLLAR": "$", - "TAG_PREFIX": tag_prefix, - "PARENTDIR_PREFIX": parentdir_prefix, - "VERSIONFILE_SOURCE": versionfile_source, - }) - f.close() - try: - old = open(ipy, "r").read() - except EnvironmentError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - f = open(ipy, "a") - f.write(INIT_PY_SNIPPET) - f.close() + with open(versionfile_source, "w") as f: + assert VCS is not None, "please set versioneer.VCS" + LONG = LONG_VERSION_PY[VCS] + f.write(LONG % {"DOLLAR": "$", + "TAG_PREFIX": tag_prefix, + "PARENTDIR_PREFIX": parentdir_prefix, + "VERSIONFILE_SOURCE": versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(versionfile_source), "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except EnvironmentError: + old = "" + if INIT_PY_SNIPPET not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(INIT_PY_SNIPPET) + else: + print(" %s unmodified" % ipy) else: - print(" %s unmodified" % ipy) - do_vcs_install(versionfile_source, ipy) + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. 
+ manifest_in = os.path.join(get_root(), "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except EnvironmentError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if versionfile_source not in simple_includes: + print(" appending versionfile_source ('%s') to MANIFEST.in" % + versionfile_source) + with open(manifest_in, "a") as f: + f.write("include %s\n" % versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-time keyword + # substitution. + do_vcs_install(manifest_in, versionfile_source, ipy) + def get_cmdclass(): - return {'version': cmd_version, - 'update_files': cmd_update_files, + cmds = {'version': cmd_version, + 'versioneer': cmd_update_files, 'build': cmd_build, 'sdist': cmd_sdist, } + if 'cx_Freeze' in sys.modules: # cx_freeze enabled? 
+ cmds['build_exe'] = cmd_build_exe + del cmds['build'] + + return cmds From e939172c775754446cc35a290f85f6c789557c92 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 14 May 2015 02:03:42 +0200 Subject: [PATCH 094/241] prefix issues numbers referring to attic issue tracker with "attic #" --- CHANGES | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CHANGES b/CHANGES index e0effdf8d..e2a1e31bd 100644 --- a/CHANGES +++ b/CHANGES @@ -6,15 +6,15 @@ Version - give specific path to xattr.is_enabled(), disable symlink setattr call that always fails -- fix misleading hint the fuse ImportError handler gave, fixes #237 +- fix misleading hint the fuse ImportError handler gave, fixes attic #237 - source: misc. cleanups, pep8, style - implement check --last N - check: sort archives in reverse time order -- avoid defect python-msgpack releases, fixes #171, fixes #185 -- check unpacked data from RPC for tuple type and correct length, fixes #127 +- avoid defect python-msgpack releases, fixes attic #171, fixes attic #185 +- check unpacked data from RPC for tuple type and correct length, fixes attic #127 - less memory usage: add global option --no-cache-files -- fix traceback when trying to do unsupported passphrase change, fixes #189 -- datetime does not like the year 10.000, fixes issue #139 +- fix traceback when trying to do unsupported passphrase change, fixes attic #189 +- datetime does not like the year 10.000, fixes attic #139 - docs improvements, fixes, updates - cleanup crypto.pyx, make it easier to adapt to other modes - extract: if --stdout is given, write all extracted binary data to stdout @@ -24,21 +24,21 @@ Version - FUSE: reflect deduplication in allocated blocks - only allow whitelisted RPC calls in server mode - normalize source/exclude paths before matching -- fix "info" all archives stats, fixes #183 +- fix "info" all archives stats, fixes attic #183 - implement create --timestamp, utc string or reference file/dir - 
simple sparse file support (extract --sparse) -- fix parsing with missing microseconds, fixes #282 -- use posix_fadvise to not spoil the OS cache, fixes #252 +- fix parsing with missing microseconds, fixes attic #282 +- use posix_fadvise to not spoil the OS cache, fixes attic #252 - source: Let chunker optionally work with os-level file descriptor. - source: Linux: remove duplicate os.fsencode calls -- fix test setup for 32bit platforms, partial fix for #196 +- fix test setup for 32bit platforms, partial fix for attic #196 - source: refactor _open_rb code a bit, so it is more consistent / regular - implement rename repo::oldname newname - implement create --progress - source: refactor indicator (status) and item processing - implement delete (also deletes local cache) - better create -v output - +- upgraded versioneer, fixes attic #257 Attic Changelog =============== From 3d5b1cfc230189f553c794f7bff73354c02cba7e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 14 May 2015 02:12:36 +0200 Subject: [PATCH 095/241] updated CHANGES about the name change --- CHANGES | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES b/CHANGES index e2a1e31bd..4fc68fbfc 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,10 @@ Borg Changelog Version ------------- +- changed sw name and cli command to "borg", updated docs +- package name and name in urls uses "borgbackup" to have less collisions +- changed repo / cache internal magic strings from ATTIC* to BORG*, + changed cache location to .cache/borg/ - give specific path to xattr.is_enabled(), disable symlink setattr call that always fails - fix misleading hint the fuse ImportError handler gave, fixes attic #237 @@ -40,6 +44,7 @@ Version - better create -v output - upgraded versioneer, fixes attic #257 + Attic Changelog =============== From 5f4e928eff6c8984b8ea48e7e2f45f9c799b8184 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 14 May 2015 11:44:58 +0200 Subject: [PATCH 096/241] add FreeBSD to platforms list in 
setup.py it already was present in the classifiers. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 89ba31873..1620be036 100644 --- a/setup.py +++ b/setup.py @@ -105,7 +105,7 @@ setup( description='Deduplicated, encrypted, authenticated and compressed backups', long_description=long_description, license='BSD', - platforms=['Linux', 'MacOS X'], + platforms=['Linux', 'MacOS X', 'FreeBSD', ], classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: Console', From 7fee5dbbbf58c9d4edff9940372090e008eee493 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 14 May 2015 12:04:38 +0200 Subject: [PATCH 097/241] add python version classifiers --- setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup.py b/setup.py index 1620be036..1e4686dba 100644 --- a/setup.py +++ b/setup.py @@ -115,6 +115,10 @@ setup( 'Operating System :: MacOS :: MacOS X', 'Operating System :: POSIX :: Linux', 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.2', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', 'Topic :: Security :: Cryptography', 'Topic :: System :: Archiving :: Backup', ], From 4ce754178cdf4f79a96f3da702c51018e405100b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 14 May 2015 16:46:44 +0200 Subject: [PATCH 098/241] merged attic PR #286 - sigusr1 / sigint handler to print current file infos --- attic/archiver.py | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index 78864be00..d0d1f6f92 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -3,8 +3,10 @@ from binascii import hexlify from datetime import datetime from operator import attrgetter import functools +import inspect import io import os +import signal import stat import sys import textwrap @@ -14,7 +16,7 @@ from attic.archive import 
Archive, ArchiveChecker from attic.repository import Repository from attic.cache import Cache from attic.key import key_creator -from attic.helpers import Error, location_validator, format_time, \ +from attic.helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ @@ -807,11 +809,45 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") return args.func(args) +def sig_info_handler(signum, stack): + """search the stack for infos about the currently processed file and print them""" + for frame in inspect.getouterframes(stack): + func, loc = frame[3], frame[0].f_locals + if func in ('process_file', '_process', ): # attic create + path = loc['path'] + try: + pos = loc['fd'].tell() + total = loc['st'].st_size + except Exception: + pos, total = 0, 0 + print("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total))) + break + if func in ('extract_item', ): # attic extract + path = loc['item'][b'path'] + try: + pos = loc['fd'].tell() + except Exception: + pos = 0 + print("{0} {1}/???".format(path, format_file_size(pos))) + break + + +def setup_signal_handlers(): + sigs = [] + if hasattr(signal, 'SIGUSR1'): + sigs.append(signal.SIGUSR1) # kill -USR1 pid + if hasattr(signal, 'SIGINFO'): + sigs.append(signal.SIGINFO) # kill -INFO pid (or ctrl-t) + for sig in sigs: + signal.signal(sig, sig_info_handler) + + def main(): # Make sure stdout and stderr have errors='replace') to avoid unicode # issues when print()-ing unicode file names sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True) sys.stderr = io.TextIOWrapper(sys.stderr.buffer, sys.stderr.encoding, 'replace', line_buffering=True) + setup_signal_handlers() 
archiver = Archiver() try: exit_code = archiver.run(sys.argv[1:]) From 7cbff6925fa8eef68114cf1febc6fe149bda7622 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 14 May 2015 17:36:53 +0200 Subject: [PATCH 099/241] Documentation for 'attic serve' Thanks to @lfam (attic PR #277 )! Note: As I already had refactored a lot of these pathes you changed, it was easier to just cherry pick the hunks with the other changes and apply them manually. --- docs/quickstart.rst | 16 ++++++++++++---- docs/update_usage.sh | 2 +- docs/usage.rst | 12 ++++++++++++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index a6f131c8d..366f3aacb 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -138,17 +138,25 @@ Remote repositories host is accessible using SSH. This is fastest and easiest when |project_name| is installed on the remote host, in which case the following syntax is used:: - $ borg init user@hostname:backup + $ borg init user@hostname:/mnt/backup or:: - $ borg init ssh://user@hostname:port/backup + $ borg init ssh://user@hostname:port//mnt/backup -If it is not possible to install |project_name| on the remote host, +Remote operations over SSH can be automated with SSH keys. You can restrict the +use of the SSH keypair by prepending a forced command to the SSH public key in +the remote server's authorized_keys file. Only the forced command will be run +when the key authenticates a connection. This example will start attic in server +mode, and limit the attic server to a specific filesystem path:: + + command="attic serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...] 
+ +If it is not possible to install |project_name| on the remote host, it is still possible to use the remote host to store a repository by mounting the remote filesystem, for example, using sshfs:: - $ sshfs user@hostname:/path/to/folder /mnt + $ sshfs user@hostname:/mnt /mnt $ borg init /mnt/backup $ fusermount -u /mnt diff --git a/docs/update_usage.sh b/docs/update_usage.sh index 3089d6297..9e79f4e88 100755 --- a/docs/update_usage.sh +++ b/docs/update_usage.sh @@ -2,7 +2,7 @@ if [ ! -d usage ]; then mkdir usage fi -for cmd in change-passphrase check create delete extract info init list mount prune; do +for cmd in change-passphrase check create delete extract info init list mount prune serve; do FILENAME="usage/$cmd.rst.inc" LINE=`echo -n borg $cmd | tr 'a-z- ' '-'` echo -e ".. _borg_$cmd:\n" > $FILENAME diff --git a/docs/usage.rst b/docs/usage.rst index 2f391b043..bc5c011b9 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -168,3 +168,15 @@ Examples Enter same passphrase again: Key file "/home/USER/.borg/keys/mnt_backup" updated + +.. include:: usage/serve.rst.inc + +Examples +~~~~~~~~ +:: + + # Allow an SSH keypair to only run attic, and only have access to /mnt/backup. + # This will help to secure an automated remote backup system. + $ cat ~/.ssh/authorized_keys + command="attic serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...] + From c3370a5f076683a944a2cb574f264af48592d659 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 14 May 2015 18:48:14 +0200 Subject: [PATCH 100/241] improve docs about what is preserved and what not UNIX domain sockets: explain why not, see attic issue #259 Symlinks: say that they are backed up as is and not followed, replacement for attic PR #294 Sparse files: explain what the "simple" in simple sparse file support means. Plus some other explanations / mentions that were missing. 
--- docs/faq.rst | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index d6ffe99ea..1721d9531 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -25,20 +25,33 @@ Can I backup from multiple servers into a single repository? Borg will keep an exclusive lock on the repository while creating or deleting archives, which may make *simultaneous* backups fail. -Which file attributes are preserved? - The following attributes are preserved: - +Which file types, attributes, etc. are preserved? + * Directories + * Regular files + * Hardlinks (considering all files in the same archive) + * Symlinks (stored as symlink, the symlink is not followed) + * Character and block device files + * FIFOs ("named pipes") * Name * Contents - * Hardlinks and symlinks * Time of last modification (nanosecond precision with Python >= 3.3) * User ID of owner * Group ID of owner - * Unix Permission - * Extended attributes (xattrs) + * Unix Mode/Permissions (u/g/o permissions, suid, sgid, sticky) + * Extended Attributes (xattrs) * Access Control Lists (ACL_) on Linux, OS X and FreeBSD * BSD flags on OS X and FreeBSD +Which file types, attributes, etc. are *not* preserved? + * UNIX domain sockets (because it does not make sense - they are meaningless + without the running process that created them and the process needs to + recreate them in any case). So, don't panic if your backup misses a UDS! + * The precise on-disk representation of the holes in a sparse file. + Archive creation has no special support for sparse files, holes are + backed up up as (deduplicated and compressed) runs of zero bytes. + Archive extraction has optional support to extract all-zero chunks as + holes in a sparse file. + How can I specify the encryption passphrase programmatically? The encryption passphrase can be specified programmatically using the `BORG_PASSPHRASE` environment variable. 
This is convenient when setting up From f30d05fcd41da13c434efc39d7f97fb53bb47798 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 14 May 2015 20:47:08 +0200 Subject: [PATCH 101/241] docs: install docs, faq improvements, other minor changes --- docs/faq.rst | 11 ++++- docs/index.rst | 18 +++++--- docs/installation.rst | 97 +++++++++++++++++++++++++------------------ docs/quickstart.rst | 13 +++--- 4 files changed, 86 insertions(+), 53 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index 1721d9531..54bd47c1e 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -7,10 +7,13 @@ Frequently asked questions Which platforms are supported? Currently Linux, FreeBSD and MacOS X are supported. + You can try your luck on other POSIX-like systems, like Cygwin, + other BSDs, etc. but they are not officially supported. Can I backup VM disk images? Yes, the :ref:`deduplication ` technique used by |project_name| makes sure only the modified parts of the file are stored. + Also, we have optional simple sparse file support for extract. Can I backup from multiple servers into a single repository? Yes, but in order for the deduplication used by Borg to work, it @@ -62,7 +65,13 @@ How can I specify the encryption passphrase programmatically? When backing up to remote servers, is data encrypted before leaving the local machine, or do I have to trust that the remote server isn't malicious? Yes, everything is encrypted before leaving the local machine. -If a backup stops mid-way, does the already-backed-up data stay there? I.e. does Borg resume backups? +If a backup stops mid-way, does the already-backed-up data stay there? I.e. does |project_name| resume backups? Yes, during a backup a special checkpoint archive named ``.checkpoint`` is saved every 5 minutes containing all the data backed-up until that point. This means that at most 5 minutes worth of data needs to be retransmitted if a backup needs to be restarted. + +If it crashes with a UnicodeError, what can I do? 
+ Check if your encoding is set correctly. For most POSIX-like systems, try:: + + export LANG=en_US.UTF-8 # or similar, important is correct charset + diff --git a/docs/index.rst b/docs/index.rst index 723ad3d7f..cca367317 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,10 +2,16 @@ Welcome to Borg ================ -|project_name| is a deduplicating backup program written in Python. +|project_name| is a deduplicating and compressing backup program. +Optionally, it also supports authenticated encryption. + The main goal of |project_name| is to provide an efficient and secure way to backup data. The data deduplication technique used makes |project_name| -suitable for daily backups since only the changes are stored. +suitable for daily backups since only the changes are stored. The authenticated +encryption makes it suitable for backups to not fully trusted targets. + +|project_name| is written in Python (with a little bit of Cython and C for +the speed critical parts). Easy to use @@ -52,9 +58,11 @@ User's Guide Getting help ============ -If you've found a bug or have a concrete feature request, you can add your bug -report or feature request directly to the project's `issue tracker`_. For more -general questions or discussions, a post to the mailing list is preferred. +If you've found a bug or have a concrete feature request, please create a new +ticket on the project's `issue tracker`_ (after checking whether someone else +already has reported the same thing). + +For more general questions or discussions, IRC or mailing list are preferred. IRC --- diff --git a/docs/installation.rst b/docs/installation.rst index c42c99473..ec47fe3ae 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -4,62 +4,79 @@ Installation ============ -|project_name| requires Python_ 3.2 or above to work. Even though Python 3 is -not the default Python version on most Linux distributions, it is usually -available as an optional install. 
+|project_name| requires: -Other dependencies: - -* `msgpack-python`_ >= 0.1.10 +* Python_ >= 3.2 * OpenSSL_ >= 1.0.0 * libacl_ +* some python dependencies, see install_requires in setup.py -The OpenSSL version bundled with Mac OS X and FreeBSD is most likey too old. -Newer versions are available from homebrew_ on OS X and from FreeBSD ports. +General notes +------------- +Even though Python 3 is not the default Python version on many systems, it is +usually available as an optional install. + +Virtualenv_ can be used to build and install |project_name| without affecting +the system Python or requiring root access. The llfuse_ python package is also required if you wish to mount an -archive as a FUSE filesystem. +archive as a FUSE filesystem. Only FUSE >= 2.8.0 can support llfuse. -Virtualenv_ can be used to build and install |project_name| -without affecting the system Python or requiring root access. +You only need Cython to compile the .pyx files to the respective .c files +when using |project_name| code from git. For |project_name| releases, the .c +files will be bundled. -Common compilation pre-requisites ---------------------------------- +Platform notes +-------------- +FreeBSD: You may need to get a recent enough OpenSSL version from FreeBSD ports. -The following Debian packages are generally necessary to compile -|project_name|, either through pip, the tarball or git:: +Mac OS X: You may need to get a recent enough OpenSSL version from homebrew_. - $ sudo apt-get install python3 python3-dev python3-msgpack python3-sphinx libssl-dev libacl1-dev +Mac OS X: A recent enough FUSE implementation might be unavailable. -Installing from PyPI using pip ------------------------------- -To install |project_name| system-wide:: +Debian / Ubuntu installation (from git) +--------------------------------------- +Note: this uses latest, unreleased development code from git. +While we try not to break master, there are no guarantees on anything. 
- $ sudo pip3 install borgbackup +Some of the steps detailled below might be useful also for non-git installs. -To install it in a user-specific account:: - - $ pip3 install --user borgbackup - -Then add ``$HOME/.library/bin`` to your ``$PATH``. - -Installing from source tarballs -------------------------------- .. parsed-literal:: - $ curl -O :targz_url:`Borg` - $ tar -xvzf |package_filename| - $ cd |package_dirname| - $ sudo python3 setup.py install + # Python 3.x (>= 3.2) + Headers, Py Package Installer + apt-get install python3 python3-dev python3-pip -Installing from git -------------------- -.. parsed-literal:: + # we need OpenSSL + Headers for Crypto + apt-get install libssl-dev openssl - $ git clone |git_url| - $ cd borg - $ sudo python3 setup.py install + # ACL support Headers + Library + apt-get install libacl1-dev libacl1 + + # if you do not have gcc / make / etc. yet + apt-get install build-essential + + # optional: lowlevel FUSE py binding - to mount backup archives + apt-get install python3-llfuse fuse + + # optional: for unit testing + apt-get install fakeroot + + # install virtualenv tool, create and activate a virtual env + apt-get install python-virtualenv + virtualenv --python=python3 borg-env + source borg-env/bin/activate # always do this before using! + + # install some dependencies into virtual env + pip install cython # to compile .pyx -> .c + pip install tox # optional, for running unit tests + pip install sphinx # optional, to build the docs + + # get |project_name| from github, install it + git clone |git_url| + cd borg + pip install -e . # in-place editable mode + + # optional: run all the tests, on all supported Python versions + fakeroot -u tox -Please note that when installing from git, Cython_ is required to generate some files that -are normally bundled with the release tarball. 
diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 366f3aacb..b4273167c 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -68,18 +68,17 @@ A step by step example Automating backups ------------------ -The following example script backs up ``/home`` and -``/var/www`` to a remote server. The script also uses the -:ref:`borg_prune` subcommand to maintain a certain number -of old archives:: +The following example script backs up ``/home`` and ``/var/www`` to a remote +server. The script also uses the :ref:`borg_prune` subcommand to maintain a +certain number of old archives:: #!/bin/sh REPOSITORY=username@remoteserver.com:backup # Backup all of /home and /var/www except a few # excluded directories - borg create --stats \ - $REPOSITORY::hostname-`date +%Y-%m-%d` \ + borg create --stats \ + $REPOSITORY::`hostname`-`date +%Y-%m-%d` \ /home \ /var/www \ --exclude /home/*/.cache \ @@ -103,7 +102,7 @@ When repository encryption is enabled all data is encrypted using 256-bit AES_ encryption and the integrity and authenticity is verified using `HMAC-SHA256`_. All data is encrypted before being written to the repository. This means that -an attacker that manages to compromise the host containing an encrypted +an attacker who manages to compromise the host containing an encrypted archive will not be able to access any of the data. |project_name| supports two different methods to derive the AES and HMAC keys. From ff0c264e3e1941ded9b455adc91288ab45f966f1 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 15 May 2015 01:07:04 +0200 Subject: [PATCH 102/241] fix small doc bug in prune examples, fix attic #306 --monthly is old/deprecated, --keep-monthly is new way. 
--- docs/usage.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index bc5c011b9..7ae76e6a2 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -109,11 +109,11 @@ Examples # Keep 7 end of day, 4 additional end of week archives, # and an end of month archive for every month: - $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 --monthly=-1 + $ borg prune /mnt/backup --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 # Keep all backups in the last 10 days, 4 additional end of week archives, # and an end of month archive for every month: - $ borg prune /mnt/backup --keep-within=10d --keep-weekly=4 --monthly=-1 + $ borg prune /mnt/backup --keep-within=10d --keep-weekly=4 --keep-monthly=-1 .. include:: usage/info.rst.inc From 37c923d4f054daa2c43aaf69815030e66fc8ae43 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 15 May 2015 01:50:45 +0200 Subject: [PATCH 103/241] misc. minor docs fixes --- docs/conf.py | 2 +- docs/faq.rst | 2 +- docs/index.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index bd8b14bff..d7f29e69e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -120,7 +120,7 @@ html_theme_path = ['_themes'] # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. diff --git a/docs/faq.rst b/docs/faq.rst index 54bd47c1e..407d475c4 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -73,5 +73,5 @@ If a backup stops mid-way, does the already-backed-up data stay there? I.e. does If it crashes with a UnicodeError, what can I do? Check if your encoding is set correctly. 
For most POSIX-like systems, try:: - export LANG=en_US.UTF-8 # or similar, important is correct charset + export LANG=en_US.UTF-8 # or similar, important is correct charset diff --git a/docs/index.rst b/docs/index.rst index cca367317..db4f4928d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,7 +11,7 @@ suitable for daily backups since only the changes are stored. The authenticated encryption makes it suitable for backups to not fully trusted targets. |project_name| is written in Python (with a little bit of Cython and C for -the speed critical parts). +the performance critical parts). Easy to use From 56fd96589b8caa435ee1b0963329173e4993dfa3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 15 May 2015 18:35:14 +0200 Subject: [PATCH 104/241] docs: make clear that remote repos used via ssh need a remote borg --- docs/usage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.rst b/docs/usage.rst index 7ae76e6a2..ad536a48f 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -24,7 +24,7 @@ Examples # Local repository $ borg init /mnt/backup - # Remote repository + # Remote repository (accesses a remote borg via ssh) $ borg init user@hostname:backup # Encrypted remote repository From afc2dab71e894cd0aa78bfd698acfd1aff5f595b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 15 May 2015 19:22:52 +0200 Subject: [PATCH 105/241] docs: more borg (or project_name) --- docs/quickstart.rst | 6 +++--- docs/usage.rst | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index b4273167c..d0881fb95 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -146,10 +146,10 @@ or:: Remote operations over SSH can be automated with SSH keys. You can restrict the use of the SSH keypair by prepending a forced command to the SSH public key in the remote server's authorized_keys file. Only the forced command will be run -when the key authenticates a connection. 
This example will start attic in server -mode, and limit the attic server to a specific filesystem path:: +when the key authenticates a connection. This example will start |project_name| in server +mode, and limit the |project_name| server to a specific filesystem path:: - command="attic serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...] + command="borg serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...] If it is not possible to install |project_name| on the remote host, it is still possible to use the remote host to store a repository by diff --git a/docs/usage.rst b/docs/usage.rst index ad536a48f..971a467b9 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -175,8 +175,8 @@ Examples ~~~~~~~~ :: - # Allow an SSH keypair to only run attic, and only have access to /mnt/backup. + # Allow an SSH keypair to only run |project_name|, and only have access to /mnt/backup. # This will help to secure an automated remote backup system. $ cat ~/.ssh/authorized_keys - command="attic serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...] + command="borg serve --restrict-to-path /mnt/backup" ssh-rsa AAAAB3[...] From 22acb80bf9ad8ce39f6bd9a99df6607075a6f379 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 21 May 2015 21:49:44 -0400 Subject: [PATCH 106/241] explain the reason behind the fork --- docs/faq.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index 407d475c4..dd69a4d7f 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -75,3 +75,9 @@ If it crashes with a UnicodeError, what can I do? export LANG=en_US.UTF-8 # or similar, important is correct charset +Why was borg forked from Attic? + borg was created in may 2015 in response to the difficulty of + getting new merge requests and larger changes incorporated into + attic. more details can be found in the `fairly long discussion + `_ in + attic issue queue that led to the fork. 
From 622e05bcee2b84a9027be95540a952f5989b70f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 21 May 2015 21:55:29 -0400 Subject: [PATCH 107/241] add more information about the objectives of borg --- docs/faq.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index dd69a4d7f..9354d84f7 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -81,3 +81,19 @@ Why was borg forked from Attic? attic. more details can be found in the `fairly long discussion `_ in attic issue queue that led to the fork. + + borg intends to be: + + * simple: + * as simple as possible, but no simpler + * do the right thing by default, but offer options + * open: + * welcome feature requests + * accept pull requests of good quality and coding style + * give feedback on PRs that can't be accepted "as is" + * discuss openly, don't work in the dark + * changing: + * do not break compatibility accidentally, without a good reason + or without warning + * borg is not backwards-compatible with attic + * major versions may not be compatible with older releases From 78bfc58b472703da5e21efcfb013dd6b0f1918be Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 22 May 2015 17:48:54 +0200 Subject: [PATCH 108/241] rename package directory to borg --- {attic => borg}/__init__.py | 0 {attic => borg}/_chunker.c | 0 {attic => borg}/_hashindex.c | 0 {attic => borg}/_version.py | 0 {attic => borg}/archive.py | 0 {attic => borg}/archiver.py | 0 {attic => borg}/cache.py | 0 {attic => borg}/chunker.pyx | 0 {attic => borg}/crypto.pyx | 0 {attic => borg}/fuse.py | 0 {attic => borg}/hashindex.pyx | 0 {attic => borg}/helpers.py | 0 {attic => borg}/key.py | 0 {attic => borg}/lrucache.py | 0 {attic => borg}/platform.py | 0 {attic => borg}/platform_darwin.pyx | 0 {attic => borg}/platform_freebsd.pyx | 0 {attic => borg}/platform_linux.pyx | 0 {attic => borg}/remote.py | 0 {attic => borg}/repository.py | 0 {attic => borg}/testsuite/__init__.py | 0 
{attic => borg}/testsuite/archive.py | 0 {attic => borg}/testsuite/archiver.py | 0 {attic => borg}/testsuite/chunker.py | 0 {attic => borg}/testsuite/crypto.py | 0 {attic => borg}/testsuite/hashindex.py | 0 {attic => borg}/testsuite/helpers.py | 0 {attic => borg}/testsuite/key.py | 0 {attic => borg}/testsuite/lrucache.py | 0 {attic => borg}/testsuite/mock.py | 0 {attic => borg}/testsuite/platform.py | 0 {attic => borg}/testsuite/repository.py | 0 {attic => borg}/testsuite/run.py | 0 {attic => borg}/testsuite/xattr.py | 0 {attic => borg}/xattr.py | 0 35 files changed, 0 insertions(+), 0 deletions(-) rename {attic => borg}/__init__.py (100%) rename {attic => borg}/_chunker.c (100%) rename {attic => borg}/_hashindex.c (100%) rename {attic => borg}/_version.py (100%) rename {attic => borg}/archive.py (100%) rename {attic => borg}/archiver.py (100%) rename {attic => borg}/cache.py (100%) rename {attic => borg}/chunker.pyx (100%) rename {attic => borg}/crypto.pyx (100%) rename {attic => borg}/fuse.py (100%) rename {attic => borg}/hashindex.pyx (100%) rename {attic => borg}/helpers.py (100%) rename {attic => borg}/key.py (100%) rename {attic => borg}/lrucache.py (100%) rename {attic => borg}/platform.py (100%) rename {attic => borg}/platform_darwin.pyx (100%) rename {attic => borg}/platform_freebsd.pyx (100%) rename {attic => borg}/platform_linux.pyx (100%) rename {attic => borg}/remote.py (100%) rename {attic => borg}/repository.py (100%) rename {attic => borg}/testsuite/__init__.py (100%) rename {attic => borg}/testsuite/archive.py (100%) rename {attic => borg}/testsuite/archiver.py (100%) rename {attic => borg}/testsuite/chunker.py (100%) rename {attic => borg}/testsuite/crypto.py (100%) rename {attic => borg}/testsuite/hashindex.py (100%) rename {attic => borg}/testsuite/helpers.py (100%) rename {attic => borg}/testsuite/key.py (100%) rename {attic => borg}/testsuite/lrucache.py (100%) rename {attic => borg}/testsuite/mock.py (100%) rename {attic => 
borg}/testsuite/platform.py (100%) rename {attic => borg}/testsuite/repository.py (100%) rename {attic => borg}/testsuite/run.py (100%) rename {attic => borg}/testsuite/xattr.py (100%) rename {attic => borg}/xattr.py (100%) diff --git a/attic/__init__.py b/borg/__init__.py similarity index 100% rename from attic/__init__.py rename to borg/__init__.py diff --git a/attic/_chunker.c b/borg/_chunker.c similarity index 100% rename from attic/_chunker.c rename to borg/_chunker.c diff --git a/attic/_hashindex.c b/borg/_hashindex.c similarity index 100% rename from attic/_hashindex.c rename to borg/_hashindex.c diff --git a/attic/_version.py b/borg/_version.py similarity index 100% rename from attic/_version.py rename to borg/_version.py diff --git a/attic/archive.py b/borg/archive.py similarity index 100% rename from attic/archive.py rename to borg/archive.py diff --git a/attic/archiver.py b/borg/archiver.py similarity index 100% rename from attic/archiver.py rename to borg/archiver.py diff --git a/attic/cache.py b/borg/cache.py similarity index 100% rename from attic/cache.py rename to borg/cache.py diff --git a/attic/chunker.pyx b/borg/chunker.pyx similarity index 100% rename from attic/chunker.pyx rename to borg/chunker.pyx diff --git a/attic/crypto.pyx b/borg/crypto.pyx similarity index 100% rename from attic/crypto.pyx rename to borg/crypto.pyx diff --git a/attic/fuse.py b/borg/fuse.py similarity index 100% rename from attic/fuse.py rename to borg/fuse.py diff --git a/attic/hashindex.pyx b/borg/hashindex.pyx similarity index 100% rename from attic/hashindex.pyx rename to borg/hashindex.pyx diff --git a/attic/helpers.py b/borg/helpers.py similarity index 100% rename from attic/helpers.py rename to borg/helpers.py diff --git a/attic/key.py b/borg/key.py similarity index 100% rename from attic/key.py rename to borg/key.py diff --git a/attic/lrucache.py b/borg/lrucache.py similarity index 100% rename from attic/lrucache.py rename to borg/lrucache.py diff --git 
a/attic/platform.py b/borg/platform.py similarity index 100% rename from attic/platform.py rename to borg/platform.py diff --git a/attic/platform_darwin.pyx b/borg/platform_darwin.pyx similarity index 100% rename from attic/platform_darwin.pyx rename to borg/platform_darwin.pyx diff --git a/attic/platform_freebsd.pyx b/borg/platform_freebsd.pyx similarity index 100% rename from attic/platform_freebsd.pyx rename to borg/platform_freebsd.pyx diff --git a/attic/platform_linux.pyx b/borg/platform_linux.pyx similarity index 100% rename from attic/platform_linux.pyx rename to borg/platform_linux.pyx diff --git a/attic/remote.py b/borg/remote.py similarity index 100% rename from attic/remote.py rename to borg/remote.py diff --git a/attic/repository.py b/borg/repository.py similarity index 100% rename from attic/repository.py rename to borg/repository.py diff --git a/attic/testsuite/__init__.py b/borg/testsuite/__init__.py similarity index 100% rename from attic/testsuite/__init__.py rename to borg/testsuite/__init__.py diff --git a/attic/testsuite/archive.py b/borg/testsuite/archive.py similarity index 100% rename from attic/testsuite/archive.py rename to borg/testsuite/archive.py diff --git a/attic/testsuite/archiver.py b/borg/testsuite/archiver.py similarity index 100% rename from attic/testsuite/archiver.py rename to borg/testsuite/archiver.py diff --git a/attic/testsuite/chunker.py b/borg/testsuite/chunker.py similarity index 100% rename from attic/testsuite/chunker.py rename to borg/testsuite/chunker.py diff --git a/attic/testsuite/crypto.py b/borg/testsuite/crypto.py similarity index 100% rename from attic/testsuite/crypto.py rename to borg/testsuite/crypto.py diff --git a/attic/testsuite/hashindex.py b/borg/testsuite/hashindex.py similarity index 100% rename from attic/testsuite/hashindex.py rename to borg/testsuite/hashindex.py diff --git a/attic/testsuite/helpers.py b/borg/testsuite/helpers.py similarity index 100% rename from attic/testsuite/helpers.py rename to 
borg/testsuite/helpers.py diff --git a/attic/testsuite/key.py b/borg/testsuite/key.py similarity index 100% rename from attic/testsuite/key.py rename to borg/testsuite/key.py diff --git a/attic/testsuite/lrucache.py b/borg/testsuite/lrucache.py similarity index 100% rename from attic/testsuite/lrucache.py rename to borg/testsuite/lrucache.py diff --git a/attic/testsuite/mock.py b/borg/testsuite/mock.py similarity index 100% rename from attic/testsuite/mock.py rename to borg/testsuite/mock.py diff --git a/attic/testsuite/platform.py b/borg/testsuite/platform.py similarity index 100% rename from attic/testsuite/platform.py rename to borg/testsuite/platform.py diff --git a/attic/testsuite/repository.py b/borg/testsuite/repository.py similarity index 100% rename from attic/testsuite/repository.py rename to borg/testsuite/repository.py diff --git a/attic/testsuite/run.py b/borg/testsuite/run.py similarity index 100% rename from attic/testsuite/run.py rename to borg/testsuite/run.py diff --git a/attic/testsuite/xattr.py b/borg/testsuite/xattr.py similarity index 100% rename from attic/testsuite/xattr.py rename to borg/testsuite/xattr.py diff --git a/attic/xattr.py b/borg/xattr.py similarity index 100% rename from attic/xattr.py rename to borg/xattr.py From 5e98400a5aad0604d5265d8feb5e36ef9d616d58 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 22 May 2015 19:21:41 +0200 Subject: [PATCH 109/241] fix all references to package name use relative imports if possible reorder imports (1. stdlib 2. dependencies 3. borg 4. 
borg.testsuite) --- .gitattributes | 2 +- .travis.yml | 2 +- MANIFEST.in | 4 ++-- borg/_version.py | 2 +- borg/archive.py | 14 +++++------ borg/archiver.py | 20 ++++++++-------- borg/cache.py | 2 +- borg/fuse.py | 6 ++--- borg/helpers.py | 16 ++++++------- borg/key.py | 4 ++-- borg/platform.py | 6 ++--- borg/platform_darwin.pyx | 2 +- borg/platform_freebsd.pyx | 2 +- borg/platform_linux.pyx | 2 +- borg/remote.py | 4 ++-- borg/testsuite/__init__.py | 6 ++--- borg/testsuite/archive.py | 14 ++++++----- borg/testsuite/archiver.py | 23 +++++++++--------- borg/testsuite/chunker.py | 7 +++--- borg/testsuite/crypto.py | 5 ++-- borg/testsuite/hashindex.py | 5 ++-- borg/testsuite/helpers.py | 8 ++++--- borg/testsuite/key.py | 9 ++++---- borg/testsuite/lrucache.py | 4 ++-- borg/testsuite/platform.py | 5 ++-- borg/testsuite/repository.py | 13 ++++++----- borg/testsuite/run.py | 3 ++- borg/testsuite/xattr.py | 5 ++-- docs/conf.py | 7 +++--- scripts/borg | 2 +- setup.py | 45 +++++++++++++++++++++--------------- tox.ini | 2 +- 32 files changed, 136 insertions(+), 115 deletions(-) diff --git a/.gitattributes b/.gitattributes index fb24ad887..a97e72971 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -attic/_version.py export-subst +borg/_version.py export-subst diff --git a/.travis.yml b/.travis.yml index 7e3471b11..367458353 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,4 +9,4 @@ install: - "pip install --use-mirrors Cython" - "pip install -e ." 
# command to run tests -script: fakeroot -u python -m attic.testsuite.run -vb +script: fakeroot -u python -m borg.testsuite.run -vb diff --git a/MANIFEST.in b/MANIFEST.in index 7584a33c2..9ac968909 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,7 @@ include README.rst LICENSE CHANGES MANIFEST.in versioneer.py -recursive-include attic *.pyx +recursive-include borg *.pyx recursive-include docs * recursive-exclude docs *.pyc recursive-exclude docs *.pyo prune docs/_build -include attic/_version.py +include borg/_version.py diff --git a/borg/_version.py b/borg/_version.py index a7103c1a6..7a94d800f 100644 --- a/borg/_version.py +++ b/borg/_version.py @@ -21,7 +21,7 @@ git_full = "$Format:%H$" # these strings are filled in when 'setup.py versioneer' creates _version.py tag_prefix = "" parentdir_prefix = "borgbackup-" -versionfile_source = "attic/_version.py" +versionfile_source = "borg/_version.py" def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): diff --git a/borg/archive.py b/borg/archive.py index bae659ea2..a2cc59b99 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -4,8 +4,8 @@ from itertools import groupby import errno import shutil import tempfile -from attic.key import key_factory -from attic.remote import cache_if_remote +from .key import key_factory +from .remote import cache_if_remote import msgpack import os import socket @@ -13,11 +13,11 @@ import stat import sys import time from io import BytesIO -from attic import xattr -from attic.platform import acl_get, acl_set -from attic.chunker import Chunker -from attic.hashindex import ChunkIndex -from attic.helpers import parse_timestamp, Error, uid2user, user2uid, gid2group, group2gid, \ +from . 
import xattr +from .platform import acl_get, acl_set +from .chunker import Chunker +from .hashindex import ChunkIndex +from .helpers import parse_timestamp, Error, uid2user, user2uid, gid2group, group2gid, \ Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict, int_to_bigint, bigint_to_int ITEMS_BUFFER = 1024 * 1024 diff --git a/borg/archiver.py b/borg/archiver.py index b9295e87e..79bf65f03 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -12,17 +12,17 @@ import sys import textwrap import traceback -from attic import __version__ -from attic.archive import Archive, ArchiveChecker -from attic.repository import Repository -from attic.cache import Cache -from attic.key import key_creator -from attic.helpers import Error, location_validator, format_time, format_file_size, \ +from . import __version__ +from .archive import Archive, ArchiveChecker +from .repository import Repository +from .cache import Cache +from .key import key_creator +from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ is_cachedir, bigint_to_int -from attic.remote import RepositoryServer, RemoteRepository +from .remote import RepositoryServer, RemoteRepository class Archiver: @@ -296,7 +296,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") def do_mount(self, args): """Mount archive or an entire repository as a FUSE fileystem""" try: - from attic.fuse import FuseOperations + from .fuse import FuseOperations except ImportError as e: self.print_error('loading fuse support failed [ImportError: %s]' % str(e)) return self.exit_code @@ -814,7 +814,7 @@ def sig_info_handler(signum, stack): """search the stack for infos about the currently processed file and 
print them""" for frame in inspect.getouterframes(stack): func, loc = frame[3], frame[0].f_locals - if func in ('process_file', '_process', ): # attic create + if func in ('process_file', '_process', ): # create op path = loc['path'] try: pos = loc['fd'].tell() @@ -823,7 +823,7 @@ def sig_info_handler(signum, stack): pos, total = 0, 0 print("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total))) break - if func in ('extract_item', ): # attic extract + if func in ('extract_item', ): # extract op path = loc['item'][b'path'] try: pos = loc['fd'].tell() diff --git a/borg/cache.py b/borg/cache.py index 97890b52a..037a8e76b 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -1,5 +1,5 @@ from configparser import RawConfigParser -from attic.remote import cache_if_remote +from .remote import cache_if_remote import msgpack import os import sys diff --git a/borg/fuse.py b/borg/fuse.py index 1e93eb951..eb4b46ffd 100644 --- a/borg/fuse.py +++ b/borg/fuse.py @@ -7,9 +7,9 @@ import os import stat import tempfile import time -from attic.archive import Archive -from attic.helpers import daemonize -from attic.remote import cache_if_remote +from .archive import Archive +from .helpers import daemonize +from .remote import cache_if_remote # Does this version of llfuse support ns precision? have_fuse_mtime_ns = hasattr(llfuse.EntryAttributes, 'st_mtime_ns') diff --git a/borg/helpers.py b/borg/helpers.py index 0ba0739f9..f96c1bf52 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -12,9 +12,9 @@ from fnmatch import translate from operator import attrgetter import fcntl -import attic.hashindex -import attic.chunker -import attic.crypto +from . import hashindex +from . import chunker +from . 
import crypto class Error(Exception): @@ -71,11 +71,11 @@ class UpgradableLock: def check_extension_modules(): - import attic.platform - if (attic.hashindex.API_VERSION != 2 or - attic.chunker.API_VERSION != 2 or - attic.crypto.API_VERSION != 2 or - attic.platform.API_VERSION != 2): + from . import platform + if (hashindex.API_VERSION != 2 or + chunker.API_VERSION != 2 or + crypto.API_VERSION != 2 or + platform.API_VERSION != 2): raise ExtensionModuleError diff --git a/borg/key.py b/borg/key.py index f61798b9b..3e44b092e 100644 --- a/borg/key.py +++ b/borg/key.py @@ -7,8 +7,8 @@ import hmac from hashlib import sha256 import zlib -from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks -from attic.helpers import IntegrityError, get_keys_dir, Error +from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks +from .helpers import IntegrityError, get_keys_dir, Error PREFIX = b'\0' * 8 diff --git a/borg/platform.py b/borg/platform.py index 19f8072e6..caa3b4edc 100644 --- a/borg/platform.py +++ b/borg/platform.py @@ -1,11 +1,11 @@ import sys if sys.platform.startswith('linux'): - from attic.platform_linux import acl_get, acl_set, API_VERSION + from .platform_linux import acl_get, acl_set, API_VERSION elif sys.platform.startswith('freebsd'): - from attic.platform_freebsd import acl_get, acl_set, API_VERSION + from .platform_freebsd import acl_get, acl_set, API_VERSION elif sys.platform == 'darwin': - from attic.platform_darwin import acl_get, acl_set, API_VERSION + from .platform_darwin import acl_get, acl_set, API_VERSION else: API_VERSION = 2 diff --git a/borg/platform_darwin.pyx b/borg/platform_darwin.pyx index 37ed2d461..e44dc8ef2 100644 --- a/borg/platform_darwin.pyx +++ b/borg/platform_darwin.pyx @@ -1,5 +1,5 @@ import os -from attic.helpers import user2uid, group2gid +from .helpers import user2uid, group2gid API_VERSION = 2 diff --git 
a/borg/platform_freebsd.pyx b/borg/platform_freebsd.pyx index 074eebca1..43ae35edc 100644 --- a/borg/platform_freebsd.pyx +++ b/borg/platform_freebsd.pyx @@ -1,5 +1,5 @@ import os -from attic.helpers import posix_acl_use_stored_uid_gid +from .helpers import posix_acl_use_stored_uid_gid API_VERSION = 2 diff --git a/borg/platform_linux.pyx b/borg/platform_linux.pyx index 658d77a48..6144c76ff 100644 --- a/borg/platform_linux.pyx +++ b/borg/platform_linux.pyx @@ -1,7 +1,7 @@ import os import re from stat import S_ISLNK -from attic.helpers import posix_acl_use_stored_uid_gid, user2uid, group2gid +from .helpers import posix_acl_use_stored_uid_gid, user2uid, group2gid API_VERSION = 2 diff --git a/borg/remote.py b/borg/remote.py index b6b746d85..5d59e14ac 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -9,7 +9,7 @@ import sys import tempfile import traceback -from attic import __version__ +from . import __version__ from .hashindex import NSIndex from .helpers import Error, IntegrityError @@ -123,7 +123,7 @@ class RemoteRepository: self.unpacker = msgpack.Unpacker(use_list=False) self.p = None if location.host == '__testsuite__': - args = [sys.executable, '-m', 'attic.archiver', 'serve'] + self.extra_test_args + args = [sys.executable, '-m', 'borg.archiver', 'serve'] + self.extra_test_args else: args = ['ssh'] if location.port: diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index 421a0c329..fac3de9e2 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -6,8 +6,8 @@ import sys import sysconfig import time import unittest -from attic.helpers import st_mtime_ns -from attic.xattr import get_all +from ..helpers import st_mtime_ns +from ..xattr import get_all try: import llfuse @@ -113,7 +113,7 @@ class TestLoader(unittest.TestLoader): """ def loadTestsFromName(self, pattern, module=None): - suite = self.discover('attic.testsuite', '*.py') + suite = self.discover('borg.testsuite', '*.py') tests = unittest.TestSuite() for test in 
get_tests(suite): if pattern.lower() in test.id().lower(): diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 1d9b7004d..abb5bccb9 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -1,11 +1,13 @@ -import msgpack -from attic.testsuite import BaseTestCase -from attic.testsuite.mock import Mock -from attic.archive import Archive, CacheChunkBuffer, RobustUnpacker -from attic.key import PlaintextKey -from attic.helpers import Manifest from datetime import datetime, timezone +import msgpack + +from ..archive import Archive, CacheChunkBuffer, RobustUnpacker +from ..key import PlaintextKey +from ..helpers import Manifest +from . import BaseTestCase +from .mock import Mock + class MockCache: diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 85de28cd3..11efefd3d 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -10,16 +10,17 @@ import tempfile import time import unittest from hashlib import sha256 -from attic import xattr -from attic.archive import Archive, ChunkBuffer, CHUNK_MAX -from attic.archiver import Archiver -from attic.cache import Cache -from attic.crypto import bytes_to_long, num_aes_blocks -from attic.helpers import Manifest -from attic.remote import RemoteRepository, PathNotAllowed -from attic.repository import Repository -from attic.testsuite import BaseTestCase -from attic.testsuite.mock import patch + +from .. import xattr +from ..archive import Archive, ChunkBuffer, CHUNK_MAX +from ..archiver import Archiver +from ..cache import Cache +from ..crypto import bytes_to_long, num_aes_blocks +from ..helpers import Manifest +from ..remote import RemoteRepository, PathNotAllowed +from ..repository import Repository +from . 
import BaseTestCase +from .mock import patch try: import llfuse @@ -95,7 +96,7 @@ class ArchiverTestCaseBase(BaseTestCase): fork = kw.get('fork', False) if fork: try: - output = subprocess.check_output((sys.executable, '-m', 'attic.archiver') + args) + output = subprocess.check_output((sys.executable, '-m', 'borg.archiver') + args) ret = 0 except subprocess.CalledProcessError as e: output = e.output diff --git a/borg/testsuite/chunker.py b/borg/testsuite/chunker.py index cb5bb55ec..982e4dd20 100644 --- a/borg/testsuite/chunker.py +++ b/borg/testsuite/chunker.py @@ -1,8 +1,9 @@ -from attic.chunker import Chunker, buzhash, buzhash_update -from attic.testsuite import BaseTestCase -from attic.archive import CHUNK_MAX from io import BytesIO +from ..chunker import Chunker, buzhash, buzhash_update +from ..archive import CHUNK_MAX +from . import BaseTestCase + class ChunkerTestCase(BaseTestCase): diff --git a/borg/testsuite/crypto.py b/borg/testsuite/crypto.py index 066ba1815..e438eb85e 100644 --- a/borg/testsuite/crypto.py +++ b/borg/testsuite/crypto.py @@ -1,6 +1,7 @@ from binascii import hexlify -from attic.testsuite import BaseTestCase -from attic.crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, pbkdf2_sha256, get_random_bytes + +from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, pbkdf2_sha256, get_random_bytes +from . import BaseTestCase class CryptoTestCase(BaseTestCase): diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py index de2a3eaae..41c019d61 100644 --- a/borg/testsuite/hashindex.py +++ b/borg/testsuite/hashindex.py @@ -1,8 +1,9 @@ import hashlib import os import tempfile -from attic.hashindex import NSIndex, ChunkIndex -from attic.testsuite import BaseTestCase + +from ..hashindex import NSIndex, ChunkIndex +from . 
import BaseTestCase class HashIndexTestCase(BaseTestCase): diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 23c604f43..ac949ba7a 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -4,11 +4,13 @@ from datetime import datetime, timezone, timedelta import os import tempfile import unittest -from attic.helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \ - StableDict, int_to_bigint, bigint_to_int, parse_timestamp -from attic.testsuite import BaseTestCase + import msgpack +from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \ + StableDict, int_to_bigint, bigint_to_int, parse_timestamp +from . import BaseTestCase + class BigIntTestCase(BaseTestCase): diff --git a/borg/testsuite/key.py b/borg/testsuite/key.py index 32ac64974..2f234dd8a 100644 --- a/borg/testsuite/key.py +++ b/borg/testsuite/key.py @@ -3,10 +3,11 @@ import re import shutil import tempfile from binascii import hexlify -from attic.crypto import bytes_to_long, num_aes_blocks -from attic.testsuite import BaseTestCase -from attic.key import PlaintextKey, PassphraseKey, KeyfileKey -from attic.helpers import Location, unhexlify + +from ..crypto import bytes_to_long, num_aes_blocks +from ..key import PlaintextKey, PassphraseKey, KeyfileKey +from ..helpers import Location, unhexlify +from . import BaseTestCase class KeyTestCase(BaseTestCase): diff --git a/borg/testsuite/lrucache.py b/borg/testsuite/lrucache.py index 83d23cfc5..43ea5572e 100644 --- a/borg/testsuite/lrucache.py +++ b/borg/testsuite/lrucache.py @@ -1,5 +1,5 @@ -from attic.lrucache import LRUCache -from attic.testsuite import BaseTestCase +from ..lrucache import LRUCache +from . 
import BaseTestCase class LRUCacheTestCase(BaseTestCase): diff --git a/borg/testsuite/platform.py b/borg/testsuite/platform.py index bb3ab32b7..2a9ebff9e 100644 --- a/borg/testsuite/platform.py +++ b/borg/testsuite/platform.py @@ -3,8 +3,9 @@ import shutil import sys import tempfile import unittest -from attic.platform import acl_get, acl_set -from attic.testsuite import BaseTestCase + +from ..platform import acl_get, acl_set +from . import BaseTestCase ACCESS_ACL = """ diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 7be454e5b..9cc8d2427 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -1,12 +1,13 @@ import os import shutil import tempfile -from attic.testsuite.mock import patch -from attic.hashindex import NSIndex -from attic.helpers import Location, IntegrityError, UpgradableLock -from attic.remote import RemoteRepository, InvalidRPCMethod -from attic.repository import Repository -from attic.testsuite import BaseTestCase + +from ..hashindex import NSIndex +from ..helpers import Location, IntegrityError, UpgradableLock +from ..remote import RemoteRepository, InvalidRPCMethod +from ..repository import Repository +from . import BaseTestCase +from .mock import patch class RepositoryTestCaseBase(BaseTestCase): diff --git a/borg/testsuite/run.py b/borg/testsuite/run.py index 9231d3677..19d87699b 100644 --- a/borg/testsuite/run.py +++ b/borg/testsuite/run.py @@ -1,5 +1,6 @@ import unittest -from attic.testsuite import TestLoader + +from . import TestLoader def main(): diff --git a/borg/testsuite/xattr.py b/borg/testsuite/xattr.py index 952b70d80..d73856953 100644 --- a/borg/testsuite/xattr.py +++ b/borg/testsuite/xattr.py @@ -1,8 +1,9 @@ import os import tempfile import unittest -from attic.testsuite import BaseTestCase -from attic.xattr import is_enabled, getxattr, setxattr, listxattr + +from ..xattr import is_enabled, getxattr, setxattr, listxattr +from . 
import BaseTestCase @unittest.skipUnless(is_enabled(), 'xattr not enabled on filesystem') diff --git a/docs/conf.py b/docs/conf.py index d7f29e69e..d51da9207 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,4 @@ - # -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- # # documentation build configuration file, created by # sphinx-quickstart on Sat Sep 10 18:18:25 2011. @@ -11,7 +11,8 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os, attic +import sys, os +from borg import __version__ as sw_version # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -48,7 +49,7 @@ copyright = '2010-2014, Jonas Borgström' # built documents. # # The short X.Y version. -version = attic.__version__.split('-')[0] +version = sw_version.split('-')[0] # The full version, including alpha/beta/rc tags. release = version diff --git a/scripts/borg b/scripts/borg index 01ace036e..9fe6f7b5a 100644 --- a/scripts/borg +++ b/scripts/borg @@ -1,4 +1,4 @@ #!/usr/bin/env python -from attic.archiver import main +from borg.archiver import main main() diff --git a/setup.py b/setup.py index 1e4686dba..d5d548b3d 100644 --- a/setup.py +++ b/setup.py @@ -6,8 +6,8 @@ from glob import glob import versioneer versioneer.VCS = 'git' versioneer.style = 'pep440' -versioneer.versionfile_source = 'attic/_version.py' -versioneer.versionfile_build = 'attic/_version.py' +versioneer.versionfile_source = 'borg/_version.py' +versioneer.versionfile_build = 'borg/_version.py' versioneer.tag_prefix = '' versioneer.parentdir_prefix = 'borgbackup-' # dirname like 'myproject-1.2.0' @@ -21,12 +21,12 @@ try: except ImportError: from distutils.core import setup, Extension -crypto_source = 'attic/crypto.pyx' -chunker_source = 'attic/chunker.pyx' -hashindex_source = 'attic/hashindex.pyx' -platform_linux_source = 'attic/platform_linux.pyx' 
-platform_darwin_source = 'attic/platform_darwin.pyx' -platform_freebsd_source = 'attic/platform_freebsd.pyx' +crypto_source = 'borg/crypto.pyx' +chunker_source = 'borg/chunker.pyx' +hashindex_source = 'borg/hashindex.pyx' +platform_linux_source = 'borg/platform_linux.pyx' +platform_darwin_source = 'borg/platform_darwin.pyx' +platform_freebsd_source = 'borg/platform_freebsd.pyx' try: from Cython.Distutils import build_ext @@ -34,13 +34,20 @@ try: class Sdist(versioneer.cmd_sdist): def __init__(self, *args, **kwargs): - for src in glob('attic/*.pyx'): - cython_compiler.compile(glob('attic/*.pyx'), + for src in glob('borg/*.pyx'): + cython_compiler.compile(glob('borg/*.pyx'), cython_compiler.default_options) versioneer.cmd_sdist.__init__(self, *args, **kwargs) def make_distribution(self): - self.filelist.extend(['attic/crypto.c', 'attic/chunker.c', 'attic/_chunker.c', 'attic/hashindex.c', 'attic/_hashindex.c', 'attic/platform_linux.c', 'attic/platform_freebsd.c', 'attic/platform_darwin.c']) + self.filelist.extend([ + 'borg/crypto.c', + 'borg/chunker.c', 'borg/_chunker.c', + 'borg/hashindex.c', 'borg/_hashindex.c', + 'borg/platform_linux.c', + 'borg/platform_freebsd.c', + 'borg/platform_darwin.c', + ]) super(Sdist, self).make_distribution() except ImportError: @@ -68,7 +75,7 @@ def detect_openssl(prefixes): return prefix -possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/attic', '/opt/local'] +possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/borg', '/opt/local'] if os.environ.get('BORG_OPENSSL_PREFIX'): possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX')) ssl_prefix = detect_openssl(possible_openssl_prefixes) @@ -85,16 +92,16 @@ cmdclass = versioneer.get_cmdclass() cmdclass.update({'build_ext': build_ext, 'sdist': Sdist}) ext_modules = [ - Extension('attic.crypto', [crypto_source], 
libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs), - Extension('attic.chunker', [chunker_source]), - Extension('attic.hashindex', [hashindex_source]) + Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs), + Extension('borg.chunker', [chunker_source]), + Extension('borg.hashindex', [hashindex_source]) ] if sys.platform.startswith('linux'): - ext_modules.append(Extension('attic.platform_linux', [platform_linux_source], libraries=['acl'])) + ext_modules.append(Extension('borg.platform_linux', [platform_linux_source], libraries=['acl'])) elif sys.platform.startswith('freebsd'): - ext_modules.append(Extension('attic.platform_freebsd', [platform_freebsd_source])) + ext_modules.append(Extension('borg.platform_freebsd', [platform_freebsd_source])) elif sys.platform == 'darwin': - ext_modules.append(Extension('attic.platform_darwin', [platform_darwin_source])) + ext_modules.append(Extension('borg.platform_darwin', [platform_darwin_source])) setup( name='borgbackup', @@ -122,7 +129,7 @@ setup( 'Topic :: Security :: Cryptography', 'Topic :: System :: Archiving :: Backup', ], - packages=['attic', 'attic.testsuite'], + packages=['borg', 'borg.testsuite'], scripts=['scripts/borg'], cmdclass=cmdclass, ext_modules=ext_modules, diff --git a/tox.ini b/tox.ini index 327ddf0c1..1a71053c4 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ envlist = py32, py33, py34 [testenv] # Change dir to avoid import problem changedir = docs -commands = {envpython} -m attic.testsuite.run -bv [] +commands = {envpython} -m borg.testsuite.run -bv [] [testenv:py32] deps = mock From b5b1af2195e31c171fad3c5b59fa3df005a32da9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 22 May 2015 19:30:56 +0200 Subject: [PATCH 110/241] tox >= 2.0 needs passenv in tox.ini, so that fakeroot can work tox 2.x does not inherit environment vars by default. 
--- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index 1a71053c4..46e4c01d7 100644 --- a/tox.ini +++ b/tox.ini @@ -5,6 +5,7 @@ envlist = py32, py33, py34 # Change dir to avoid import problem changedir = docs commands = {envpython} -m borg.testsuite.run -bv [] +passenv = * # fakeroot -u needs some env vars [testenv:py32] deps = mock From 906ea629f635bef61d614541c38eac7ad0f076aa Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 22 May 2015 20:23:37 +0200 Subject: [PATCH 111/241] use py.test for testing, document it locally (via tox) and also on travis ci. --- .travis.yml | 2 +- docs/installation.rst | 2 +- setup.cfg | 2 ++ tox.ini | 10 +++++++--- 4 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 setup.cfg diff --git a/.travis.yml b/.travis.yml index 367458353..87d3afb02 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,4 +9,4 @@ install: - "pip install --use-mirrors Cython" - "pip install -e ." # command to run tests -script: fakeroot -u python -m borg.testsuite.run -vb +script: fakeroot -u py.test diff --git a/docs/installation.rst b/docs/installation.rst index ec47fe3ae..58073a7a8 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -69,7 +69,7 @@ Some of the steps detailled below might be useful also for non-git installs. 
# install some dependencies into virtual env pip install cython # to compile .pyx -> .c - pip install tox # optional, for running unit tests + pip install tox pytest # optional, for running unit tests pip install sphinx # optional, to build the docs # get |project_name| from github, install it diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..c9f4a34c7 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[pytest] +python_files = testsuite/*.py diff --git a/tox.ini b/tox.ini index 46e4c01d7..619fc01ff 100644 --- a/tox.ini +++ b/tox.ini @@ -3,9 +3,13 @@ envlist = py32, py33, py34 [testenv] # Change dir to avoid import problem -changedir = docs -commands = {envpython} -m borg.testsuite.run -bv [] +changedir = {envdir} +deps = + pytest +commands = py.test passenv = * # fakeroot -u needs some env vars [testenv:py32] -deps = mock +deps = + pytest + mock From e1c7da1f53ec95e76476f704c4127d02b8cde55f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 22 May 2015 20:40:43 +0200 Subject: [PATCH 112/241] add flake8 configuration (style checker) currently adjusted in a way it does not throw errors and warnings with the source as it is now. issues will get fixed over time and the exclude list shall get shorter. 
--- setup.cfg | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/setup.cfg b/setup.cfg index c9f4a34c7..2f726b562 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,8 @@ [pytest] python_files = testsuite/*.py + +[flake8] +ignore = E123,E126,E127,E129,E203,E221,E226,E231,E241,E265,E301,E302,E303,E713,F401,F403,W291,W293,W391 +max-line-length = 250 +exclude = versioneer.py,docs/conf.py,borg/_version.py +max-complexity = 100 From ef373b120f33e16ebf220e1b904b8758d766d5a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 22 May 2015 16:09:38 -0400 Subject: [PATCH 113/241] uppercase borg --- docs/faq.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index 9354d84f7..98ed64aaf 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -75,14 +75,14 @@ If it crashes with a UnicodeError, what can I do? export LANG=en_US.UTF-8 # or similar, important is correct charset -Why was borg forked from Attic? - borg was created in may 2015 in response to the difficulty of +Why was Borg forked from Attic? + Borg was created in may 2015 in response to the difficulty of getting new merge requests and larger changes incorporated into attic. more details can be found in the `fairly long discussion `_ in attic issue queue that led to the fork. - borg intends to be: + Borg intends to be: * simple: * as simple as possible, but no simpler @@ -95,5 +95,5 @@ Why was borg forked from Attic? 
* changing: * do not break compatibility accidentally, without a good reason or without warning - * borg is not backwards-compatible with attic + * Borg is not backwards-compatible with attic * major versions may not be compatible with older releases From 1edd5daef90568a5632003905f7b695c901da9e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 22 May 2015 16:09:46 -0400 Subject: [PATCH 114/241] try to clarify link --- docs/faq.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index 98ed64aaf..a99fd8f39 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -78,9 +78,9 @@ If it crashes with a UnicodeError, what can I do? Why was Borg forked from Attic? Borg was created in may 2015 in response to the difficulty of getting new merge requests and larger changes incorporated into - attic. more details can be found in the `fairly long discussion - `_ in - attic issue queue that led to the fork. + attic. more details can be found in the fairly long discussion + in attic issue queue (found in `ticket 217 + `_) that led to the fork. 
Borg intends to be: From 0ad2ab2496fee1c6505c68919d3838fd42b38e2c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 22 May 2015 22:55:35 +0200 Subject: [PATCH 115/241] a bit too much glob there in setup.py, removed --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d5d548b3d..88dc2564b 100644 --- a/setup.py +++ b/setup.py @@ -35,8 +35,7 @@ try: class Sdist(versioneer.cmd_sdist): def __init__(self, *args, **kwargs): for src in glob('borg/*.pyx'): - cython_compiler.compile(glob('borg/*.pyx'), - cython_compiler.default_options) + cython_compiler.compile(src, cython_compiler.default_options) versioneer.cmd_sdist.__init__(self, *args, **kwargs) def make_distribution(self): From 7e9fefd92a33c91fb5af24a5cc7b3e7bb1b54a9b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 22 May 2015 23:28:55 +0200 Subject: [PATCH 116/241] updated CHANGES --- CHANGES | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 95f5b9e03..245559d1b 100644 --- a/CHANGES +++ b/CHANGES @@ -19,7 +19,7 @@ Version - less memory usage: add global option --no-cache-files - fix traceback when trying to do unsupported passphrase change, fixes attic #189 - datetime does not like the year 10.000, fixes attic #139 -- docs improvements, fixes, updates +- docs and faq improvements, fixes, updates - cleanup crypto.pyx, make it easier to adapt to other modes - extract: if --stdout is given, write all extracted binary data to stdout - create: if "-" is given as path, read binary from stdin @@ -40,9 +40,16 @@ Version - implement rename repo::oldname newname - implement create --progress - source: refactor indicator (status) and item processing -- implement delete (also deletes local cache) +- implement delete repo (also deletes local cache) - better create -v output -- upgraded versioneer, fixes attic #257 +- upgraded versioneer, PEP440 compliance, fixes attic #257 +- source: use py.test for 
better testing, flake8 for code style checks +- source: fix tox >=2.0 compatibility +- toplevel error handler: show tracebacks for better error analysis +- sigusr1 / sigint handler to print current file infos - attic PR #286 +- pypi package: add python version classifiers, add FreeBSD to platforms +- fix Repository._active_txn state when lock upgrade fails +- RPCError: include the exception args we get from remote Attic Changelog From 9d1560f830c5113224b30b67000b7661eb4a1410 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 22 May 2015 23:56:29 +0200 Subject: [PATCH 117/241] faq: updates, fixes --- docs/faq.rst | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index a99fd8f39..49d100598 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -16,16 +16,16 @@ Can I backup VM disk images? Also, we have optional simple sparse file support for extract. Can I backup from multiple servers into a single repository? - Yes, but in order for the deduplication used by Borg to work, it + Yes, but in order for the deduplication used by |project_name| to work, it needs to keep a local cache containing checksums of all file chunks already stored in the repository. This cache is stored in - ``~/.cache/borg/``. If Borg detects that a repository has been + ``~/.cache/borg/``. If |project_name| detects that a repository has been modified since the local cache was updated it will need to rebuild the cache. This rebuild can be quite time consuming. So, yes it's possible. But it will be most efficient if a single repository is only modified from one place. Also keep in mind that - Borg will keep an exclusive lock on the repository while creating + |project_name| will keep an exclusive lock on the repository while creating or deleting archives, which may make *simultaneous* backups fail. Which file types, attributes, etc. are preserved? 
@@ -62,8 +62,18 @@ How can I specify the encryption passphrase programmatically? key file based encryption with a blank passphrase. See :ref:`encrypted_repos` for more details. -When backing up to remote servers, is data encrypted before leaving the local machine, or do I have to trust that the remote server isn't malicious? - Yes, everything is encrypted before leaving the local machine. +When backing up to remote encrypted repos, is encryption done locally? + Yes, file and directory metadata and data is locally encrypted, before + leaving the local machine. We do not mean the transport layer encryption + by that, but the data/metadata itself. Transport layer encryption (e.g. + when ssh is used as a transport) applies additionally. + +When backing up to remote servers, do I have to trust the remote server? + Yes and No. + No, as far as data confidentiality is concerned - all your files/dirs data + and metadata are stored in their encrypted form into the repository. + Yes, as an attacker with access to the remote server could delete (or + otherwise make unavailable) all your backups. If a backup stops mid-way, does the already-backed-up data stay there? I.e. does |project_name| resume backups? Yes, during a backup a special checkpoint archive named ``.checkpoint`` is saved every 5 minutes @@ -75,11 +85,17 @@ If it crashes with a UnicodeError, what can I do? export LANG=en_US.UTF-8 # or similar, important is correct charset +If I want to run |project_name| on a ARM CPU older than ARM v6? + You need to enable the alignment trap handler to fixup misaligned accesses: + + echo "2" > /proc/cpu/alignment + Why was Borg forked from Attic? - Borg was created in may 2015 in response to the difficulty of - getting new merge requests and larger changes incorporated into - attic. 
more details can be found in the fairly long discussion - in attic issue queue (found in `ticket 217 + Borg was created in May 2015 in response to the difficulty of + getting new code or larger changes incorporated into Attic and + establishing a bigger developer community / more open development. + + More details can be found in `ticket 217 `_) that led to the fork. Borg intends to be: @@ -93,7 +109,7 @@ Why was Borg forked from Attic? * give feedback on PRs that can't be accepted "as is" * discuss openly, don't work in the dark * changing: + * Borg is not compatible with Attic * do not break compatibility accidentally, without a good reason - or without warning - * Borg is not backwards-compatible with attic - * major versions may not be compatible with older releases + or without warning. allow compatibility breaking for other cases. + * if major version number changes, it may have incompatible changes From f6442fe7f0a137e929f0b27381568a48f12a4eb0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 23 May 2015 00:12:17 +0200 Subject: [PATCH 118/241] automate updating the borgbackup.github.io repo/website cd docs ; make gh-io --- docs/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/Makefile b/docs/Makefile index 21d6d69c7..5c5469597 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -138,5 +138,13 @@ gh-pages: html (cd $$GH_PAGES_CLONE && git add -A && git commit -m 'Updated gh-pages' && git push) && \ rm -rf $$GH_PAGES_CLONE +gh-io: html + GH_IO_CLONE="`mktemp -d`" && \ + git clone git@github.com:borgbackup/borgbackup.github.io.git $$GH_IO_CLONE && \ + (cd $$GH_IO_CLONE && git rm -r *) && \ + cp -r _build/html/* $$GH_IO_CLONE && \ + (cd $$GH_IO_CLONE && git add -A && git commit -m 'Updated borgbackup.github.io' && git push) && \ + rm -rf $$GH_IO_CLONE + inotify: html while inotifywait -r . 
--exclude usage.rst --exclude '_build/*' ; do make html ; done From cd3a76909d22e43b77494c4e29636d8da0630593 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 23 May 2015 00:15:58 +0200 Subject: [PATCH 119/241] faq: markup fixes, typos --- docs/faq.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index 49d100598..180ac0edb 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -86,7 +86,7 @@ If it crashes with a UnicodeError, what can I do? export LANG=en_US.UTF-8 # or similar, important is correct charset If I want to run |project_name| on a ARM CPU older than ARM v6? - You need to enable the alignment trap handler to fixup misaligned accesses: + You need to enable the alignment trap handler to fixup misaligned accesses:: echo "2" > /proc/cpu/alignment @@ -96,19 +96,22 @@ Why was Borg forked from Attic? establishing a bigger developer community / more open development. More details can be found in `ticket 217 - `_) that led to the fork. + `_ that led to the fork. Borg intends to be: * simple: + * as simple as possible, but no simpler * do the right thing by default, but offer options * open: + * welcome feature requests * accept pull requests of good quality and coding style * give feedback on PRs that can't be accepted "as is" * discuss openly, don't work in the dark * changing: + * Borg is not compatible with Attic * do not break compatibility accidentally, without a good reason or without warning. allow compatibility breaking for other cases. 
From 8d0c5316f8af535666ad7aad8e681b412a7f87de Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 23 May 2015 00:40:30 +0200 Subject: [PATCH 120/241] remove gh-pages Makefile target (not used) --- docs/Makefile | 8 -------- 1 file changed, 8 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 5c5469597..387195a2a 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -130,14 +130,6 @@ doctest: @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." -gh-pages: html - GH_PAGES_CLONE="`mktemp -d`" && \ - git clone --branch gh-pages `git rev-parse --show-toplevel` $$GH_PAGES_CLONE && \ - (cd $$GH_PAGES_CLONE && git rm -r *) && \ - cp -r _build/html/* $$GH_PAGES_CLONE && \ - (cd $$GH_PAGES_CLONE && git add -A && git commit -m 'Updated gh-pages' && git push) && \ - rm -rf $$GH_PAGES_CLONE - gh-io: html GH_IO_CLONE="`mktemp -d`" && \ git clone git@github.com:borgbackup/borgbackup.github.io.git $$GH_IO_CLONE && \ From d067bc31784b6650135719f8914d6d2e540c2d2c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 26 May 2015 02:04:41 +0200 Subject: [PATCH 121/241] efficient archive list from manifest a lot of speedup for: "list ", "delete " list, "prune" - esp. for slow connections to remote repositories. the previous method used metadata from the archive itself, which is (in total) rather large. so if you had many archives and a slow (remote) connection, it was very slow. but there is a much easier way: just use the archives list from the repository manifest - we already have it anyway and it also has name, id and timestamp for all archives - and that's all we need. I defined an ArchiveInfo namedtuple that has the same element names as the attribute names of the Archive object, so as long as name, id, ts is enough, it can be used in its place.
--- borg/archive.py | 1 + borg/archiver.py | 13 ++++++------- borg/helpers.py | 13 +++++++++++++ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index a2cc59b99..3eccbbeef 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -494,6 +494,7 @@ class Archive: @staticmethod def list_archives(repository, key, manifest, cache=None): + # expensive! see also Manifest.list_archive_infos. for name, info in manifest.archives.items(): yield Archive(repository, key, manifest, name, cache=cache) diff --git a/borg/archiver.py b/borg/archiver.py index 79bf65f03..4b13e47fd 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -284,8 +284,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") stats.print_('Deleted data:', cache) else: print("You requested to completely DELETE the repository *including* all archives it contains:") - for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')): - print(format_archive(archive)) + for archive_info in manifest.list_archive_infos(sort_by='ts'): + print(format_archive(archive_info)) print("""Type "YES" if you understand this and want to continue.\n""") if input('Do you want to continue? 
') == 'YES': repository.destroy() @@ -354,8 +354,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") item[b'group'] or item[b'gid'], size, format_time(mtime), remove_surrogates(item[b'path']), extra)) else: - for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')): - print(format_archive(archive)) + for archive_info in manifest.list_archive_infos(sort_by='ts'): + print(format_archive(archive_info)) return self.exit_code def do_info(self, args): @@ -380,8 +380,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") repository = self.open_repository(args.repository, exclusive=True) manifest, key = Manifest.load(repository) cache = Cache(repository, key, manifest, do_files=args.cache_files) - archives = list(sorted(Archive.list_archives(repository, key, manifest, cache), - key=attrgetter('ts'), reverse=True)) + archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None: self.print_error('At least one of the "within", "hourly", "daily", "weekly", "monthly" or "yearly" ' 'settings must be specified') @@ -412,7 +411,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") self.print_verbose('Would prune: %s' % format_archive(archive)) else: self.print_verbose('Pruning archive: %s' % format_archive(archive)) - archive.delete(stats) + Archive(repository, key, manifest, archive.name, cache).delete(stats) if to_delete and not args.dry_run: manifest.write() repository.commit() diff --git a/borg/helpers.py b/borg/helpers.py index f96c1bf52..e97c88bf2 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1,5 +1,6 @@ import argparse import binascii +from collections import namedtuple import grp import msgpack import os @@ -119,6 +120,18 @@ class Manifest: self.id = self.key.id_hash(data) self.repository.put(self.MANIFEST_ID, 
self.key.encrypt(data)) + def list_archive_infos(self, sort_by=None, reverse=False): + # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts + ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') + archives = [] + for name, values in self.archives.items(): + ts = parse_timestamp(values[b'time'].decode('utf-8')) + id = values[b'id'] + archives.append(ArchiveInfo(name=name, id=id, ts=ts)) + if sort_by is not None: + archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse) + return archives + def prune_within(archives, within): multiplier = {'H': 1, 'd': 24, 'w': 24*7, 'm': 24*31, 'y': 24*365} From 776bb9fabc4806be9858ff960040e7be2002b5c9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 31 May 2015 17:48:19 +0200 Subject: [PATCH 122/241] hashindex: improve error messages --- borg/_hashindex.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index 5e1e62e3d..591f5c9f6 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -137,7 +137,7 @@ hashindex_read(const char *path) HashIndex *index = NULL; if((fd = fopen(path, "r")) == NULL) { - EPRINTF_PATH(path, "fopen failed"); + EPRINTF_PATH(path, "fopen for reading failed"); return NULL; } bytes_read = fread(&header, 1, sizeof(HashHeader), fd); @@ -163,20 +163,20 @@ hashindex_read(const char *path) goto fail; } if(memcmp(header.magic, MAGIC, 8)) { - EPRINTF_MSG_PATH(path, "Unknown file header"); + EPRINTF_MSG_PATH(path, "Unknown MAGIC in header"); goto fail; } buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size); if(length != sizeof(HashHeader) + buckets_length) { - EPRINTF_MSG_PATH(path, "Incorrect file length"); + EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ld, got %ld)", sizeof(HashHeader) + buckets_length, length); goto fail; } if(!(index = malloc(sizeof(HashIndex)))) { - EPRINTF_PATH(path, "malloc failed"); + EPRINTF_PATH(path, "malloc 
header failed"); goto fail; } if(!(index->buckets = malloc(buckets_length))) { - EPRINTF_PATH(path, "malloc failed"); + EPRINTF_PATH(path, "malloc buckets failed"); free(index); index = NULL; goto fail; @@ -217,12 +217,12 @@ hashindex_init(int capacity, int key_size, int value_size) capacity = MAX(MIN_BUCKETS, capacity); if(!(index = malloc(sizeof(HashIndex)))) { - EPRINTF("malloc failed"); + EPRINTF("malloc header failed"); return NULL; } buckets_length = (off_t)capacity * (key_size + value_size); if(!(index->buckets = calloc(buckets_length, 1))) { - EPRINTF("malloc failed"); + EPRINTF("malloc buckets failed"); free(index); return NULL; } @@ -261,15 +261,15 @@ hashindex_write(HashIndex *index, const char *path) int ret = 1; if((fd = fopen(path, "w")) == NULL) { - EPRINTF_PATH(path, "open failed"); + EPRINTF_PATH(path, "fopen for writing failed"); return 0; } if(fwrite(&header, 1, sizeof(header), fd) != sizeof(header)) { - EPRINTF_PATH(path, "fwrite failed"); + EPRINTF_PATH(path, "fwrite header failed"); ret = 0; } if(fwrite(index->buckets, 1, buckets_length, fd) != buckets_length) { - EPRINTF_PATH(path, "fwrite failed"); + EPRINTF_PATH(path, "fwrite buckets failed"); ret = 0; } if(fclose(fd) < 0) { From 926454c0d826d41e49c69e52a0d3573f08e1f219 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 31 May 2015 17:57:45 +0200 Subject: [PATCH 123/241] explicitly specify binary mode to open binary files on POSIX OSes, it doesn't make a difference, but it is cleaner and also good for portability.
--- borg/_hashindex.c | 4 ++-- borg/cache.py | 2 +- borg/testsuite/archiver.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index 591f5c9f6..e2589d0b8 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -136,7 +136,7 @@ hashindex_read(const char *path) HashHeader header; HashIndex *index = NULL; - if((fd = fopen(path, "r")) == NULL) { + if((fd = fopen(path, "rb")) == NULL) { EPRINTF_PATH(path, "fopen for reading failed"); return NULL; } @@ -260,7 +260,7 @@ hashindex_write(HashIndex *index, const char *path) }; int ret = 1; - if((fd = fopen(path, "w")) == NULL) { + if((fd = fopen(path, "wb")) == NULL) { EPRINTF_PATH(path, "fopen for writing failed"); return 0; } diff --git a/borg/cache.py b/borg/cache.py index 037a8e76b..573a7f5cc 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -93,7 +93,7 @@ class Cache: with open(os.path.join(self.path, 'config'), 'w') as fd: config.write(fd) ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) - with open(os.path.join(self.path, 'files'), 'w') as fd: + with open(os.path.join(self.path, 'files'), 'wb') as fd: pass # empty file def destroy(self): diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 11efefd3d..b35df2477 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -400,9 +400,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('extract', '--dry-run', self.repository_location + '::test') self.cmd('check', self.repository_location) name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0] - with open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+') as fd: + with open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+b') as fd: fd.seek(100) - fd.write('XXXX') + fd.write(b'XXXX') self.cmd('check', self.repository_location, exit_code=1) def test_readonly_repository(self): From 072326fef01a2e9ca321c7af73db14675b696439 Mon 
Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 31 May 2015 18:41:23 +0200 Subject: [PATCH 124/241] chunker: get rid of read_buf if we have an OS file handle, we can directly read to the final destination - one memcpy less. if we have a Python file object, we get a Python bytes object as read result (can't save the memcpy here). --- borg/_chunker.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 526878596..20461e7c6 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -79,7 +79,7 @@ typedef struct { int window_size, chunk_mask, min_size; size_t buf_size; uint32_t *table; - uint8_t *data, *read_buf; + uint8_t *data; PyObject *fd; int fh; int done, eof; @@ -96,7 +96,6 @@ chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32 c->table = buzhash_init_table(seed); c->buf_size = max_size; c->data = malloc(c->buf_size); - c->read_buf = malloc(c->buf_size); return c; } @@ -122,7 +121,6 @@ chunker_free(Chunker *c) Py_XDECREF(c->fd); free(c->table); free(c->data); - free(c->read_buf); free(c); } @@ -140,9 +138,8 @@ chunker_fill(Chunker *c) } if(c->fh >= 0) { // if we have a os-level file descriptor, use os-level API - n = read(c->fh, c->read_buf, n); + n = read(c->fh, c->data + c->position + c->remaining, n); if(n > 0) { - memcpy(c->data + c->position + c->remaining, c->read_buf, n); c->remaining += n; c->bytes_read += n; } From a3f4d1951574ad3a31c6004bed9140514d515cb6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 23 May 2015 17:07:22 +0200 Subject: [PATCH 125/241] speed up chunks cache sync, fixes #18 Re-synchronize chunks cache with repository. If present, uses a compressed tar archive of known backup archive indices, so it only needs to fetch infos from repo and build a chunk index once per backup archive. If out of sync, the tar gets rebuilt from known + fetched chunk infos, so it has complete and current information about all backup archives.
Finally, it builds the master chunks index by merging all indices from the tar. Note: compression (esp. xz) is very effective in keeping the tar relatively small compared to the files it contains. Use python >= 3.3 to get better compression with xz, there's a fallback to bz2 or gz when xz is not supported. --- borg/cache.py | 147 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 128 insertions(+), 19 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 573a7f5cc..110f088d9 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -1,10 +1,13 @@ from configparser import RawConfigParser from .remote import cache_if_remote +import errno import msgpack import os import sys from binascii import hexlify import shutil +import tarfile +import tempfile from .key import PlaintextKey from .helpers import Error, get_cache_dir, decode_dict, st_mtime_ns, unhexlify, UpgradableLock, int_to_bigint, \ @@ -93,6 +96,8 @@ class Cache: with open(os.path.join(self.path, 'config'), 'w') as fd: config.write(fd) ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) + with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd: + pass # empty file with open(os.path.join(self.path, 'files'), 'wb') as fd: pass # empty file @@ -148,6 +153,7 @@ class Cache: os.mkdir(txn_dir) shutil.copy(os.path.join(self.path, 'config'), txn_dir) shutil.copy(os.path.join(self.path, 'chunks'), txn_dir) + shutil.copy(os.path.join(self.path, 'chunks.archive'), txn_dir) shutil.copy(os.path.join(self.path, 'files'), txn_dir) os.rename(os.path.join(self.path, 'txn.tmp'), os.path.join(self.path, 'txn.active')) @@ -189,6 +195,7 @@ class Cache: if os.path.exists(txn_dir): shutil.copy(os.path.join(txn_dir, 'config'), self.path) shutil.copy(os.path.join(txn_dir, 'chunks'), self.path) + shutil.copy(os.path.join(txn_dir, 'chunks.archive'), self.path) shutil.copy(os.path.join(txn_dir, 'files'), self.path) os.rename(txn_dir, os.path.join(self.path, 'txn.tmp')) if 
os.path.exists(os.path.join(self.path, 'txn.tmp')): @@ -197,37 +204,139 @@ class Cache: self._do_open() def sync(self): - """Initializes cache by fetching and reading all archive indicies + """Re-synchronize chunks cache with repository. + + If present, uses a compressed tar archive of known backup archive + indices, so it only needs to fetch infos from repo and build a chunk + index once per backup archive. + If out of sync, the tar gets rebuilt from known + fetched chunk infos, + so it has complete and current information about all backup archives. + Finally, it builds the master chunks index by merging all indices from + the tar. + + Note: compression (esp. xz) is very effective in keeping the tar + relatively small compared to the files it contains. """ - def add(id, size, csize): + in_archive_path = os.path.join(self.path, 'chunks.archive') + out_archive_path = os.path.join(self.path, 'chunks.archive.tmp') + + def open_in_archive(): try: - count, size, csize = self.chunks[id] - self.chunks[id] = count + 1, size, csize + tf = tarfile.open(in_archive_path, 'r') + except OSError as e: + if e.errno != errno.ENOENT: + raise + # file not found + tf = None + except tarfile.ReadError: + # empty file? 
+ tf = None + return tf + + def open_out_archive(): + for compression in ('xz', 'bz2', 'gz'): + # xz needs py 3.3, bz2 and gz also work on 3.2 + try: + tf = tarfile.open(out_archive_path, 'w:'+compression, format=tarfile.PAX_FORMAT) + break + except tarfile.CompressionError: + continue + else: # shouldn't happen + tf = None + return tf + + def close_archive(tf): + if tf: + tf.close() + + def delete_in_archive(): + os.unlink(in_archive_path) + + def rename_out_archive(): + os.rename(out_archive_path, in_archive_path) + + def add(chunk_idx, id, size, csize, incr=1): + try: + count, size, csize = chunk_idx[id] + chunk_idx[id] = count + incr, size, csize except KeyError: - self.chunks[id] = 1, size, csize - self.begin_txn() - print('Initializing cache...') - self.chunks.clear() - unpacker = msgpack.Unpacker() - repository = cache_if_remote(self.repository) - for name, info in self.manifest.archives.items(): - archive_id = info[b'id'] + chunk_idx[id] = incr, size, csize + + def transfer_known_idx(archive_id, tf_in, tf_out): + archive_id_hex = hexlify(archive_id).decode('ascii') + tarinfo = tf_in.getmember(archive_id_hex) + archive_name = tarinfo.pax_headers['archive_name'] + print('Already known archive:', archive_name) + f_in = tf_in.extractfile(archive_id_hex) + tf_out.addfile(tarinfo, f_in) + return archive_name + + def fetch_and_build_idx(archive_id, repository, key, tmp_dir, tf_out): + chunk_idx = ChunkIndex() cdata = repository.get(archive_id) - data = self.key.decrypt(archive_id, cdata) - add(archive_id, len(data), len(cdata)) + data = key.decrypt(archive_id, cdata) + add(chunk_idx, archive_id, len(data), len(cdata)) archive = msgpack.unpackb(data) if archive[b'version'] != 1: raise Exception('Unknown archive metadata version') decode_dict(archive, (b'name',)) - print('Analyzing archive:', archive[b'name']) - for key, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])): - data = self.key.decrypt(key, chunk) - add(key, len(data), len(chunk)) + 
print('Analyzing new archive:', archive[b'name']) + unpacker = msgpack.Unpacker() + for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])): + data = key.decrypt(item_id, chunk) + add(chunk_idx, item_id, len(data), len(chunk)) unpacker.feed(data) for item in unpacker: if b'chunks' in item: for chunk_id, size, csize in item[b'chunks']: - add(chunk_id, size, csize) + add(chunk_idx, chunk_id, size, csize) + archive_id_hex = hexlify(archive_id).decode('ascii') + file_tmp = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') + chunk_idx.write(file_tmp) + tarinfo = tf_out.gettarinfo(file_tmp, archive_id_hex) + tarinfo.pax_headers['archive_name'] = archive[b'name'] + with open(file_tmp, 'rb') as f: + tf_out.addfile(tarinfo, f) + os.unlink(file_tmp) + + def create_master_idx(chunk_idx, tf_in, tmp_dir): + chunk_idx.clear() + for tarinfo in tf_in: + archive_id_hex = tarinfo.name + tf_in.extract(archive_id_hex, tmp_dir) + chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') + archive_chunk_idx = ChunkIndex.read(chunk_idx_path) + for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems(): + add(chunk_idx, chunk_id, size, csize, incr=count) + os.unlink(chunk_idx_path) + + self.begin_txn() + print('Synchronizing chunks cache...') + # XXX we have to do stuff on disk due to lacking ChunkIndex api + with tempfile.TemporaryDirectory() as tmp_dir: + repository = cache_if_remote(self.repository) + out_archive = open_out_archive() + in_archive = open_in_archive() + if in_archive: + known_ids = set(unhexlify(hexid) for hexid in in_archive.getnames()) + else: + known_ids = set() + archive_ids = set(info[b'id'] for info in self.manifest.archives.values()) + print('Rebuilding archive collection. 
Known: %d Repo: %d Unknown: %d' % ( + len(known_ids), len(archive_ids), len(archive_ids - known_ids), )) + for archive_id in archive_ids & known_ids: + transfer_known_idx(archive_id, in_archive, out_archive) + close_archive(in_archive) + delete_in_archive() # free disk space + for archive_id in archive_ids - known_ids: + fetch_and_build_idx(archive_id, repository, self.key, tmp_dir, out_archive) + close_archive(out_archive) + rename_out_archive() + print('Merging collection into master chunks cache...') + in_archive = open_in_archive() + create_master_idx(self.chunks, in_archive, tmp_dir) + close_archive(in_archive) + print('Done.') def add_chunk(self, id, data, stats): if not self.txn_active: From ed1e5e9c13228ee71cea1e0f4417527902068e30 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 31 May 2015 21:23:36 +0200 Subject: [PATCH 126/241] "create" micro optimization: do not check for sockets early they are rare, so it's pointless to check for them first. seen the stat..S_ISSOCK in profiling results with high call count. was no big issue, that call is cheap, but also no big issue to just fix the order. --- borg/archiver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 4b13e47fd..438418abb 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -168,9 +168,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # Entering a new filesystem? 
if restrict_dev and st.st_dev != restrict_dev: return - # Ignore unix sockets - if stat.S_ISSOCK(st.st_mode): - return status = None if stat.S_ISREG(st.st_mode): try: @@ -196,6 +193,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") status = archive.process_fifo(path, st) elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode): status = archive.process_dev(path, st) + elif stat.S_ISSOCK(st.st_mode): + # Ignore unix sockets + return else: self.print_error('Unknown file type: %s', path) return From 646cdca312f3ebb87a186af3d1f088b54d4a6a7d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 31 May 2015 21:53:37 +0200 Subject: [PATCH 127/241] "extract" micro optimization: first check for regular files, then for directories, check for fifos late regular files are most common, more than directories. fifos are rare. was no big issue, the calls are cheap, but also no big issue to just fix the order. --- borg/archive.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 3eccbbeef..e6d557479 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -273,12 +273,7 @@ class Archive: except OSError: pass mode = item[b'mode'] - if stat.S_ISDIR(mode): - if not os.path.exists(path): - os.makedirs(path) - if restore_attrs: - self.restore_attrs(path, item) - elif stat.S_ISREG(mode): + if stat.S_ISREG(mode): if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) # Hard link? 
@@ -300,11 +295,11 @@ class Archive: fd.truncate(pos) fd.flush() self.restore_attrs(path, item, fd=fd.fileno()) - elif stat.S_ISFIFO(mode): - if not os.path.exists(os.path.dirname(path)): - os.makedirs(os.path.dirname(path)) - os.mkfifo(path) - self.restore_attrs(path, item) + elif stat.S_ISDIR(mode): + if not os.path.exists(path): + os.makedirs(path) + if restore_attrs: + self.restore_attrs(path, item) elif stat.S_ISLNK(mode): if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) @@ -313,6 +308,11 @@ class Archive: os.unlink(path) os.symlink(source, path) self.restore_attrs(path, item, symlink=True) + elif stat.S_ISFIFO(mode): + if not os.path.exists(os.path.dirname(path)): + os.makedirs(os.path.dirname(path)) + os.mkfifo(path) + self.restore_attrs(path, item) elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode): os.mknod(path, item[b'mode'], item[b'rdev']) self.restore_attrs(path, item) From 3dce75306ae46fe4dc376508873d0b56255ff808 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 2 Jun 2015 02:30:07 +0200 Subject: [PATCH 128/241] LoggedIO: better error checks / exceptions / exception handling It doesn't just say "error reading segment X", but also what went wrong and at what offset. 
--- borg/repository.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index 9cea1e578..fcec23540 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -281,8 +281,8 @@ class Repository: continue try: objects = list(self.io.iter_objects(segment)) - except (IntegrityError, struct.error): - report_error('Error reading segment {}'.format(segment)) + except IntegrityError as err: + report_error('Error reading segment {}: {}'.format(segment, err)) objects = [] if repair: self.io.recover_segment(segment, filename) @@ -505,18 +505,25 @@ class LoggedIO: fd = self.get_fd(segment) fd.seek(0) if fd.read(8) != MAGIC: - raise IntegrityError('Invalid segment header') + raise IntegrityError('Invalid segment magic') offset = 8 header = fd.read(self.header_fmt.size) while header: - crc, size, tag = self.header_fmt.unpack(header) + try: + crc, size, tag = self.header_fmt.unpack(header) + except struct.error as err: + raise IntegrityError('Invalid segment entry header [offset {}]: {}'.format(offset, err)) if size > MAX_OBJECT_SIZE: - raise IntegrityError('Invalid segment object size') - rest = fd.read(size - self.header_fmt.size) + raise IntegrityError('Invalid segment entry size [offset {}]'.format(offset)) + length = size - self.header_fmt.size + rest = fd.read(length) + if len(rest) != length: + raise IntegrityError('Segment entry data short read [offset {}]: expected: {}, got {} bytes'.format( + offset, length, len(rest))) if crc32(rest, crc32(memoryview(header)[4:])) & 0xffffffff != crc: - raise IntegrityError('Segment checksum mismatch') + raise IntegrityError('Segment entry checksum mismatch [offset {}]'.format(offset)) if tag not in (TAG_PUT, TAG_DELETE, TAG_COMMIT): - raise IntegrityError('Invalid segment entry header') + raise IntegrityError('Invalid segment entry tag [offset {}]'.format(offset)) key = None if tag in (TAG_PUT, TAG_DELETE): key = rest[:32] From 
614261604e144062342cbd3df7e26c561671fc77 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 2 Jun 2015 02:41:23 +0200 Subject: [PATCH 129/241] don't hardcode MAGIC length --- borg/_hashindex.c | 8 +++++--- borg/repository.py | 7 ++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index e2589d0b8..9fd31d56b 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -18,8 +18,11 @@ #error Unknown byte order #endif +#define MAGIC "BORG_IDX" +#define MAGIC_LEN 8 + typedef struct { - char magic[8]; + char magic[MAGIC_LEN]; int32_t num_entries; int32_t num_buckets; int8_t key_size; @@ -37,7 +40,6 @@ typedef struct { int upper_limit; } HashIndex; -#define MAGIC "BORG_IDX" #define EMPTY _htole32(0xffffffff) #define DELETED _htole32(0xfffffffe) #define MAX_BUCKET_SIZE 512 @@ -162,7 +164,7 @@ hashindex_read(const char *path) EPRINTF_PATH(path, "fseek failed"); goto fail; } - if(memcmp(header.magic, MAGIC, 8)) { + if(memcmp(header.magic, MAGIC, MAGIC_LEN)) { EPRINTF_MSG_PATH(path, "Unknown MAGIC in header"); goto fail; } diff --git a/borg/repository.py b/borg/repository.py index 9cea1e578..6d4999133 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -14,6 +14,7 @@ from .lrucache import LRUCache MAX_OBJECT_SIZE = 20 * 1024 * 1024 MAGIC = b'BORG_SEG' +MAGIC_LEN = len(MAGIC) TAG_PUT = 0 TAG_DELETE = 1 TAG_COMMIT = 2 @@ -481,7 +482,7 @@ class LoggedIO: os.mkdir(dirname) self._write_fd = open(self.segment_filename(self.segment), 'ab') self._write_fd.write(MAGIC) - self.offset = 8 + self.offset = MAGIC_LEN return self._write_fd def get_fd(self, segment): @@ -504,9 +505,9 @@ class LoggedIO: def iter_objects(self, segment, include_data=False): fd = self.get_fd(segment) fd.seek(0) - if fd.read(8) != MAGIC: + if fd.read(MAGIC_LEN) != MAGIC: raise IntegrityError('Invalid segment header') - offset = 8 + offset = MAGIC_LEN header = fd.read(self.header_fmt.size) while header: crc, size, tag = 
self.header_fmt.unpack(header) From 83f520cfbee78cd09bdeb77ee7210b5a02a1eff3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 7 Jun 2015 02:15:13 +0200 Subject: [PATCH 130/241] improve internals docs --- docs/internals.rst | 295 ++++++++++++++++++++++++++------------------- 1 file changed, 168 insertions(+), 127 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index ead22158b..9ee4a8c9e 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -6,38 +6,43 @@ Internals This page documents the internal data structures and storage mechanisms of |project_name|. It is partly based on `mailing list -discussion about internals`_ and also on static code analysis. It may -not be exactly up to date with the current source code. +discussion about internals`_ and also on static code analysis. + +It may not be exactly up to date with the current source code. + +Repository and Archives +----------------------- |project_name| stores its data in a `Repository`. Each repository can hold multiple `Archives`, which represent individual backups that contain a full archive of the files specified when the backup was performed. Deduplication is performed across multiple backups, both on -data and metadata, using `Segments` chunked with the Buzhash_ -algorithm. Each repository has the following file structure: +data and metadata, using `Chunks` created by the chunker using the Buzhash_ +algorithm. + +Each repository has the following file structure: README - simple text file describing the repository + simple text file telling that this is a |project_name| repository config - description of the repository, includes the unique identifier. also - acts as a lock file + repository configuration and lock file data/ - directory where the actual data (`segments`) is stored + directory where the actual data is stored hints.%d - undocumented + hints for repository compaction index.%d - cache of the file indexes. 
those files can be regenerated with - ``check --repair`` + repository index + Config file ----------- -Each repository has a ``config`` file which which is a ``INI`` -formatted file which looks like this:: +Each repository has a ``config`` file which which is a ``INI``-style file +and looks like this:: [repository] version = 1 @@ -48,20 +53,35 @@ formatted file which looks like this:: This is where the ``repository.id`` is stored. It is a unique identifier for repositories. It will not change if you move the repository around so you can make a local transfer then decide to move -the repository in another (even remote) location at a later time. +the repository to another (even remote) location at a later time. -|project_name| will do a POSIX read lock on that file when operating +|project_name| will do a POSIX read lock on the config file when operating on the repository. + +Keys +---- +The key to address the key/value store is usually computed like this: + +key = id = id_hash(unencrypted_data) + +The id_hash function is: + +* sha256 (no encryption keys available) +* hmac-sha256 (encryption keys available) + + Segments and archives --------------------- -|project_name| is a "filesystem based transactional key value -store". It makes extensive use of msgpack_ to store data and, unless +A |project_name| repository is a filesystem based transactional key/value +store. It makes extensive use of msgpack_ to store data and, unless otherwise noted, data is stored in msgpack_ encoded files. -Objects referenced by a key (256bits id/hash) are stored inline in -files (`segments`) of size approx 5MB in ``repo/data``. They contain: +Objects referenced by a key are stored inline in files (`segments`) of approx. +5MB size in numbered subdirectories of ``repo/data``. + +They contain: * header size * crc @@ -77,21 +97,26 @@ Tag is either ``PUT``, ``DELETE``, or ``COMMIT``. A segment file is basically a transaction log where each repository operation is appended to the file. 
So if an object is written to the repository a ``PUT`` tag is written to the file followed by the object id and -data. And if an object is deleted a ``DELETE`` tag is appended +data. If an object is deleted a ``DELETE`` tag is appended followed by the object id. A ``COMMIT`` tag is written when a repository transaction is committed. When a repository is opened any ``PUT`` or ``DELETE`` operations not followed by a ``COMMIT`` tag are discarded since they are part of a partial/uncommitted transaction. -The manifest is an object with an id of only zeros (32 bytes), that -references all the archives. It contains: + +The manifest +------------ + +The manifest is an object with an all-zero key that references all the +archives. +It contains: * version -* list of archives +* list of archive infos * timestamp * config -Each archive contains: +Each archive info contains: * name * id @@ -102,21 +127,21 @@ each time. The archive metadata does not contain the file items directly. Only references to other objects that contain that data. An archive is an -object that contain metadata: +object that contains: * version * name -* items list +* list of chunks containing item metadata * cmdline * hostname * username * time -Each item represents a file or directory or -symlink is stored as an ``item`` dictionary that contains: +Each item represents a file, directory or other fs item and is stored as an +``item`` dictionary that contains: * path -* list of chunks +* list of data chunks * user * group * uid @@ -135,124 +160,136 @@ it and it is reset every time an inode's metadata is changed. All items are serialized using msgpack and the resulting byte stream is fed into the same chunker used for regular file data and turned into deduplicated chunks. The reference to these chunks is then added -to the archive metadata. This allows the archive to store many files, -beyond the ``MAX_OBJECT_SIZE`` barrier of 20MB. +to the archive metadata. -A chunk is an object as well, of course. 
The chunk id is either -HMAC-SHA256_, when encryption is used, or a SHA256_ hash otherwise. +A chunk is stored as an object as well, of course. -Hints are stored in a file (``repo/hints``) and contain: - -* version -* list of segments -* compact Chunks ------ -|project_name| uses a rolling checksum with Buzhash_ algorithm, with -window size of 4095 bytes (`0xFFF`), with a minimum of 1024, and triggers when -the last 16 bits of the checksum are null, producing chunks of 64kB on -average. All these parameters are fixed. The buzhash table is altered -by XORing it with a seed randomly generated once for the archive, and -stored encrypted in the keyfile. +|project_name| uses a rolling hash computed by the Buzhash_ algorithm, with a +window size of 4095 bytes (`0xFFF`), with a minimum chunk size of 1024 bytes. +It triggers (chunks) when the last 16 bits of the hash are zero, producing +chunks of 64kiB on average. -Indexes -------- +The buzhash table is altered by XORing it with a seed randomly generated once +for the archive, and stored encrypted in the keyfile. -There are two main indexes: the chunk lookup index and the repository -index. There is also the file chunk cache. -The chunk lookup index is stored in ``cache/chunk`` and is indexed on -the ``chunk hash``. It contains: +Indexes / Caches +---------------- -* reference count -* size -* ciphered size - -The repository index is stored in ``repo/index.%d`` and is also -indexed on ``chunk hash`` and contains: - -* segment -* offset - -The repository index files are random access but those files can be -recreated if damaged or lost using ``check --repair``. - -Both indexes are stored as hash tables, directly mapped in memory from -the file content, with only one slot per bucket, but that spreads the -collisions to the following buckets. As a consequence the hash is just -a start position for a linear search, and if the element is not in the -table the index is linearly crossed until an empty bucket is -found. 
When the table is full at 90% its size is doubled, when it's -empty at 25% its size is halfed. So operations on it have a variable -complexity between constant and linear with low factor, and memory -overhead varies between 10% and 300%. - -The file chunk cache is stored in ``cache/files`` and is indexed on -the ``file path hash`` and contains: +The files cache is stored in ``cache/files`` and is indexed on the +``file path hash``. At backup time, it is used to quickly determine whether we +need to chunk a given file (or whether it is unchanged and we already have all +its pieces). +It contains: * age -* inode number -* size -* mtime_ns -* chunks hashes +* file inode number +* file size +* file mtime_ns +* file content chunk hashes The inode number is stored to make sure we distinguish between different files, as a single path may not be unique across different archives in different setups. -The file chunk cache is stored as a python associative array storing -python objects, which generate a lot of overhead. This takes around -240 bytes per file without the chunk list, to be compared to at most -64 bytes of real data (depending on data alignment), and around 80 -bytes per chunk hash (vs 32), with a minimum of ~250 bytes even if -only one chunk hash. +The files cache is stored as a python associative array storing +python objects, which generates a lot of overhead. -Indexes memory usage --------------------- +The chunks cache is stored in ``cache/chunks`` and is indexed on the +``chunk id_hash``. It is used to determine whether we already have a specific +chunk, to count references to it and also for statistics. +It contains: -Here is the estimated memory usage of |project_name| when using those -indexes. 
+* reference count +* size +* encrypted/compressed size -Repository index - 40 bytes x N ~ 200MB (If a remote repository is - used this will be allocated on the remote side) +The repository index is stored in ``repo/index.%d`` and is indexed on the +``chunk id_hash``. It is used to determine a chunk's location in the repository. +It contains: -Chunk lookup index - 44 bytes x N ~ 220MB +* segment (that contains the chunk) +* offset (where the chunk is located in the segment) -File chunk cache - probably 80-100 bytes x N ~ 400MB +The repository index file is random access. + +Hints are stored in a file (``repo/hints.%d``). +It contains: + +* version +* list of segments +* compact + +hints and index can be recreated if damaged or lost using ``check --repair``. + +The chunks cache and the repository index are stored as hash tables, with +only one slot per bucket, but that spreads the collisions to the following +buckets. As a consequence the hash is just a start position for a linear +search, and if the element is not in the table the index is linearly crossed +until an empty bucket is found. + +When the hash table is almost full at 90%, its size is doubled. When it's +almost empty at 25%, its size is halved. So operations on it have a variable +complexity between constant and linear with low factor, and memory overhead +varies between 10% and 300%. + + +Indexes / Caches memory usage +----------------------------- + +Here is the estimated memory usage of |project_name|: + + chunk_count ~= total_file_size / 65536 + + repo_index_usage = chunk_count * 40 + + chunks_cache_usage = chunk_count * 44 + + files_cache_usage = total_file_count * 240 + chunk_count * 80 + + mem_usage ~= repo_index_usage + chunks_cache_usage + files_cache_usage + = total_file_count * 240 + total_file_size / 400 + +All units are Bytes. + +It is assuming every chunk is referenced exactly once and that typical chunk size is 64kiB. 
+ +If a remote repository is used the repo index will be allocated on the remote side. + +E.g. backing up a total count of 1Mi files with a total size of 1TiB: + + mem_usage = 1 * 2**20 * 240 + 1 * 2**40 / 400 = 2.8GiB + +Note: there is a commandline option to switch off the files cache. You'll save +some memory, but it will need to read / chunk all the files then. -In the above we assume 350GB of data that we divide on an average 64KB -chunk size, so N is around 5.3 million. Encryption ---------- -AES_ is used with CTR mode of operation (so no need for padding). A 64 -bits initialization vector is used, a `HMAC-SHA256`_ is computed -on the encrypted chunk with a random 64 bits nonce and both are stored -in the chunk. The header of each chunk is : ``TYPE(1)`` + -``HMAC(32)`` + ``NONCE(8)`` + ``CIPHERTEXT``. Encryption and HMAC use -two different keys. +AES_ is used in CTR mode (so no need for padding). A 64bit initialization +vector is used, a `HMAC-SHA256`_ is computed on the encrypted chunk with a +random 64bit nonce and both are stored in the chunk. +The header of each chunk is : ``TYPE(1)`` + ``HMAC(32)`` + ``NONCE(8)`` + ``CIPHERTEXT``. +Encryption and HMAC use two different keys. -In AES CTR mode you can think of the IV as the start value for the -counter. The counter itself is incremented by one after each 16 byte -block. The IV/counter is not required to be random but it must NEVER be -reused. So to accomplish this |project_name| initializes the encryption counter -to be higher than any previously used counter value before encrypting -new data. +In AES CTR mode you can think of the IV as the start value for the counter. +The counter itself is incremented by one after each 16 byte block. +The IV/counter is not required to be random but it must NEVER be reused. +So to accomplish this |project_name| initializes the encryption counter to be +higher than any previously used counter value before encrypting new data. 
-To reduce payload size only 8 bytes of the 16 bytes nonce is saved in -the payload, the first 8 bytes are always zeroes. This does not affect -security but limits the maximum repository capacity to only 295 -exabytes (2**64 * 16 bytes). +To reduce payload size, only 8 bytes of the 16 bytes nonce is saved in the +payload, the first 8 bytes are always zeros. This does not affect security but +limits the maximum repository capacity to only 295 exabytes (2**64 * 16 bytes). -Encryption keys are either a passphrase, passed through the -``BORG_PASSPHRASE`` environment or prompted on the commandline, or -stored in automatically generated key files. +Encryption keys are either derived from a passphrase or kept in a key file. +The passphrase is passed through the ``BORG_PASSPHRASE`` environment variable +or prompted for interactive usage. Key files --------- @@ -274,22 +311,20 @@ enc_key the key used to encrypt data with AES (256 bits) enc_hmac_key - the key used to HMAC the resulting AES-encrypted data (256 bits) + the key used to HMAC the encrypted data (256 bits) id_key - the key used to HMAC the above chunks, the resulting hash is - stored out of band (256 bits) + the key used to HMAC the plaintext chunk data to compute the chunk's id chunk_seed the seed for the buzhash chunking table (signed 32 bit integer) -Those fields are processed using msgpack_. The utf-8 encoded phassphrase -is encrypted with PBKDF2_ and SHA256_ using 100000 iterations and a -random 256 bits salt to give us a derived key. The derived key is 256 -bits long. A `HMAC-SHA256`_ checksum of the above fields is generated -with the derived key, then the derived key is also used to encrypt the -above pack of fields. Then the result is stored in a another msgpack_ -formatted as follows: +Those fields are processed using msgpack_. The utf-8 encoded passphrase +is processed with PBKDF2_ (SHA256_, 100000 iterations, random 256 bit salt) +to give us a derived key. The derived key is 256 bits long. 
+A `HMAC-SHA256`_ checksum of the above fields is generated with the derived +key, then the derived key is also used to encrypt the above pack of fields. +Then the result is stored in a another msgpack_ formatted as follows: version currently always an integer, 1 @@ -315,3 +350,9 @@ The resulting msgpack_ is then encoded using base64 and written to the key file, wrapped using the standard ``textwrap`` module with a header. The header is a single line with a MAGIC string, a space and a hexadecimal representation of the repository id. + + +Compression +----------- + +Currently, zlib level 6 is used as compression. From dd78e1a56e4e2a35bc47030447be6da4cdf50c0c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 11 Jun 2015 22:18:12 +0200 Subject: [PATCH 131/241] improve docs, usage help, changelog --- CHANGES | 76 +++++++++++++++++++++++++------------------ borg/archiver.py | 6 +++- docs/faq.rst | 2 +- docs/installation.rst | 20 +++++------- 4 files changed, 59 insertions(+), 45 deletions(-) diff --git a/CHANGES b/CHANGES index 53fb5f9f7..9a8970519 100644 --- a/CHANGES +++ b/CHANGES @@ -4,53 +4,65 @@ Borg Changelog Version ------------- +Incompatible changes (compared to attic, fork related): + - changed sw name and cli command to "borg", updated docs -- package name and name in urls uses "borgbackup" to have less collisions +- package name (and name in urls) uses "borgbackup" to have less collisions - changed repo / cache internal magic strings from ATTIC* to BORG*, - changed cache location to .cache/borg/ -- give specific path to xattr.is_enabled(), disable symlink setattr call that - always fails -- fix misleading hint the fuse ImportError handler gave, fixes attic #237 -- source: misc. 
cleanups, pep8, style -- implement check --last N -- check: sort archives in reverse time order + changed cache location to .cache/borg/ - this means that it currently won't + accept attic repos (see issue #21 about improving that) + +Bug fixes: + - avoid defect python-msgpack releases, fixes attic #171, fixes attic #185 -- check unpacked data from RPC for tuple type and correct length, fixes attic #127 -- less memory usage: add global option --no-cache-files - fix traceback when trying to do unsupported passphrase change, fixes attic #189 - datetime does not like the year 10.000, fixes attic #139 -- docs and faq improvements, fixes, updates -- cleanup crypto.pyx, make it easier to adapt to other modes -- extract: if --stdout is given, write all extracted binary data to stdout +- fix "info" all archives stats, fixes attic #183 +- fix parsing with missing microseconds, fixes attic #282 +- fix misleading hint the fuse ImportError handler gave, fixes attic #237 +- check unpacked data from RPC for tuple type and correct length, fixes attic #127 +- fix Repository._active_txn state when lock upgrade fails +- give specific path to xattr.is_enabled(), disable symlink setattr call that + always fails +- fix test setup for 32bit platforms, partial fix for attic #196 +- upgraded versioneer, PEP440 compliance, fixes attic #257 + +New features: + +- less memory usage: add global option --no-cache-files +- check --last N (only check the last N archives) +- check: sort archives in reverse time order +- rename repo::oldname newname (rename repository) +- create -v output more informative +- create --progress (backup progress indicator) +- create --timestamp (utc string or reference file/dir) - create: if "-" is given as path, read binary from stdin -- do os.fsync like recommended in the python docs +- extract: if --stdout is given, write all extracted binary data to stdout +- extract --sparse (simple sparse file support) - extra debug information for 'fread failed' +- delete 
(deletes whole repo + local cache) - FUSE: reflect deduplication in allocated blocks - only allow whitelisted RPC calls in server mode - normalize source/exclude paths before matching -- fix "info" all archives stats, fixes attic #183 -- implement create --timestamp, utc string or reference file/dir -- simple sparse file support (extract --sparse) -- fix parsing with missing microseconds, fixes attic #282 - use posix_fadvise to not spoil the OS cache, fixes attic #252 -- source: Let chunker optionally work with os-level file descriptor. -- source: Linux: remove duplicate os.fsencode calls -- fix test setup for 32bit platforms, partial fix for attic #196 -- source: refactor _open_rb code a bit, so it is more consistent / regular -- implement rename repo::oldname newname -- implement create --progress -- source: refactor indicator (status) and item processing -- implement delete repo (also deletes local cache) -- better create -v output -- upgraded versioneer, PEP440 compliance, fixes attic #257 -- source: use py.test for better testing, flake8 for code style checks -- source: fix tox >=2.0 compatibility - toplevel error handler: show tracebacks for better error analysis - sigusr1 / sigint handler to print current file infos - attic PR #286 -- pypi package: add python version classifiers, add FreeBSD to platforms -- fix Repository._active_txn state when lock upgrade fails - RPCError: include the exception args we get from remote +Other changes: + +- source: misc. cleanups, pep8, style +- docs and faq improvements, fixes, updates +- cleanup crypto.pyx, make it easier to adapt to other AES modes +- do os.fsync like recommended in the python docs +- source: Let chunker optionally work with os-level file descriptor. 
+- source: Linux: remove duplicate os.fsencode calls +- source: refactor _open_rb code a bit, so it is more consistent / regular +- source: refactor indicator (status) and item processing +- source: use py.test for better testing, flake8 for code style checks +- source: fix tox >=2.0 compatibility (test runner) +- pypi package: add python version classifiers, add FreeBSD to platforms + Attic Changelog =============== diff --git a/borg/archiver.py b/borg/archiver.py index 438418abb..9d984d5cc 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -515,8 +515,12 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") parser = argparse.ArgumentParser(description='Borg %s - Deduplicated Backups' % __version__) subparsers = parser.add_subparsers(title='Available commands') + serve_epilog = textwrap.dedent(""" + This command starts a repository server process. This command is usually not used manually. + """) subparser = subparsers.add_parser('serve', parents=[common_parser], - description=self.do_serve.__doc__) + description=self.do_serve.__doc__, epilog=serve_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_serve) subparser.add_argument('--restrict-to-path', dest='restrict_to_paths', action='append', metavar='PATH', help='restrict repository access to PATH') diff --git a/docs/faq.rst b/docs/faq.rst index 180ac0edb..ddfb0c834 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -51,7 +51,7 @@ Which file types, attributes, etc. are *not* preserved? recreate them in any case). So, don't panic if your backup misses a UDS! * The precise on-disk representation of the holes in a sparse file. Archive creation has no special support for sparse files, holes are - backed up up as (deduplicated and compressed) runs of zero bytes. + backed up as (deduplicated and compressed) runs of zero bytes. Archive extraction has optional support to extract all-zero chunks as holes in a sparse file. 
diff --git a/docs/installation.rst b/docs/installation.rst index 58073a7a8..dc28fc34d 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -62,21 +62,19 @@ Some of the steps detailled below might be useful also for non-git installs. # optional: for unit testing apt-get install fakeroot - # install virtualenv tool, create and activate a virtual env - apt-get install python-virtualenv - virtualenv --python=python3 borg-env - source borg-env/bin/activate # always do this before using! - - # install some dependencies into virtual env - pip install cython # to compile .pyx -> .c - pip install tox pytest # optional, for running unit tests - pip install sphinx # optional, to build the docs - # get |project_name| from github, install it git clone |git_url| + + apt-get install python-virtualenv + virtualenv --python=python3 borg-env + source borg-env/bin/activate # always before using! + + # install borg + dependencies into virtualenv + pip install cython # compile .pyx -> .c + pip install tox pytest # optional, for running unit tests + pip install sphinx # optional, to build the docs cd borg pip install -e . 
# in-place editable mode # optional: run all the tests, on all supported Python versions fakeroot -u tox - From 21cfdde73b3e8dea4f6bfb8db192e183af958a58 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 11 Jun 2015 22:26:02 +0200 Subject: [PATCH 132/241] adjust docs theme colours for note divs --- docs/_themes/local/static/local.css_t | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/_themes/local/static/local.css_t b/docs/_themes/local/static/local.css_t index d3ae46596..4395cc97b 100644 --- a/docs/_themes/local/static/local.css_t +++ b/docs/_themes/local/static/local.css_t @@ -161,8 +161,8 @@ p.admonition-title:after { } div.note { - background-color: #0f5; - border-bottom: 2px solid #d22; + background-color: #002211; + border-bottom: 2px solid #22dd22; } div.seealso { From d1f56d480b8ac2c46fc577ce42e07310877762a6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 11 Jun 2015 22:29:02 +0200 Subject: [PATCH 133/241] update AUTHORS --- AUTHORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index be47591b2..6b2891eea 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,6 +1,7 @@ Borg Developers / Contributors ("The Borg Collective") `````````````````````````````````````````````````````` -- Thomas Waldmann +- Thomas Waldmann +- Antoine Beaupré Borg is a fork of Attic. Attic is written and maintained From e92d94cb07e4b71fb909e9fa84c1771cd5bdf354 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 11 Jun 2015 22:54:33 +0200 Subject: [PATCH 134/241] rephrase compatibility note --- README.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 214b7fb04..a88732b68 100644 --- a/README.rst +++ b/README.rst @@ -10,8 +10,12 @@ are stored. Borg is a fork of Attic and maintained by "The Borg Collective" (see AUTHORS file). BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. -UNTIL FURTHER NOTICE, EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY. 
-THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF IF IT FITS YOUR NEEDS. +EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER +CHANGES (like when going from 0.x.y to 1.0.0). Please read CHANGES document. + +NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES. + +THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. Read issue #1 on the issue tracker, goals are being defined there. From a3b5a1bebac588c24ff3b5166f39e50a085bb0e1 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 11 Jun 2015 22:56:53 +0200 Subject: [PATCH 135/241] add first borg release version number to CHANGES --- CHANGES | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index 9a8970519..811ee6939 100644 --- a/CHANGES +++ b/CHANGES @@ -1,8 +1,8 @@ Borg Changelog ============== -Version -------------- +Version 0.23.0 +-------------- Incompatible changes (compared to attic, fork related): From c7da105fd0a75eec0454762df1dcc84510a2d813 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 11 Jun 2015 23:04:31 +0200 Subject: [PATCH 136/241] add AUTHORS to MANIFEST.in, so it gets included in releases --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 9ac968909..480b1088a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include README.rst LICENSE CHANGES MANIFEST.in versioneer.py +include README.rst AUTHORS LICENSE CHANGES MANIFEST.in versioneer.py recursive-include borg *.pyx recursive-include docs * recursive-exclude docs *.pyc From 98a015772ddb88005edc7d0ba68f859fe7c86fdd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 12 Jun 2015 00:05:35 +0200 Subject: [PATCH 137/241] forgot to list some 0.23.0 stuff in CHANGES --- CHANGES | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/CHANGES b/CHANGES index 811ee6939..743abbce3 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,25 @@ Borg Changelog 
============== + +Version 0.23.1 +-------------- + +Forgot to list some stuff implemented in 0.23.0, here they are: + +New features: + +- efficient archive list from manifest, meaning a big speedup for slow + repo connections and "list ", "delete ", "prune" +- big speedup for chunks cache sync (esp. for slow repo connections), fixes #18 +- hashindex: improve error messages + +Other changes: + +- explicitely specify binary mode to open binary files +- some easy micro optimizations + + Version 0.23.0 -------------- From 9880ac7ba8d220a2a4da9d076e87e874501876c4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 12 Jun 2015 00:11:16 +0200 Subject: [PATCH 138/241] README: mention pytest --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index a88732b68..3668d38cf 100644 --- a/README.rst +++ b/README.rst @@ -70,7 +70,7 @@ Where are the tests? The tests are in the borg/testsuite package. To run the test suite use the following command:: - $ fakeroot -u tox # you need to have tox installed + $ fakeroot -u tox # you need to have tox and pytest installed .. |build| image:: https://travis-ci.org/borgbackup/borg.svg :alt: Build Status From 1dc00e79375c624beb198e0b2f3b13c3eab99161 Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 15 Jun 2015 13:09:39 +0200 Subject: [PATCH 139/241] Update installation.rst please test on fresh installation. --- docs/installation.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/installation.rst b/docs/installation.rst index dc28fc34d..d5b1d5dd2 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -78,3 +78,29 @@ Some of the steps detailled below might be useful also for non-git installs. # optional: run all the tests, on all supported Python versions fakeroot -u tox + + +Korora / Fedora 21 installation (from git) +--------------------------------------- +Note: this uses latest, unreleased development code from git. 
+While we try not to break master, there are no guarantees on anything. + +Some of the steps detailled below might be useful also for non-git installs. + +.. parsed-literal:: + sudo dnf install libacl-devel + + # get |project_name| from github, install it + git clone |git_url| + virtualenv --python=python3 borg-env + source borg-env/bin/activate + + # install borg + dependencies into virtualenv + pip install cython # compile .pyx -> .c + pip install tox pytest # optional, for running unit tests + pip install sphinx # optional, to build the docs + cd borg + pip install -e . # in-place editable mode + + # optional: run all the tests, on all supported Python versions + fakeroot -u tox From 041357e48a08d7994521ad9e05d560ce734009d7 Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 15 Jun 2015 14:39:04 +0200 Subject: [PATCH 140/241] Update installation.rst Replicated Ubuntus dependency list. --- docs/installation.rst | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index d5b1d5dd2..76aa56b04 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -88,19 +88,34 @@ While we try not to break master, there are no guarantees on anything. Some of the steps detailled below might be useful also for non-git installs. .. 
parsed-literal:: - sudo dnf install libacl-devel + # Python 3.x (>= 3.2) + Headers, Py Package Installer + sudo dnf install python3 python3-devel python3-pip + + # we need OpenSSL + Headers for Crypto + sudo dnf install openssl-devel openssl + + # ACL support Headers + Library + sudo dnf install libacl-devel libacl + + # optional: lowlevel FUSE py binding - to mount backup archives + sudo dnf install python3-llfuse fuse + + # optional: for unit testing + sudo dnf install fakeroot # get |project_name| from github, install it git clone |git_url| + + dnf install python3-virtualenv virtualenv --python=python3 borg-env - source borg-env/bin/activate - + source borg-env/bin/activate # always before using! + # install borg + dependencies into virtualenv pip install cython # compile .pyx -> .c pip install tox pytest # optional, for running unit tests pip install sphinx # optional, to build the docs cd borg pip install -e . # in-place editable mode - + # optional: run all the tests, on all supported Python versions fakeroot -u tox From e168b41406aebb98e1e0bcd14406c8741f38cdb0 Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 15 Jun 2015 14:40:11 +0200 Subject: [PATCH 141/241] Update installation.rst --- docs/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.rst b/docs/installation.rst index 76aa56b04..11f13fc7a 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -81,7 +81,7 @@ Some of the steps detailled below might be useful also for non-git installs. Korora / Fedora 21 installation (from git) ---------------------------------------- +------------------------------------------ Note: this uses latest, unreleased development code from git. While we try not to break master, there are no guarantees on anything. 
From 2743ab1593e919291fd40bff187e4a6980b0535c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 18 Jun 2015 23:18:05 +0200 Subject: [PATCH 142/241] better Exception msg if there is no Borg installed on the remote repository server (still a bit ugly to get even 2 tracebacks) --- borg/remote.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/borg/remote.py b/borg/remote.py index 5d59e14ac..81d4ace05 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -141,7 +141,10 @@ class RemoteRepository: self.r_fds = [self.stdout_fd] self.x_fds = [self.stdin_fd, self.stdout_fd] - version = self.call('negotiate', 1) + try: + version = self.call('negotiate', 1) + except ConnectionClosed: + raise Exception('Server immediately closed connection - is Borg installed and working on the server?') if version != 1: raise Exception('Server insisted on using unsupported protocol version %d' % version) self.id = self.call('open', location.path, create) From 6d0a00496afaf82f45dd066507d073bf7a578eca Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 19 Jun 2015 23:53:23 +0200 Subject: [PATCH 143/241] determine and report chunk counts in chunks index borg info repo::archive now reports unique chunks count, total chunks count also: use index->key_size instead of hardcoded value --- borg/_hashindex.c | 12 +++++++++--- borg/hashindex.pyx | 12 ++++++++---- borg/helpers.py | 5 ++++- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index 9fd31d56b..2eebd09d9 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -361,14 +361,18 @@ hashindex_get_size(HashIndex *index) } static void -hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *total_unique_size, long long *total_unique_csize) +hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, + long long *total_unique_size, long long *total_unique_csize, + long long *total_unique_chunks, long 
long *total_chunks) { - int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0; + int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0, chunks = 0, unique_chunks = 0; const int32_t *values; void *key = NULL; while((key = hashindex_next_key(index, key))) { - values = key + 32; + values = key + index->key_size; + unique_chunks++; + chunks += values[0]; unique_size += values[1]; unique_csize += values[2]; size += values[0] * values[1]; @@ -378,4 +382,6 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs *total_csize = csize; *total_unique_size = unique_size; *total_unique_csize = unique_csize; + *total_unique_chunks = unique_chunks; + *total_chunks = chunks; } diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index 13f9da93c..d5d4b6f45 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -11,7 +11,9 @@ cdef extern from "_hashindex.c": HashIndex *hashindex_read(char *path) HashIndex *hashindex_init(int capacity, int key_size, int value_size) void hashindex_free(HashIndex *index) - void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize) + void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, + long long *unique_size, long long *unique_csize, + long long *total_unique_chunks, long long *total_chunks) int hashindex_get_size(HashIndex *index) int hashindex_write(HashIndex *index, char *path) void *hashindex_get(HashIndex *index, void *key) @@ -179,9 +181,11 @@ cdef class ChunkIndex(IndexBase): return iter def summarize(self): - cdef long long total_size, total_csize, unique_size, unique_csize - hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize) - return total_size, total_csize, unique_size, unique_csize + cdef long long total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks + hashindex_summarize(self.index, &total_size, 
&total_csize, + &unique_size, &unique_csize, + &total_unique_chunks, &total_chunks) + return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks cdef class ChunkKeyIterator: diff --git a/borg/helpers.py b/borg/helpers.py index e97c88bf2..0a3b84d2f 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -174,11 +174,14 @@ class Statistics: self.usize += csize def print_(self, label, cache): - total_size, total_csize, unique_size, unique_csize = cache.chunks.summarize() + total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks = cache.chunks.summarize() print() print(' Original size Compressed size Deduplicated size') print('%-15s %20s %20s %20s' % (label, format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize))) print('All archives: %20s %20s %20s' % (format_file_size(total_size), format_file_size(total_csize), format_file_size(unique_csize))) + print() + print(' Unique chunks Total chunks') + print('Chunk index: %20d %20d' % (total_unique_chunks, total_chunks)) def show_progress(self, item=None, final=False): if not final: From 3b9b976f2af1986a66ede2c6157e8d01a5169168 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 20 Jun 2015 01:20:46 +0200 Subject: [PATCH 144/241] borg create --chunker-params=... 
--- borg/archive.py | 19 ++++++++++++------- borg/archiver.py | 11 ++++++++--- borg/helpers.py | 5 +++++ 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index e6d557479..a053f79e9 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -21,10 +21,12 @@ from .helpers import parse_timestamp, Error, uid2user, user2uid, gid2group, grou Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict, int_to_bigint, bigint_to_int ITEMS_BUFFER = 1024 * 1024 + CHUNK_MIN = 1024 CHUNK_MAX = 10 * 1024 * 1024 WINDOW_SIZE = 0xfff CHUNK_MASK = 0xffff +CHUNKER_PARAMS = (WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX) ZEROS = b'\0' * CHUNK_MAX @@ -69,12 +71,13 @@ class DownloadPipeline: class ChunkBuffer: BUFFER_SIZE = 1 * 1024 * 1024 - def __init__(self, key): + def __init__(self, key, chunker_params=CHUNKER_PARAMS): self.buffer = BytesIO() self.packer = msgpack.Packer(unicode_errors='surrogateescape') self.chunks = [] self.key = key - self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX,self.key.chunk_seed) + chunker_params += (self.key.chunk_seed, ) + self.chunker = Chunker(*chunker_params) def add(self, item): self.buffer.write(self.packer.pack(StableDict(item))) @@ -104,8 +107,8 @@ class ChunkBuffer: class CacheChunkBuffer(ChunkBuffer): - def __init__(self, cache, key, stats): - super(CacheChunkBuffer, self).__init__(key) + def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS): + super(CacheChunkBuffer, self).__init__(key, chunker_params) self.cache = cache self.stats = stats @@ -127,7 +130,8 @@ class Archive: def __init__(self, repository, key, manifest, name, cache=None, create=False, - checkpoint_interval=300, numeric_owner=False, progress=False): + checkpoint_interval=300, numeric_owner=False, progress=False, + chunker_params=CHUNKER_PARAMS): self.cwd = os.getcwd() self.key = key self.repository = repository @@ -142,8 +146,9 @@ class Archive: self.numeric_owner = numeric_owner 
self.pipeline = DownloadPipeline(self.repository, self.key) if create: - self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats) - self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX, self.key.chunk_seed) + self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats, chunker_params) + chunker_params += (self.key.chunk_seed, ) + self.chunker = Chunker(*chunker_params) if name in manifest.archives: raise self.AlreadyExists(name) self.last_checkpoint = time.time() diff --git a/borg/archiver.py b/borg/archiver.py index 9d984d5cc..3c5ada4fe 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -13,7 +13,7 @@ import textwrap import traceback from . import __version__ -from .archive import Archive, ArchiveChecker +from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .repository import Repository from .cache import Cache from .key import key_creator @@ -21,7 +21,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - is_cachedir, bigint_to_int + is_cachedir, bigint_to_int, ChunkerParams from .remote import RepositoryServer, RemoteRepository @@ -104,7 +104,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, - numeric_owner=args.numeric_owner, progress=args.progress) + numeric_owner=args.numeric_owner, progress=args.progress, + chunker_params=args.chunker_params) # Add cache dir to inode_skip list skip_inodes = set() try: @@ -625,6 +626,10 @@ Type "Yes I am sure" if you understand this and want 
to continue.\n""") metavar='yyyy-mm-ddThh:mm:ss', help='manually specify the archive creation date/time (UTC). ' 'alternatively, give a reference file/directory.') + subparser.add_argument('--chunker-params', dest='chunker_params', + type=ChunkerParams, default=CHUNKER_PARAMS, + metavar='WINDOW_SIZE,CHUNK_MASK,CHUNK_MIN,CHUNK_MAX', + help='specify the chunker parameters. default: %r' % (CHUNKER_PARAMS, )) subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/helpers.py b/borg/helpers.py index 0a3b84d2f..1f1612d3a 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -313,6 +313,11 @@ def timestamp(s): raise ValueError +def ChunkerParams(s): + window_size, chunk_mask, chunk_min, chunk_max = s.split(',') + return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max) + + def is_cachedir(path): """Determines whether the specified path is a cache directory (and therefore should potentially be excluded from the backup) according to From 54e8dd8419cd783262cefce5c12982b59992c786 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 21 Jun 2015 01:46:41 +0200 Subject: [PATCH 145/241] misc chunker parameter changes - use power-of-2 sizes / n bit hash mask so one can give them more easily - chunker api: give seed first, so we can give *chunker_params after it - fix some tests that aren't possible with 2^N - make sparse file extraction zero detection flexible for variable chunk max size --- borg/archive.py | 21 ++++++++++----------- borg/archiver.py | 4 ++-- borg/chunker.pyx | 7 +++++-- borg/testsuite/archiver.py | 4 ++-- borg/testsuite/chunker.py | 26 +++++++++++++------------- docs/usage.rst | 3 +++ 6 files changed, 35 insertions(+), 30 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index a053f79e9..83ac6aecb 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -22,13 +22,13 @@ from .helpers import parse_timestamp, Error, uid2user, user2uid, gid2group, grou 
ITEMS_BUFFER = 1024 * 1024 -CHUNK_MIN = 1024 -CHUNK_MAX = 10 * 1024 * 1024 -WINDOW_SIZE = 0xfff -CHUNK_MASK = 0xffff -CHUNKER_PARAMS = (WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX) +CHUNK_MIN_EXP = 10 # 2**10 == 1kiB +CHUNK_MAX_EXP = 23 # 2**23 == 8MiB +HASH_WINDOW_SIZE = 0xfff # 4095B +HASH_MASK_BITS = 16 # results in ~64kiB chunks statistically -ZEROS = b'\0' * CHUNK_MAX +# defaults, use --chunker-params to override +CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE) utime_supports_fd = os.utime in getattr(os, 'supports_fd', {}) utime_supports_follow_symlinks = os.utime in getattr(os, 'supports_follow_symlinks', {}) @@ -76,8 +76,7 @@ class ChunkBuffer: self.packer = msgpack.Packer(unicode_errors='surrogateescape') self.chunks = [] self.key = key - chunker_params += (self.key.chunk_seed, ) - self.chunker = Chunker(*chunker_params) + self.chunker = Chunker(self.key.chunk_seed, *chunker_params) def add(self, item): self.buffer.write(self.packer.pack(StableDict(item))) @@ -147,8 +146,7 @@ class Archive: self.pipeline = DownloadPipeline(self.repository, self.key) if create: self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats, chunker_params) - chunker_params += (self.key.chunk_seed, ) - self.chunker = Chunker(*chunker_params) + self.chunker = Chunker(self.key.chunk_seed, *chunker_params) if name in manifest.archives: raise self.AlreadyExists(name) self.last_checkpoint = time.time() @@ -163,6 +161,7 @@ class Archive: raise self.DoesNotExist(name) info = self.manifest.archives[name] self.load(info[b'id']) + self.zeros = b'\0' * (1 << chunker_params[1]) def _load_meta(self, id): data = self.key.decrypt(id, self.repository.get(id)) @@ -291,7 +290,7 @@ class Archive: with open(path, 'wb') as fd: ids = [c[0] for c in item[b'chunks']] for data in self.pipeline.fetch_many(ids, is_preloaded=True): - if sparse and ZEROS.startswith(data): + if sparse and self.zeros.startswith(data): # all-zero chunk: create a hole in a sparse 
file fd.seek(len(data), 1) else: diff --git a/borg/archiver.py b/borg/archiver.py index 3c5ada4fe..e18c1b277 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -628,8 +628,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") 'alternatively, give a reference file/directory.') subparser.add_argument('--chunker-params', dest='chunker_params', type=ChunkerParams, default=CHUNKER_PARAMS, - metavar='WINDOW_SIZE,CHUNK_MASK,CHUNK_MIN,CHUNK_MAX', - help='specify the chunker parameters. default: %r' % (CHUNKER_PARAMS, )) + metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE', + help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS) subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/chunker.pyx b/borg/chunker.pyx index 88ccd630d..1d4897db1 100644 --- a/borg/chunker.pyx +++ b/borg/chunker.pyx @@ -20,8 +20,11 @@ cdef extern from "_chunker.c": cdef class Chunker: cdef _Chunker *chunker - def __cinit__(self, window_size, chunk_mask, min_size, max_size, seed): - self.chunker = chunker_init(window_size, chunk_mask, min_size, max_size, seed & 0xffffffff) + def __cinit__(self, seed, chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size): + min_size = 1 << chunk_min_exp + max_size = 1 << chunk_max_exp + hash_mask = (1 << hash_mask_bits) - 1 + self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff) def chunkify(self, fd, fh=-1): """ diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index b35df2477..03427008a 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -12,7 +12,7 @@ import unittest from hashlib import sha256 from .. 
import xattr -from ..archive import Archive, ChunkBuffer, CHUNK_MAX +from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP from ..archiver import Archiver from ..cache import Cache from ..crypto import bytes_to_long, num_aes_blocks @@ -213,7 +213,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): sparse_support = sys.platform != 'darwin' filename = os.path.join(self.input_path, 'sparse') content = b'foobar' - hole_size = 5 * CHUNK_MAX # 5 full chunker buffers + hole_size = 5 * (1 << CHUNK_MAX_EXP) # 5 full chunker buffers with open(filename, 'wb') as fd: # create a file that has a hole at the beginning and end (if the # OS and filesystem supports sparse files) diff --git a/borg/testsuite/chunker.py b/borg/testsuite/chunker.py index 982e4dd20..9b12901eb 100644 --- a/borg/testsuite/chunker.py +++ b/borg/testsuite/chunker.py @@ -1,27 +1,27 @@ from io import BytesIO from ..chunker import Chunker, buzhash, buzhash_update -from ..archive import CHUNK_MAX +from ..archive import CHUNK_MAX_EXP from . 
import BaseTestCase class ChunkerTestCase(BaseTestCase): def test_chunkify(self): - data = b'0' * int(1.5 * CHUNK_MAX) + b'Y' - parts = [bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 0).chunkify(BytesIO(data))] + data = b'0' * int(1.5 * (1 << CHUNK_MAX_EXP)) + b'Y' + parts = [bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(data))] self.assert_equal(len(parts), 2) self.assert_equal(b''.join(parts), data) - self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 0).chunkify(BytesIO(b''))], []) - self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz']) - self.assert_equal([bytes(c) for c in Chunker(2, 0x3, 2, CHUNK_MAX, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, CHUNK_MAX, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, CHUNK_MAX, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boo', b'bazfo', b'obar', b'boo', b'bazfo', b'obar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 3, CHUNK_MAX, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foo', b'barboobaz', b'foo', b'barboobaz', b'foo', b'barboobaz']) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, CHUNK_MAX, 0).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, CHUNK_MAX, 1).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']) - self.assert_equal([bytes(c) for c in Chunker(3, 0x3, 4, CHUNK_MAX, 
2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']) + self.assert_equal([bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b''))], []) + self.assert_equal([bytes(c) for c in Chunker(0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']) + self.assert_equal([bytes(c) for c in Chunker(1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz']) + self.assert_equal([bytes(c) for c in Chunker(2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']) + self.assert_equal([bytes(c) for c in Chunker(0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) + self.assert_equal([bytes(c) for c in Chunker(1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']) + self.assert_equal([bytes(c) for c in Chunker(2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']) + self.assert_equal([bytes(c) for c in Chunker(0, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz' * 3]) + self.assert_equal([bytes(c) for c in Chunker(1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarbo', b'obazfoobar', b'boobazfo', b'obarboobaz']) + self.assert_equal([bytes(c) for c in Chunker(2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b'foobarboobaz' * 3))], [b'foobarboobaz', b'foobarboobaz', b'foobarboobaz']) def test_buzhash(self): self.assert_equal(buzhash(b'abcdefghijklmnop', 0), 3795437769) diff --git a/docs/usage.rst b/docs/usage.rst index 971a467b9..688bd255b 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -50,6 +50,9 @@ 
Examples NAME="root-`date +%Y-%m-%d`" $ borg create /mnt/backup::$NAME / --do-not-cross-mountpoints + # Backup huge files with little chunk management overhead + $ borg create --chunker-params 19,23,21,4095 /mnt/backup::VMs /srv/VMs + .. include:: usage/extract.rst.inc From 41a37e77db59e86589253fe502d2add8bbec4b93 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 21 Jun 2015 02:11:02 +0200 Subject: [PATCH 146/241] add a misc docs directory, add a usecase for --chunker-params --- docs/misc/create_chunker-params.txt | 116 ++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 docs/misc/create_chunker-params.txt diff --git a/docs/misc/create_chunker-params.txt b/docs/misc/create_chunker-params.txt new file mode 100644 index 000000000..73cac6a3b --- /dev/null +++ b/docs/misc/create_chunker-params.txt @@ -0,0 +1,116 @@ +About borg create --chunker-params +================================== + +--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE + +CHUNK_MIN_EXP and CHUNK_MAX_EXP give the exponent N of the 2^N minimum and +maximum chunk size. Required: CHUNK_MIN_EXP < CHUNK_MAX_EXP. + +Defaults: 10 (2^10 == 1KiB) minimum, 23 (2^23 == 8MiB) maximum. + +HASH_MASK_BITS is the number of least-significant bits of the rolling hash +that need to be zero to trigger a chunk cut. +Recommended: CHUNK_MIN_EXP + X <= HASH_MASK_BITS <= CHUNK_MAX_EXP - X, X >= 2 +(this allows the rolling hash some freedom to make its cut at a place +determined by the windows contents rather than the min/max. chunk size). + +Default: 16 (statistically, chunks will be about 2^16 == 64kiB in size) + +HASH_WINDOW_SIZE: the size of the window used for the rolling hash computation. +Default: 4095B + + +Trying it out +============= + +I backed up a VM directory to demonstrate how different chunker parameters +influence repo size, index size / chunk count, compression, deduplication. 
+ +repo-sm: ~64kiB chunks (16 bits chunk mask), min chunk size 1kiB (2^10B) + (these are attic / borg 0.23 internal defaults) + +repo-lg: ~1MiB chunks (20 bits chunk mask), min chunk size 64kiB (2^16B) + +repo-xl: 8MiB chunks (2^23B max chunk size), min chunk size 64kiB (2^16B). + The chunk mask bits was set to 31, so it (almost) never triggers. + This degrades the rolling hash based dedup to a fixed-offset dedup + as the cutting point is now (almost) always the end of the buffer + (at 2^23B == 8MiB). + +The repo index size is an indicator for the RAM needs of Borg. +In this special case, the total RAM needs are about 2.1x the repo index size. +You see index size of repo-sm is 16x larger than of repo-lg, which corresponds +to the ratio of the different target chunk sizes. + +Note: RAM needs were not a problem in this specific case (37GB data size). + But just imagine, you have 37TB of such data and much less than 42GB RAM, + then you'ld definitely want the "lg" chunker params so you only need + 2.6GB RAM. Or even bigger chunks than shown for "lg" (see "xl"). + +You also see compression works better for larger chunks, as expected. +Duplication works worse for larger chunks, also as expected. 
+ +small chunks +============ + +$ borg info /extra/repo-sm::1 + +Command line: /home/tw/w/borg-env/bin/borg create --chunker-params 10,23,16,4095 /extra/repo-sm::1 /home/tw/win +Number of files: 3 + + Original size Compressed size Deduplicated size +This archive: 37.12 GB 14.81 GB 12.18 GB +All archives: 37.12 GB 14.81 GB 12.18 GB + + Unique chunks Total chunks +Chunk index: 378374 487316 + +$ ls -l /extra/repo-sm/index* + +-rw-rw-r-- 1 tw tw 20971538 Jun 20 23:39 index.2308 + +$ du -sk /extra/repo-sm +11930840 /extra/repo-sm + +large chunks +============ + +$ borg info /extra/repo-lg::1 + +Command line: /home/tw/w/borg-env/bin/borg create --chunker-params 16,23,20,4095 /extra/repo-lg::1 /home/tw/win +Number of files: 3 + + Original size Compressed size Deduplicated size +This archive: 37.10 GB 14.60 GB 13.38 GB +All archives: 37.10 GB 14.60 GB 13.38 GB + + Unique chunks Total chunks +Chunk index: 25889 29349 + +$ ls -l /extra/repo-lg/index* + +-rw-rw-r-- 1 tw tw 1310738 Jun 20 23:10 index.2264 + +$ du -sk /extra/repo-lg +13073928 /extra/repo-lg + +xl chunks +========= + +(borg-env)tw@tux:~/w/borg$ borg info /extra/repo-xl::1 +Command line: /home/tw/w/borg-env/bin/borg create --chunker-params 16,23,31,4095 /extra/repo-xl::1 /home/tw/win +Number of files: 3 + + Original size Compressed size Deduplicated size +This archive: 37.10 GB 14.59 GB 14.59 GB +All archives: 37.10 GB 14.59 GB 14.59 GB + + Unique chunks Total chunks +Chunk index: 4319 4434 + +$ ls -l /extra/repo-xl/index* +-rw-rw-r-- 1 tw tw 327698 Jun 21 00:52 index.2011 + +$ du -sk /extra/repo-xl/ +14253464 /extra/repo-xl/ + From 2944bbe6c611fe1271ebb3994622bbd09596e30f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 21 Jun 2015 02:36:22 +0200 Subject: [PATCH 147/241] update CHANGES --- CHANGES | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 743abbce3..2acb58115 100644 --- a/CHANGES +++ b/CHANGES @@ -2,10 +2,31 @@ Borg Changelog 
============== -Version 0.23.1 +Version 0.24.0 -------------- -Forgot to list some stuff implemented in 0.23.0, here they are: +New features: + +- borg create --chunker-params ... to configure the chunker. + See docs/misc/create_chunker-params.txt for more information. +- borg info now reports chunk counts in the chunk index. + +Bug fixes: + +- reduce memory usage, see --chunker-params, fixes #16. + This can be used to reduce chunk management overhead, so borg does not create + a huge chunks index/repo index and eats all your RAM if you back up lots of + data in huge files (like VM disk images). +- better Exception msg if there is no Borg installed on the remote repo server. + +Other changes: + +- Fedora/Fedora-based install instructions added to docs. +- added docs/misc directory for misc. writeups that won't be included "as is" + into the html docs. + + +I forgot to list some stuff already implemented in 0.23.0, here they are: New features: @@ -16,7 +37,7 @@ New features: Other changes: -- explicitely specify binary mode to open binary files +- explicitly specify binary mode to open binary files - some easy micro optimizations From 6964799d13e28077fec6e9310e2c43f596af125d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 25 Jun 2015 22:16:23 +0200 Subject: [PATCH 148/241] borg create --compression 0..9 for variable compression --- borg/archiver.py | 5 ++ borg/key.py | 5 +- docs/misc/create_compression.txt | 130 +++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 docs/misc/create_compression.txt diff --git a/borg/archiver.py b/borg/archiver.py index e18c1b277..3cd588b84 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -101,6 +101,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") t0 = datetime.now() repository = self.open_repository(args.archive, exclusive=True) manifest, key = Manifest.load(repository) + key.compression_level = args.compression cache = Cache(repository, 
key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, @@ -630,6 +631,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") type=ChunkerParams, default=CHUNKER_PARAMS, metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE', help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS) + subparser.add_argument('-C', '--compression', dest='compression', + type=int, default=0, metavar='N', + help='select compression algorithm and level. 0..9 is supported and means zlib ' + 'level 0 (no compression, fast, default) .. zlib level 9 (high compression, slow).') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/key.py b/borg/key.py index 3e44b092e..b13295101 100644 --- a/borg/key.py +++ b/borg/key.py @@ -53,6 +53,7 @@ class KeyBase: def __init__(self): self.TYPE_STR = bytes([self.TYPE]) + self.compression_level = 0 def id_hash(self, data): """Return HMAC hash using the "id" HMAC key @@ -83,7 +84,7 @@ class PlaintextKey(KeyBase): return sha256(data).digest() def encrypt(self, data): - return b''.join([self.TYPE_STR, zlib.compress(data)]) + return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)]) def decrypt(self, id, data): if data[0] != self.TYPE: @@ -115,7 +116,7 @@ class AESKeyBase(KeyBase): return HMAC(self.id_key, data, sha256).digest() def encrypt(self, data): - data = zlib.compress(data) + data = zlib.compress(data, self.compression_level) self.enc_cipher.reset() data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) hmac = HMAC(self.enc_hmac_key, data, sha256).digest() diff --git a/docs/misc/create_compression.txt b/docs/misc/create_compression.txt new file mode 100644 index 000000000..89ffdf4d9 --- /dev/null +++ b/docs/misc/create_compression.txt @@ -0,0 +1,130 @@ 
+data compression +================ + +borg create --compression N repo::archive data + +Currently, borg only supports zlib compression. There are plans to expand this +to other, faster or better compression algorithms in the future. + +N == 0 -> zlib level 0 == very quick, no compression +N == 1 -> zlib level 1 == quick, low compression +... +N == 9 -> zlib level 9 == slow, high compression + +Measurements made on a Haswell Ultrabook, SSD storage, Linux. + + +Example 1: lots of relatively small text files (linux kernel src) +----------------------------------------------------------------- + +N == 1 does a good job here, it saves the additional time needed for +compression because it needs to store less into storage (see N == 0). + +N == 6 is also quite ok, a little slower, a little less repo size. +6 was the old default of borg. + +High compression levels only give a little more compression, but take a lot +of cpu time. + +$ borg create --stats --compression 0 +------------------------------------------------------------------------------ +Duration: 50.40 seconds +Number of files: 72890 + + Original size Compressed size Deduplicated size +This archive: 1.17 GB 1.18 GB 1.01 GB + + Unique chunks Total chunks +Chunk index: 70263 82309 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 1 +------------------------------------------------------------------------------ +Duration: 49.29 seconds +Number of files: 72890 + + Original size Compressed size Deduplicated size +This archive: 1.17 GB 368.62 MB 295.22 MB + + Unique chunks Total chunks +Chunk index: 70280 82326 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 5 +------------------------------------------------------------------------------ +Duration: 59.99 seconds +Number of files: 72890 + + Original size Compressed size Deduplicated size +This archive: 1.17 GB 331.70 MB 262.20 
MB + + Unique chunks Total chunks +Chunk index: 70290 82336 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 6 +------------------------------------------------------------------------------ +Duration: 1 minutes 13.64 seconds +Number of files: 72890 + + Original size Compressed size Deduplicated size +This archive: 1.17 GB 328.79 MB 259.56 MB + + Unique chunks Total chunks +Chunk index: 70279 82325 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 9 +------------------------------------------------------------------------------ +Duration: 3 minutes 1.58 seconds +Number of files: 72890 + + Original size Compressed size Deduplicated size +This archive: 1.17 GB 326.57 MB 257.57 MB + + Unique chunks Total chunks +Chunk index: 70292 82338 +------------------------------------------------------------------------------ + + +Example 2: large VM disk file (sparse file) +------------------------------------------- + +The file's directory size is 80GB, but a lot of it is sparse (and reads as +zeros). 
+ +$ borg create --stats --compression 0 +------------------------------------------------------------------------------ +Duration: 13 minutes 48.47 seconds +Number of files: 1 + + Original size Compressed size Deduplicated size +This archive: 80.54 GB 80.55 GB 10.87 GB + + Unique chunks Total chunks +Chunk index: 147307 177109 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 1 +------------------------------------------------------------------------------ +Duration: 15 minutes 31.34 seconds +Number of files: 1 + + Original size Compressed size Deduplicated size +This archive: 80.54 GB 6.68 GB 5.67 GB + + Unique chunks Total chunks +Chunk index: 147309 177111 +------------------------------------------------------------------------------ + +$ borg create --stats --compression 6 +------------------------------------------------------------------------------ +Duration: 18 minutes 57.54 seconds +Number of files: 1 + + Original size Compressed size Deduplicated size +This archive: 80.54 GB 6.19 GB 5.44 GB + + Unique chunks Total chunks +Chunk index: 147307 177109 +------------------------------------------------------------------------------ From 89db9b8b9ec77b3e8e73e2aa2df88edeb223a65c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 25 Jun 2015 23:57:38 +0200 Subject: [PATCH 149/241] improve at-end error logging always use archiver.print_error, so it goes to sys.stderr always say "Error: ..." for errors for rc != 0 always say "Exiting with failure status ..." 
catch all exceptions subclassing Exception, so we can log them in same way and set exit_code=1 --- borg/archiver.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 3cd588b84..4127067d7 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -866,19 +866,19 @@ def main(): try: exit_code = archiver.run(sys.argv[1:]) except Error as e: - traceback.print_exc() - archiver.print_error(e.get_message()) + archiver.print_error(e.get_message() + "\n%s" % traceback.format_exc()) exit_code = e.exit_code except RemoteRepository.RPCError as e: - print(e) + archiver.print_error('Error: Remote Exception.\n%s' % str(e)) + exit_code = 1 + except Exception: + archiver.print_error('Error: Local Exception.\n%s' % traceback.format_exc()) exit_code = 1 except KeyboardInterrupt: - traceback.print_exc() - archiver.print_error('Error: Keyboard interrupt') + archiver.print_error('Error: Keyboard interrupt.\n%s' % traceback.format_exc()) exit_code = 1 - else: - if exit_code: - archiver.print_error('Exiting with failure status due to previous errors') + if exit_code: + archiver.print_error('Exiting with failure status due to previous errors') sys.exit(exit_code) if __name__ == '__main__': From b92dd1bab21b01e9428da58b7beaae89a061dba7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 26 Jun 2015 00:04:35 +0200 Subject: [PATCH 150/241] the short prune options without "keep-" are deprecated, so do not suggest them --- borg/archiver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index e18c1b277..fb13af676 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -383,8 +383,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") cache = Cache(repository, key, manifest, do_files=args.cache_files) archives = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list if args.hourly + args.daily + args.weekly + 
args.monthly + args.yearly == 0 and args.within is None: - self.print_error('At least one of the "within", "hourly", "daily", "weekly", "monthly" or "yearly" ' - 'settings must be specified') + self.print_error('At least one of the "within", "keep-hourly", "keep-daily", "keep-weekly", ' + '"keep-monthly" or "keep-yearly" settings must be specified') return 1 if args.prefix: archives = [archive for archive in archives if archive.name.startswith(args.prefix)] From 4068fc1e3144eb1694c9b5c1d520d5cbdfe242a5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 28 Jun 2015 14:02:38 +0200 Subject: [PATCH 151/241] clarify help text, fixes #73 --- borg/archiver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 4b406b6c7..8ddebd210 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -590,7 +590,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") traversing all paths specified. The archive will consume almost no disk space for files or parts of files that have already been stored in other archives. - See "borg help patterns" for more help on exclude patterns. + See the output of the "borg help patterns" command for more help on exclude patterns. """) subparser = subparsers.add_parser('create', parents=[common_parser], @@ -647,7 +647,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") by passing a list of ``PATHs`` as arguments. The file selection can further be restricted by using the ``--exclude`` option. - See "borg help patterns" for more help on exclude patterns. + See the output of the "borg help patterns" command for more help on exclude patterns. 
""") subparser = subparsers.add_parser('extract', parents=[common_parser], description=self.do_extract.__doc__, From 9ead4097cf974e61b1678ea8faa68a51c72f253d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 29 Jun 2015 23:07:07 +0200 Subject: [PATCH 152/241] we are now on #borgbackup on chat.freenode.net it seems like there is currently no bureaucracy required, freenode web site says group registration is suspended. i also asked on the freenode channel, they said just make sure you are right here and use it. so we do that now. --- docs/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.rst b/docs/index.rst index db4f4928d..fad266ed3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -66,7 +66,7 @@ For more general questions or discussions, IRC or mailing list are preferred. IRC --- -Join us on channel ##borgbackup on chat.freenode.net. As usual on IRC, just +Join us on channel #borgbackup on chat.freenode.net. As usual on IRC, just ask or tell directly and then patiently wait for replies. Stay connected. Mailing list From 77577b7417d2b87ec5e3a13fae6d779ab92dc80f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 29 Jun 2015 23:30:36 +0200 Subject: [PATCH 153/241] add related projects, fix web site url --- docs/_themes/local/sidebarusefullinks.html | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/_themes/local/sidebarusefullinks.html b/docs/_themes/local/sidebarusefullinks.html index 5723043b2..2f35c2695 100644 --- a/docs/_themes/local/sidebarusefullinks.html +++ b/docs/_themes/local/sidebarusefullinks.html @@ -3,9 +3,15 @@

Useful Links

+ +

Related Projects

+ + From 95828c576d0114db7cecae9a35dced6b6ef9b11f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 2 Jul 2015 23:05:14 +0200 Subject: [PATCH 154/241] update AUTHORS I merged some pull requests of other developers into borg and now added them to the developers / contributors list, with permission. In general, feel free to add yourself to the list if you contributed something to borg - just make a pull request including your addition to AUTHORS. That's way easier for me than having to ask whether you want to be in there and how precisely... - just write your entry yourself! --- AUTHORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 6b2891eea..1f6fc9cdc 100644 --- a/AUTHORS +++ b/AUTHORS @@ -2,7 +2,8 @@ Borg Developers / Contributors ("The Borg Collective") `````````````````````````````````````````````````````` - Thomas Waldmann - Antoine Beaupré - +- Radek Podgorny +- Yuri D'Elia Borg is a fork of Attic. Attic is written and maintained by Jonas Borgström and various contributors: From 028e12473d5a40d86c67fe992698abaee14e16ff Mon Sep 17 00:00:00 2001 From: Jan Bader Date: Thu, 2 Jul 2015 23:21:28 +0200 Subject: [PATCH 155/241] Update internals to reflect new --compression argument --- docs/internals.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/internals.rst b/docs/internals.rst index 9ee4a8c9e..43c2fe2e3 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -355,4 +355,4 @@ representation of the repository id. Compression ----------- -Currently, zlib level 6 is used as compression. +Currently, compression is disabled by default. To enable zlib compression, ``--compression level`` on the command line. Level can be anything from 0 (no compression, fast) to 9 (high compression, slow). 
From 8de8073345e585491c83a87d3c9e5a604ee52d4c Mon Sep 17 00:00:00 2001 From: Jan Bader Date: Thu, 2 Jul 2015 23:23:45 +0200 Subject: [PATCH 156/241] Improve wording --- docs/internals.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/internals.rst b/docs/internals.rst index 43c2fe2e3..2928b284f 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -355,4 +355,4 @@ representation of the repository id. Compression ----------- -Currently, compression is disabled by default. To enable zlib compression, ``--compression level`` on the command line. Level can be anything from 0 (no compression, fast) to 9 (high compression, slow). +Currently, compression is disabled by default. Zlib compression can be enabled by passing ``--compression level`` on the command line. Level can be anything from 0 (no compression, fast) to 9 (high compression, slow). From a59211f2951530b803d34f9aed3505fcef361126 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 11 Jul 2015 17:22:12 +0200 Subject: [PATCH 157/241] use borg-tmp as prefix for temporary files / directories also: remove some unused temp dir. 
code --- borg/archive.py | 6 ------ borg/cache.py | 2 +- borg/fuse.py | 2 +- borg/remote.py | 2 +- borg/xattr.py | 2 +- 5 files changed, 4 insertions(+), 10 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 83ac6aecb..bcaf49830 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -2,8 +2,6 @@ from datetime import datetime from getpass import getuser from itertools import groupby import errno -import shutil -import tempfile from .key import key_factory from .remote import cache_if_remote import msgpack @@ -609,10 +607,6 @@ class ArchiveChecker: def __init__(self): self.error_found = False self.possibly_superseded = set() - self.tmpdir = tempfile.mkdtemp() - - def __del__(self): - shutil.rmtree(self.tmpdir) def check(self, repository, repair=False, last=None): self.report_progress('Starting archive consistency check...') diff --git a/borg/cache.py b/borg/cache.py index 110f088d9..5e3039161 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -313,7 +313,7 @@ class Cache: self.begin_txn() print('Synchronizing chunks cache...') # XXX we have to do stuff on disk due to lacking ChunkIndex api - with tempfile.TemporaryDirectory() as tmp_dir: + with tempfile.TemporaryDirectory(prefix='borg-tmp') as tmp_dir: repository = cache_if_remote(self.repository) out_archive = open_out_archive() in_archive = open_in_archive() diff --git a/borg/fuse.py b/borg/fuse.py index eb4b46ffd..ab28d3b3e 100644 --- a/borg/fuse.py +++ b/borg/fuse.py @@ -17,7 +17,7 @@ have_fuse_mtime_ns = hasattr(llfuse.EntryAttributes, 'st_mtime_ns') class ItemCache: def __init__(self): - self.fd = tempfile.TemporaryFile() + self.fd = tempfile.TemporaryFile(prefix='borg-tmp') self.offset = 1000000 def add(self, item): diff --git a/borg/remote.py b/borg/remote.py index 81d4ace05..93e178f79 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -305,7 +305,7 @@ class RepositoryCache: self.cleanup() def initialize(self): - self.tmppath = tempfile.mkdtemp() + self.tmppath = 
tempfile.mkdtemp(prefix='borg-tmp') self.index = NSIndex() self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b') diff --git a/borg/xattr.py b/borg/xattr.py index 2201e4b48..ded6d752d 100644 --- a/borg/xattr.py +++ b/borg/xattr.py @@ -11,7 +11,7 @@ from ctypes.util import find_library def is_enabled(path=None): """Determine if xattr is enabled on the filesystem """ - with tempfile.NamedTemporaryFile(dir=path) as fd: + with tempfile.NamedTemporaryFile(dir=path, prefix='borg-tmp') as fd: try: setxattr(fd.fileno(), 'user.name', b'value') except OSError: From 0580f2b4eb316a4aeaeb69d3ba93b1e2d308a851 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 11 Jul 2015 18:31:49 +0200 Subject: [PATCH 158/241] style and cosmetic fixes, no semantic changes use simpler super() syntax of python 3.x remove fixed errors/warnings' codes from setup.cfg flake8 configuration fix file exclusion list for flake8 --- borg/archive.py | 8 ++++---- borg/cache.py | 1 - borg/fuse.py | 2 +- borg/helpers.py | 20 +++++++++++--------- borg/key.py | 4 ++-- borg/lrucache.py | 10 +++++----- borg/remote.py | 27 ++++++++++++++------------- borg/testsuite/__init__.py | 2 -- borg/testsuite/archiver.py | 16 ++++++++-------- borg/testsuite/helpers.py | 25 +++++++++++++------------ borg/testsuite/platform.py | 1 - setup.cfg | 4 ++-- setup.py | 2 +- 13 files changed, 61 insertions(+), 61 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 83ac6aecb..bae3693ac 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -107,7 +107,7 @@ class ChunkBuffer: class CacheChunkBuffer(ChunkBuffer): def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS): - super(CacheChunkBuffer, self).__init__(key, chunker_params) + super().__init__(key, chunker_params) self.cache = cache self.stats = stats @@ -127,7 +127,6 @@ class Archive: class IncompatibleFilesystemEncodingError(Error): """Failed to encode filename "{}" into file system encoding "{}". 
Consider configuring the LANG environment variable.""" - def __init__(self, repository, key, manifest, name, cache=None, create=False, checkpoint_interval=300, numeric_owner=False, progress=False, chunker_params=CHUNKER_PARAMS): @@ -232,9 +231,11 @@ class Archive: count, size, csize = cache.chunks[id] stats.update(size, csize, count == 1) cache.chunks[id] = count - 1, size, csize + def add_file_chunks(chunks): for id, _, _ in chunks: add(id) + # This function is a bit evil since it abuses the cache to calculate # the stats. The cache transaction must be rolled back afterwards unpacker = msgpack.Unpacker(use_list=False) @@ -551,7 +552,7 @@ class RobustUnpacker(): item_keys = [msgpack.packb(name) for name in ('path', 'mode', 'source', 'chunks', 'rdev', 'xattrs', 'user', 'group', 'uid', 'gid', 'mtime')] def __init__(self, validator): - super(RobustUnpacker, self).__init__() + super().__init__() self.validator = validator self._buffered_data = [] self._resync = False @@ -810,4 +811,3 @@ class ArchiveChecker: self.repository.delete(id_) self.manifest.write() self.repository.commit() - diff --git a/borg/cache.py b/borg/cache.py index 110f088d9..435fca135 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -21,7 +21,6 @@ class Cache: class RepositoryReplay(Error): """Cache is newer than repository, refusing to continue""" - class CacheInitAbortedError(Error): """Cache initialization aborted""" diff --git a/borg/fuse.py b/borg/fuse.py index eb4b46ffd..8c9726c25 100644 --- a/borg/fuse.py +++ b/borg/fuse.py @@ -34,7 +34,7 @@ class FuseOperations(llfuse.Operations): """Export archive as a fuse filesystem """ def __init__(self, key, repository, manifest, archive): - super(FuseOperations, self).__init__() + super().__init__() self._inode_count = 0 self.key = key self.repository = cache_if_remote(repository) diff --git a/borg/helpers.py b/borg/helpers.py index 1f1612d3a..2b26b0e3f 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -73,10 +73,13 @@ class UpgradableLock: def 
check_extension_modules(): from . import platform - if (hashindex.API_VERSION != 2 or - chunker.API_VERSION != 2 or - crypto.API_VERSION != 2 or - platform.API_VERSION != 2): + if hashindex.API_VERSION != 2: + raise ExtensionModuleError + if chunker.API_VERSION != 2: + raise ExtensionModuleError + if crypto.API_VERSION != 2: + raise ExtensionModuleError + if platform.API_VERSION != 2: raise ExtensionModuleError @@ -529,9 +532,9 @@ class Location: else: path = self.path return 'ssh://{}{}{}{}'.format('{}@'.format(self.user) if self.user else '', - self.host, - ':{}'.format(self.port) if self.port else '', - path) + self.host, + ':{}'.format(self.port) if self.port else '', + path) def location_validator(archive=None): @@ -606,7 +609,7 @@ def daemonize(): class StableDict(dict): """A dict subclass with stable items() ordering""" def items(self): - return sorted(super(StableDict, self).items()) + return sorted(super().items()) if sys.version < '3.3': @@ -642,4 +645,3 @@ def int_to_bigint(value): if value.bit_length() > 63: return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True) return value - diff --git a/borg/key.py b/borg/key.py index b13295101..31267d0d9 100644 --- a/borg/key.py +++ b/borg/key.py @@ -17,6 +17,7 @@ class UnsupportedPayloadError(Error): """Unsupported payload type {}. A newer version is required to access this repository. """ + class KeyfileNotFoundError(Error): """No key file for repository {} found in {}. 
""" @@ -231,8 +232,7 @@ class KeyfileKey(AESKeyBase): filename = os.path.join(keys_dir, name) with open(filename, 'r') as fd: line = fd.readline().strip() - if (line and line.startswith(cls.FILE_ID) and - line[len(cls.FILE_ID)+1:] == id): + if line and line.startswith(cls.FILE_ID) and line[len(cls.FILE_ID)+1:] == id: return filename raise KeyfileNotFoundError(repository._location.canonical_path(), get_keys_dir()) diff --git a/borg/lrucache.py b/borg/lrucache.py index 3bb49fbc4..a692f10dd 100644 --- a/borg/lrucache.py +++ b/borg/lrucache.py @@ -1,7 +1,7 @@ class LRUCache(dict): def __init__(self, capacity): - super(LRUCache, self).__init__() + super().__init__() self._lru = [] self._capacity = capacity @@ -13,7 +13,7 @@ class LRUCache(dict): self._lru.append(key) while len(self._lru) > self._capacity: del self[self._lru[0]] - return super(LRUCache, self).__setitem__(key, value) + return super().__setitem__(key, value) def __getitem__(self, key): try: @@ -21,21 +21,21 @@ class LRUCache(dict): self._lru.append(key) except ValueError: pass - return super(LRUCache, self).__getitem__(key) + return super().__getitem__(key) def __delitem__(self, key): try: self._lru.remove(key) except ValueError: pass - return super(LRUCache, self).__delitem__(key) + return super().__delitem__(key) def pop(self, key, default=None): try: self._lru.remove(key) except ValueError: pass - return super(LRUCache, self).pop(key, default) + return super().pop(key, default) def _not_implemented(self, *args, **kw): raise NotImplementedError diff --git a/borg/remote.py b/borg/remote.py index 81d4ace05..bfd8eaa97 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -25,24 +25,25 @@ class ConnectionClosed(Error): class PathNotAllowed(Error): """Repository path not allowed""" + class InvalidRPCMethod(Error): """RPC method is not valid""" class RepositoryServer: rpc_methods = ( - '__len__', - 'check', - 'commit', - 'delete', - 'get', - 'list', - 'negotiate', - 'open', - 'put', - 'repair', - 'rollback', - ) 
+ '__len__', + 'check', + 'commit', + 'delete', + 'get', + 'list', + 'negotiate', + 'open', + 'put', + 'repair', + 'rollback', + ) def __init__(self, restrict_to_paths): self.repository = None @@ -71,7 +72,7 @@ class RepositoryServer: type, msgid, method, args = unpacked method = method.decode('ascii') try: - if not method in self.rpc_methods: + if method not in self.rpc_methods: raise InvalidRPCMethod(method) try: f = getattr(self, method) diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index fac3de9e2..e1eb37eaa 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -119,5 +119,3 @@ class TestLoader(unittest.TestLoader): if pattern.lower() in test.id().lower(): tests.addTest(test) return tests - - diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 03427008a..e16687ff8 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -24,7 +24,7 @@ from .mock import patch try: import llfuse - has_llfuse = True + has_llfuse = True or llfuse # avoids "unused import" except ImportError: has_llfuse = False @@ -143,7 +143,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.create_regular_file('empty', size=0) # next code line raises OverflowError on 32bit cpu (raspberry pi 2): # 2600-01-01 > 2**64 ns - #os.utime('input/empty', (19880895600, 19880895600)) + # os.utime('input/empty', (19880895600, 19880895600)) # thus, we better test with something not that far in future: # 2038-01-19 (1970 + 2^31 - 1 seconds) is the 32bit "deadline": os.utime('input/empty', (2**31 - 1, 2**31 - 1)) @@ -157,9 +157,9 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.chmod('input/file1', 0o7755) os.chmod('input/dir2', 0o555) # Block device - os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20)) + os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20)) # Char device - os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40)) + os.mknod('input/cdev', 0o600 | stat.S_IFCHR, 
os.makedev(30, 40)) # Hard link os.link(os.path.join(self.input_path, 'file1'), os.path.join(self.input_path, 'hardlink')) @@ -172,7 +172,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): # same for newer ubuntu and centos. # if this is supported just on specific platform, platform should be checked first, # so that the test setup for all tests using it does not fail here always for others. - #xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False) + # xattr.setxattr(os.path.join(self.input_path, 'link1'), 'user.foo_symlink', b'bar_symlink', follow_symlinks=False) # FIFO node os.mkfifo(os.path.join(self.input_path, 'fifo1')) if has_lchflags: @@ -253,7 +253,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('init', '--encryption=none', self.repository_location) self._set_repository_id(self.repository_path, repository_id) self.assert_equal(repository_id, self._extract_repository_id(self.repository_path)) - self.assert_raises(Cache.EncryptionMethodMismatch, lambda :self.cmd('create', self.repository_location + '::test.2', 'input')) + self.assert_raises(Cache.EncryptionMethodMismatch, lambda: self.cmd('create', self.repository_location + '::test.2', 'input')) def test_repository_swap_detection2(self): self.create_test_files() @@ -263,7 +263,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('create', self.repository_location + '_encrypted::test', 'input') shutil.rmtree(self.repository_path + '_encrypted') os.rename(self.repository_path + '_unencrypted', self.repository_path + '_encrypted') - self.assert_raises(Cache.RepositoryAccessAborted, lambda :self.cmd('create', self.repository_location + '_encrypted::test.2', 'input')) + self.assert_raises(Cache.RepositoryAccessAborted, lambda: self.cmd('create', self.repository_location + '_encrypted::test.2', 'input')) def test_strip_components(self): self.cmd('init', self.repository_location) @@ -524,7 +524,7 @@ class 
ArchiverTestCase(ArchiverTestCaseBase): class ArchiverCheckTestCase(ArchiverTestCaseBase): def setUp(self): - super(ArchiverCheckTestCase, self).setUp() + super().setUp() with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10): self.cmd('init', self.repository_location) self.create_src_archive('archive1') diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index ac949ba7a..5c3b9c085 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -7,7 +7,7 @@ import unittest import msgpack -from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, to_localtime, \ +from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp from . import BaseTestCase @@ -96,7 +96,7 @@ class PatternTestCase(BaseTestCase): ['/etc/passwd', '/etc/hosts', '/home', '/var/log/messages', '/var/log/dmesg']) self.assert_equal(self.evaluate(['/home/u'], []), []) self.assert_equal(self.evaluate(['/', '/home', '/etc/hosts'], ['/']), []) - self.assert_equal(self.evaluate(['/home/'], ['/home/user2']), + self.assert_equal(self.evaluate(['/home/'], ['/home/user2']), ['/home', '/home/user/.profile', '/home/user/.bashrc']) self.assert_equal(self.evaluate(['/'], ['*.profile', '/var/log']), ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc', '/home/user2/public_html/index.html']) @@ -118,6 +118,7 @@ class MakePathSafeTestCase(BaseTestCase): self.assert_equal(make_path_safe('/'), '.') self.assert_equal(make_path_safe('/'), '.') + class UpgradableLockTestCase(BaseTestCase): def test(self): @@ -161,7 +162,7 @@ class PruneSplitTestCase(BaseTestCase): for ta in test_archives, reversed(test_archives): self.assert_equal(set(prune_split(ta, '%Y-%m', n, skip)), subset(test_archives, indices)) - + test_pairs = [(1, 1), 
(2, 1), (2, 28), (3, 1), (3, 2), (3, 31), (5, 1)] test_dates = [local_to_UTC(month, day) for month, day in test_pairs] test_archives = [MockArchive(date) for date in test_dates] @@ -185,24 +186,24 @@ class PruneWithinTestCase(BaseTestCase): for ta in test_archives, reversed(test_archives): self.assert_equal(set(prune_within(ta, within)), subset(test_archives, indices)) - + # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours test_offsets = [60, 90*60, 150*60, 210*60, 25*60*60, 49*60*60] now = datetime.now(timezone.utc) test_dates = [now - timedelta(seconds=s) for s in test_offsets] test_archives = [MockArchive(date) for date in test_dates] - dotest(test_archives, '1H', [0]) - dotest(test_archives, '2H', [0, 1]) - dotest(test_archives, '3H', [0, 1, 2]) + dotest(test_archives, '1H', [0]) + dotest(test_archives, '2H', [0, 1]) + dotest(test_archives, '3H', [0, 1, 2]) dotest(test_archives, '24H', [0, 1, 2, 3]) dotest(test_archives, '26H', [0, 1, 2, 3, 4]) - dotest(test_archives, '2d', [0, 1, 2, 3, 4]) + dotest(test_archives, '2d', [0, 1, 2, 3, 4]) dotest(test_archives, '50H', [0, 1, 2, 3, 4, 5]) - dotest(test_archives, '3d', [0, 1, 2, 3, 4, 5]) - dotest(test_archives, '1w', [0, 1, 2, 3, 4, 5]) - dotest(test_archives, '1m', [0, 1, 2, 3, 4, 5]) - dotest(test_archives, '1y', [0, 1, 2, 3, 4, 5]) + dotest(test_archives, '3d', [0, 1, 2, 3, 4, 5]) + dotest(test_archives, '1w', [0, 1, 2, 3, 4, 5]) + dotest(test_archives, '1m', [0, 1, 2, 3, 4, 5]) + dotest(test_archives, '1y', [0, 1, 2, 3, 4, 5]) class StableDictTestCase(BaseTestCase): diff --git a/borg/testsuite/platform.py b/borg/testsuite/platform.py index 2a9ebff9e..77940655d 100644 --- a/borg/testsuite/platform.py +++ b/borg/testsuite/platform.py @@ -102,4 +102,3 @@ class PlatformDarwinTestCase(BaseTestCase): self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True) 
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended']) self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended']) - diff --git a/setup.cfg b/setup.cfg index 2f726b562..19a49eea6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,7 +2,7 @@ python_files = testsuite/*.py [flake8] -ignore = E123,E126,E127,E129,E203,E221,E226,E231,E241,E265,E301,E302,E303,E713,F401,F403,W291,W293,W391 +ignore = E226,F403 max-line-length = 250 -exclude = versioneer.py,docs/conf.py,borg/_version.py +exclude = versioneer.py,docs/conf.py,borg/_version.py,build,dist,.git,.idea,.cache max-complexity = 100 diff --git a/setup.py b/setup.py index 88dc2564b..f51dafd29 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ try: 'borg/platform_freebsd.c', 'borg/platform_darwin.c', ]) - super(Sdist, self).make_distribution() + super().make_distribution() except ImportError: class Sdist(versioneer.cmd_sdist): From 05b21e3c0d267b51ef2cd570c9220712c4fa8602 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 11 Jul 2015 19:27:00 +0200 Subject: [PATCH 159/241] document return codes --- docs/usage.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/usage.rst b/docs/usage.rst index 688bd255b..6bd00bb5a 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -17,6 +17,27 @@ messages as it is processing. .. include:: usage/init.rst.inc +Return codes +------------ + +|project_name| can exit with the following return codes (rc): + +:: + + 0 no error, normal termination + 1 some error occurred (this can be a complete or a partial failure) + 128+N killed by signal N (e.g. 137 == kill -9) + + +Note: we are aware that more distinct return codes might be useful, but it is +not clear yet which return codes should be used for which precise conditions. + +See issue #61 for a discussion about that. 
Depending on the outcome of the +discussion there, return codes may change in future (the only thing rather sure +is that 0 will always mean some sort of success and "not 0" will always mean +some sort of warning / error / failure - but the definition of success might +change). + Examples ~~~~~~~~ :: From d1bb892c3d89098837c6853b2a702ea172065634 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 11 Jul 2015 19:45:37 +0200 Subject: [PATCH 160/241] fix placement of return codes docs --- docs/usage.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 6bd00bb5a..945119435 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -15,8 +15,6 @@ Like most UNIX commands |project_name| is quiet by default but the ``-v`` or ``--verbose`` option can be used to get the program to output more status messages as it is processing. -.. include:: usage/init.rst.inc - Return codes ------------ @@ -38,6 +36,8 @@ is that 0 will always mean some sort of success and "not 0" will always mean some sort of warning / error / failure - but the definition of success might change). +.. include:: usage/init.rst.inc + Examples ~~~~~~~~ :: From afae7201126e7af17bcf6cc07d6d3fd81fbb38d3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 11 Jul 2015 20:08:22 +0200 Subject: [PATCH 161/241] document environment variables --- docs/usage.rst | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/usage.rst b/docs/usage.rst index 945119435..d5d3a94a9 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -36,6 +36,36 @@ is that 0 will always mean some sort of success and "not 0" will always mean some sort of warning / error / failure - but the definition of success might change). 
+Environment Variables +--------------------- + +|project_name| uses some environment variables for automation: + +:: + + Specifying a passphrase: + BORG_PASSPHRASE : When set, use the value to answer the passphrase question for encrypted repositories. + + Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning): + BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK : For "Warning: Attempting to access a previously unknown unencrypted repository" + BORG_RELOCATED_REPO_ACCESS_IS_OK : For "Warning: The repository at location ... was previously located at ..." + BORG_CHECK_I_KNOW_WHAT_I_AM_DOING : For "Warning: 'check --repair' is an experimental feature that might result in data loss." + + Directories: + BORG_KEYS_DIR : Default to '~/.borg/keys'. This directory contains keys for encrypted repositories. + BORG_CACHE_DIR : Default to '~/.cache/borg'. This directory contains the local cache. + + Building: + BORG_OPENSSL_PREFIX : Adds given OpenSSL header file directory to the default locations (setup.py). + + +Please note: + +- be very careful when using the "yes" sayers, the warnings with prompt exist for your / your data's security/safety +- also be very careful when putting your passphrase into a script, make sure it has appropriate file permissions + (e.g. mode 600, root:root). + + .. include:: usage/init.rst.inc Examples From bd354d7bb43fe07c650064f070d18ae3064eefc8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jul 2015 00:18:49 +0200 Subject: [PATCH 162/241] create a RepositoryCache implementation that can cope with any amount of data, fixes attic #326 the old code blows up with an integer OverflowError when the cache file goes beyond 2GiB size. the new code just reuses the Repository implementation as a local temporary key/value store. still an issue: if the place where the temporary RepositoryCache is stored (usually /tmp) can't cope with the cache size and runs full. 
if you copy data from a fuse mount, the cache size is the copied deduplicated data size. so, if you have lots of data to extract (more than your /tmp can hold), rather do not use fuse! besides fuse mounts, this also affects attic check and cache sync (in these cases, only the metadata size counts, but even that can go beyond 2GiB for some people). --- borg/remote.py | 45 ++++++++------------------------------------- borg/repository.py | 5 +++++ 2 files changed, 13 insertions(+), 37 deletions(-) diff --git a/borg/remote.py b/borg/remote.py index 7c56dbff5..0ad91d76e 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -3,7 +3,6 @@ import fcntl import msgpack import os import select -import shutil from subprocess import Popen, PIPE import sys import tempfile @@ -11,7 +10,6 @@ import traceback from . import __version__ -from .hashindex import NSIndex from .helpers import Error, IntegrityError from .repository import Repository @@ -292,56 +290,29 @@ class RemoteRepository: class RepositoryCache: """A caching Repository wrapper - Caches Repository GET operations using a temporary file + Caches Repository GET operations using a local temporary Repository. 
""" def __init__(self, repository): - self.tmppath = None - self.index = None - self.data_fd = None self.repository = repository - self.entries = {} - self.initialize() + tmppath = tempfile.mkdtemp(prefix='borg-tmp') + self.caching_repo = Repository(tmppath, create=True, exclusive=True) def __del__(self): - self.cleanup() - - def initialize(self): - self.tmppath = tempfile.mkdtemp(prefix='borg-tmp') - self.index = NSIndex() - self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b') - - def cleanup(self): - del self.index - if self.data_fd: - self.data_fd.close() - if self.tmppath: - shutil.rmtree(self.tmppath) - - def load_object(self, offset, size): - self.data_fd.seek(offset) - data = self.data_fd.read(size) - assert len(data) == size - return data - - def store_object(self, key, data): - self.data_fd.seek(0, os.SEEK_END) - self.data_fd.write(data) - offset = self.data_fd.tell() - self.index[key] = offset - len(data), len(data) + self.caching_repo.destroy() def get(self, key): return next(self.get_many([key])) def get_many(self, keys): - unknown_keys = [key for key in keys if key not in self.index] + unknown_keys = [key for key in keys if key not in self.caching_repo] repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys)) for key in keys: try: - yield self.load_object(*self.index[key]) - except KeyError: + yield self.caching_repo.get(key) + except Repository.ObjectNotFound: for key_, data in repository_iterator: if key_ == key: - self.store_object(key, data) + self.caching_repo.put(key, data) yield data break # Consume any pending requests diff --git a/borg/repository.py b/borg/repository.py index e124cc155..392950112 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -341,6 +341,11 @@ class Repository: self.index = self.open_index(self.get_transaction_id()) return len(self.index) + def __contains__(self, id): + if not self.index: + self.index = self.open_index(self.get_transaction_id()) + return id in self.index + def 
list(self, limit=None, marker=None): if not self.index: self.index = self.open_index(self.get_transaction_id()) From 45e89add52d94f3788a8afc7dce299ae36f173a5 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Sun, 12 Jul 2015 15:00:47 +0200 Subject: [PATCH 163/241] switch to entrypoints --- scripts/borg | 4 ---- setup.py | 6 +++++- 2 files changed, 5 insertions(+), 5 deletions(-) delete mode 100644 scripts/borg diff --git a/scripts/borg b/scripts/borg deleted file mode 100644 index 9fe6f7b5a..000000000 --- a/scripts/borg +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env python -from borg.archiver import main -main() - diff --git a/setup.py b/setup.py index f51dafd29..eed03381f 100644 --- a/setup.py +++ b/setup.py @@ -129,7 +129,11 @@ setup( 'Topic :: System :: Archiving :: Backup', ], packages=['borg', 'borg.testsuite'], - scripts=['scripts/borg'], + entry_points={ + 'console_scripts': [ + 'borg = borg.archiver:main', + ] + }, cmdclass=cmdclass, ext_modules=ext_modules, # msgpack pure python data corruption was fixed in 0.4.6. 
From 922a1f6bb70550deedcd5d72f0b24aa5792e1753 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Sun, 12 Jul 2015 15:07:24 +0200 Subject: [PATCH 164/241] never fall back to distutils, any sane install method uses setuptools --- setup.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index eed03381f..edd75dc1a 100644 --- a/setup.py +++ b/setup.py @@ -16,10 +16,8 @@ if sys.version_info < min_python: print("Borg requires Python %d.%d or later" % min_python) sys.exit(1) -try: - from setuptools import setup, Extension -except ImportError: - from distutils.core import setup, Extension + +from setuptools import setup, Extension crypto_source = 'borg/crypto.pyx' chunker_source = 'borg/chunker.pyx' From 414dba3de72e1031c6b392cabdc8c018e32a3e62 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jul 2015 23:08:44 +0200 Subject: [PATCH 165/241] remove usage of evil / broken unittest.mock, use mock from pypi see testsuite.mock docstring for more details. one test shows brokenness right now that was hidden / silent until now. --- borg/testsuite/mock.py | 19 ++++++++++++++----- borg/testsuite/repository.py | 4 ++-- tox.ini | 6 +----- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/borg/testsuite/mock.py b/borg/testsuite/mock.py index b7501ed6c..bdd030b10 100644 --- a/borg/testsuite/mock.py +++ b/borg/testsuite/mock.py @@ -1,5 +1,14 @@ -try: - # Only available in python 3.3+ - from unittest.mock import * -except ImportError: - from mock import * +""" +Mocking + +Note: unittest.mock is broken on at least python 3.3.6 and 3.4.0. + it silently ignores mistyped method names starting with assert_..., + does nothing and just succeeds. + The issue was fixed in the separately distributed "mock" lib, you + get an AttributeError there. So, always use that one! 
+ +Details: + +http://engineeringblog.yelp.com/2015/02/assert_called_once-threat-or-menace.html +""" +from mock import * diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 9cc8d2427..f63a86711 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -159,7 +159,7 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): with patch.object(UpgradableLock, 'upgrade', side_effect=UpgradableLock.WriteLockFailed) as upgrade: self.reopen() self.assert_raises(UpgradableLock.WriteLockFailed, lambda: len(self.repository)) - upgrade.assert_called_once() + upgrade.assert_called_once_with() def test_crash_before_write_index(self): self.add_keys() @@ -309,7 +309,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): # Simulate a crash before compact with patch.object(Repository, 'compact_segments') as compact: self.repository.commit() - compact.assert_called_once() + compact.assert_called_once_with() self.reopen() self.check(repair=True) self.assert_equal(self.repository.get(bytes(32)), b'data2') diff --git a/tox.ini b/tox.ini index 619fc01ff..68d2e6a61 100644 --- a/tox.ini +++ b/tox.ini @@ -6,10 +6,6 @@ envlist = py32, py33, py34 changedir = {envdir} deps = pytest + mock commands = py.test passenv = * # fakeroot -u needs some env vars - -[testenv:py32] -deps = - pytest - mock From d8e9a9bf96b02f3c1b72c1b75095e6005cf4353e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jul 2015 23:29:34 +0200 Subject: [PATCH 166/241] skip test_crash_before_compact test for RemoteRepository it was silently failing until recently. and it can't work the way it is on RemoteRepository. it's still active (and now even really working) for the (local) Repository tests. 
--- borg/testsuite/repository.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index f63a86711..981cda658 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -328,3 +328,7 @@ class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase): def open(self, create=False): return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create) + + def test_crash_before_compact(self): + # skip this test, we can't mock-patch a Repository class in another process! + pass From 434dac0e4803cb61f43a1305948e1afad6d89088 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Jul 2015 03:19:23 +0200 Subject: [PATCH 167/241] move locking code to own module, same for locking tests fix imports, no other changes. --- borg/cache.py | 3 ++- borg/helpers.py | 44 ++---------------------------------- borg/locking.py | 43 +++++++++++++++++++++++++++++++++++ borg/repository.py | 3 ++- borg/testsuite/helpers.py | 24 ++------------------ borg/testsuite/locking.py | 24 ++++++++++++++++++++ borg/testsuite/repository.py | 3 ++- 7 files changed, 77 insertions(+), 67 deletions(-) create mode 100644 borg/locking.py create mode 100644 borg/testsuite/locking.py diff --git a/borg/cache.py b/borg/cache.py index 4dc40947d..937e2a757 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -10,8 +10,9 @@ import tarfile import tempfile from .key import PlaintextKey -from .helpers import Error, get_cache_dir, decode_dict, st_mtime_ns, unhexlify, UpgradableLock, int_to_bigint, \ +from .helpers import Error, get_cache_dir, decode_dict, st_mtime_ns, unhexlify, int_to_bigint, \ bigint_to_int +from .locking import UpgradableLock from .hashindex import ChunkIndex diff --git a/borg/helpers.py b/borg/helpers.py index 2b26b0e3f..d20532723 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -2,7 +2,6 @@ import argparse import binascii from collections import namedtuple import grp 
-import msgpack import os import pwd import re @@ -11,7 +10,8 @@ import time from datetime import datetime, timezone, timedelta from fnmatch import translate from operator import attrgetter -import fcntl + +import msgpack from . import hashindex from . import chunker @@ -31,46 +31,6 @@ class ExtensionModuleError(Error): """The Borg binary extension modules do not seem to be properly installed""" -class UpgradableLock: - - class ReadLockFailed(Error): - """Failed to acquire read lock on {}""" - - class WriteLockFailed(Error): - """Failed to acquire write lock on {}""" - - def __init__(self, path, exclusive=False): - self.path = path - try: - self.fd = open(path, 'r+') - except IOError: - self.fd = open(path, 'r') - try: - if exclusive: - fcntl.lockf(self.fd, fcntl.LOCK_EX) - else: - fcntl.lockf(self.fd, fcntl.LOCK_SH) - # Python 3.2 raises IOError, Python3.3+ raises OSError - except (IOError, OSError): - if exclusive: - raise self.WriteLockFailed(self.path) - else: - raise self.ReadLockFailed(self.path) - self.is_exclusive = exclusive - - def upgrade(self): - try: - fcntl.lockf(self.fd, fcntl.LOCK_EX) - # Python 3.2 raises IOError, Python3.3+ raises OSError - except (IOError, OSError): - raise self.WriteLockFailed(self.path) - self.is_exclusive = True - - def release(self): - fcntl.lockf(self.fd, fcntl.LOCK_UN) - self.fd.close() - - def check_extension_modules(): from . 
import platform if hashindex.API_VERSION != 2: diff --git a/borg/locking.py b/borg/locking.py new file mode 100644 index 000000000..0c6df2fee --- /dev/null +++ b/borg/locking.py @@ -0,0 +1,43 @@ +import fcntl + +from borg.helpers import Error + + +class UpgradableLock: + + class ReadLockFailed(Error): + """Failed to acquire read lock on {}""" + + class WriteLockFailed(Error): + """Failed to acquire write lock on {}""" + + def __init__(self, path, exclusive=False): + self.path = path + try: + self.fd = open(path, 'r+') + except IOError: + self.fd = open(path, 'r') + try: + if exclusive: + fcntl.lockf(self.fd, fcntl.LOCK_EX) + else: + fcntl.lockf(self.fd, fcntl.LOCK_SH) + # Python 3.2 raises IOError, Python3.3+ raises OSError + except (IOError, OSError): + if exclusive: + raise self.WriteLockFailed(self.path) + else: + raise self.ReadLockFailed(self.path) + self.is_exclusive = exclusive + + def upgrade(self): + try: + fcntl.lockf(self.fd, fcntl.LOCK_EX) + # Python 3.2 raises IOError, Python3.3+ raises OSError + except (IOError, OSError): + raise self.WriteLockFailed(self.path) + self.is_exclusive = True + + def release(self): + fcntl.lockf(self.fd, fcntl.LOCK_UN) + self.fd.close() diff --git a/borg/repository.py b/borg/repository.py index 392950112..0ad99970b 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -9,7 +9,8 @@ import sys from zlib import crc32 from .hashindex import NSIndex -from .helpers import Error, IntegrityError, read_msgpack, write_msgpack, unhexlify, UpgradableLock +from .helpers import Error, IntegrityError, read_msgpack, write_msgpack, unhexlify +from .locking import UpgradableLock from .lrucache import LRUCache MAX_OBJECT_SIZE = 20 * 1024 * 1024 diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 5c3b9c085..26b422b0c 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -1,13 +1,11 @@ import hashlib from time import mktime, strptime from datetime import datetime, timezone, timedelta -import os 
-import tempfile -import unittest import msgpack -from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, UpgradableLock, prune_within, prune_split, \ +from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \ + prune_within, prune_split, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp from . import BaseTestCase @@ -119,24 +117,6 @@ class MakePathSafeTestCase(BaseTestCase): self.assert_equal(make_path_safe('/'), '.') -class UpgradableLockTestCase(BaseTestCase): - - def test(self): - file = tempfile.NamedTemporaryFile() - lock = UpgradableLock(file.name) - lock.upgrade() - lock.upgrade() - lock.release() - - @unittest.skipIf(os.getuid() == 0, 'Root can always open files for writing') - def test_read_only_lock_file(self): - file = tempfile.NamedTemporaryFile() - os.chmod(file.name, 0o444) - lock = UpgradableLock(file.name) - self.assert_raises(UpgradableLock.WriteLockFailed, lock.upgrade) - lock.release() - - class MockArchive: def __init__(self, ts): diff --git a/borg/testsuite/locking.py b/borg/testsuite/locking.py new file mode 100644 index 000000000..6a910edb6 --- /dev/null +++ b/borg/testsuite/locking.py @@ -0,0 +1,24 @@ +import os +import tempfile +import unittest + +from ..locking import UpgradableLock +from . 
import BaseTestCase + + +class UpgradableLockTestCase(BaseTestCase): + + def test(self): + file = tempfile.NamedTemporaryFile() + lock = UpgradableLock(file.name) + lock.upgrade() + lock.upgrade() + lock.release() + + @unittest.skipIf(os.getuid() == 0, 'Root can always open files for writing') + def test_read_only_lock_file(self): + file = tempfile.NamedTemporaryFile() + os.chmod(file.name, 0o444) + lock = UpgradableLock(file.name) + self.assert_raises(UpgradableLock.WriteLockFailed, lock.upgrade) + lock.release() diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 981cda658..b32315938 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -3,7 +3,8 @@ import shutil import tempfile from ..hashindex import NSIndex -from ..helpers import Location, IntegrityError, UpgradableLock +from ..helpers import Location, IntegrityError +from ..locking import UpgradableLock from ..remote import RemoteRepository, InvalidRPCMethod from ..repository import Repository from . import BaseTestCase From e4c519b1e93cb9906295ecc32c1c7b721a2841ba Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 13 Jul 2015 12:22:49 +0200 Subject: [PATCH 168/241] new locking code exclusive locking by atomic mkdir fs operation on top of that, shared (read) locks and exclusive (write) locks using a json roster. 
--- borg/cache.py | 2 +- borg/locking.py | 267 ++++++++++++++++++++++++++++++----- borg/repository.py | 4 +- borg/testsuite/locking.py | 116 ++++++++++++--- borg/testsuite/repository.py | 4 +- 5 files changed, 337 insertions(+), 56 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 937e2a757..37e066685 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -124,7 +124,7 @@ class Cache: def open(self): if not os.path.isdir(self.path): raise Exception('%s Does not look like a Borg cache' % self.path) - self.lock = UpgradableLock(os.path.join(self.path, 'config'), exclusive=True) + self.lock = UpgradableLock(os.path.join(self.path, 'repo'), exclusive=True).acquire() self.rollback() def close(self): diff --git a/borg/locking.py b/borg/locking.py index 0c6df2fee..9d59c654b 100644 --- a/borg/locking.py +++ b/borg/locking.py @@ -1,43 +1,246 @@ -import fcntl +import errno +import json +import os +import socket +import threading +import time from borg.helpers import Error +ADD, REMOVE = 'add', 'remove' +SHARED, EXCLUSIVE = 'shared', 'exclusive' -class UpgradableLock: - class ReadLockFailed(Error): - """Failed to acquire read lock on {}""" +def get_id(): + """Get identification tuple for 'us'""" + hostname = socket.gethostname() + pid = os.getpid() + tid = threading.current_thread().ident & 0xffffffff + return hostname, pid, tid - class WriteLockFailed(Error): - """Failed to acquire write lock on {}""" - def __init__(self, path, exclusive=False): - self.path = path - try: - self.fd = open(path, 'r+') - except IOError: - self.fd = open(path, 'r') - try: - if exclusive: - fcntl.lockf(self.fd, fcntl.LOCK_EX) +class ExclusiveLock: + """An exclusive Lock based on mkdir fs operation being atomic""" + class LockError(Error): + """Failed to acquire the lock {}.""" + + class LockTimeout(LockError): + """Failed to create/acquire the lock {} (timeout).""" + + class LockFailed(LockError): + """Failed to create/acquire the lock {} ({}).""" + + class UnlockError(Error): + """Failed 
to release the lock {}.""" + + class NotLocked(UnlockError): + """Failed to release the lock {} (was not locked).""" + + class NotMyLock(UnlockError): + """Failed to release the lock {} (was/is locked, but not by me).""" + + def __init__(self, path, timeout=None, sleep=None, id=None): + self.timeout = timeout + self.sleep = sleep + self.path = os.path.abspath(path) + self.id = id or get_id() + self.unique_name = os.path.join(self.path, "%s.%d-%x" % self.id) + + def __enter__(self): + return self.acquire() + + def __exit__(self, *exc): + self.release() + + def __repr__(self): + return "<%s: %r>" % (self.__class__.__name__, self.unique_name) + + def _get_timing(self, timeout, sleep): + if timeout is None: + timeout = self.timeout + start = end = time.time() + if timeout is not None and timeout > 0: + end += timeout + if sleep is None: + sleep = self.sleep + if sleep is None: + if timeout is None: + sleep = 1.0 else: - fcntl.lockf(self.fd, fcntl.LOCK_SH) - # Python 3.2 raises IOError, Python3.3+ raises OSError - except (IOError, OSError): - if exclusive: - raise self.WriteLockFailed(self.path) - else: - raise self.ReadLockFailed(self.path) - self.is_exclusive = exclusive + sleep = max(0, timeout / 10.0) + return start, sleep, end, timeout - def upgrade(self): - try: - fcntl.lockf(self.fd, fcntl.LOCK_EX) - # Python 3.2 raises IOError, Python3.3+ raises OSError - except (IOError, OSError): - raise self.WriteLockFailed(self.path) - self.is_exclusive = True + def acquire(self, timeout=None, sleep=None): + start, sleep, end, timeout = self._get_timing(timeout, sleep) + while True: + try: + os.mkdir(self.path) + except OSError as err: + if err.errno == errno.EEXIST: # already locked + if self.by_me(): + return self + if timeout is not None and time.time() > end: + raise self.LockTimeout(self.path) + time.sleep(sleep) + else: + raise self.LockFailed(self.path, str(err)) + else: + with open(self.unique_name, "wb"): + pass + return self def release(self): - 
fcntl.lockf(self.fd, fcntl.LOCK_UN) - self.fd.close() + if not self.is_locked(): + raise self.NotLocked(self.path) + if not self.by_me(): + raise self.NotMyLock(self.path) + os.unlink(self.unique_name) + os.rmdir(self.path) + + def is_locked(self): + return os.path.exists(self.path) + + def by_me(self): + return os.path.exists(self.unique_name) + + def break_lock(self): + if self.is_locked(): + for name in os.listdir(self.path): + os.unlink(os.path.join(self.path, name)) + os.rmdir(self.path) + + +class LockRoster: + """ + A Lock Roster to track shared/exclusive lockers. + + Note: you usually should call the methods with an exclusive lock held, + to avoid conflicting access by multiple threads/processes/machines. + """ + def __init__(self, path, id=None): + self.path = path + self.id = id or get_id() + + def load(self): + try: + with open(self.path) as f: + data = json.load(f) + except IOError as err: + if err.errno != errno.ENOENT: + raise + data = {} + return data + + def save(self, data): + with open(self.path, "w") as f: + json.dump(data, f) + + def remove(self): + os.unlink(self.path) + + def get(self, key): + roster = self.load() + return set(tuple(e) for e in roster.get(key, [])) + + def modify(self, key, op): + roster = self.load() + try: + elements = set(tuple(e) for e in roster[key]) + except KeyError: + elements = set() + if op == ADD: + elements.add(self.id) + elif op == REMOVE: + elements.remove(self.id) + else: + raise ValueError('Unknown LockRoster op %r' % op) + roster[key] = list(list(e) for e in elements) + self.save(roster) + + +class UpgradableLock: + """ + A Lock for a resource that can be accessed in a shared or exclusive way. + Typically, write access to a resource needs an exclusive lock (1 writer, + noone is allowed reading) and read access to a resource needs a shared + lock (multiple readers are allowed). 
+ """ + class SharedLockFailed(Error): + """Failed to acquire shared lock [{}]""" + + class ExclusiveLockFailed(Error): + """Failed to acquire write lock [{}]""" + + def __init__(self, path, exclusive=False, sleep=None, id=None): + self.path = path + self.is_exclusive = exclusive + self.sleep = sleep + self.id = id or get_id() + # globally keeping track of shared and exclusive lockers: + self._roster = LockRoster(path + '.roster', id=id) + # an exclusive lock, used for: + # - holding while doing roster queries / updates + # - holding while the UpgradableLock itself is exclusive + self._lock = ExclusiveLock(path + '.lock', id=id) + + def __enter__(self): + return self.acquire() + + def __exit__(self, *exc): + self.release() + + def __repr__(self): + return "<%s: %r>" % (self.__class__.__name__, self.id) + + def acquire(self, exclusive=None, remove=None, sleep=None): + if exclusive is None: + exclusive = self.is_exclusive + sleep = sleep or self.sleep or 0.2 + try: + if exclusive: + self._wait_for_readers_finishing(remove, sleep) + self._roster.modify(EXCLUSIVE, ADD) + else: + with self._lock: + if remove is not None: + self._roster.modify(remove, REMOVE) + self._roster.modify(SHARED, ADD) + self.is_exclusive = exclusive + return self + except ExclusiveLock.LockError as err: + msg = str(err) + if exclusive: + raise self.ExclusiveLockFailed(msg) + else: + raise self.SharedLockFailed(msg) + + def _wait_for_readers_finishing(self, remove, sleep): + while True: + self._lock.acquire() + if remove is not None: + self._roster.modify(remove, REMOVE) + remove = None + if len(self._roster.get(SHARED)) == 0: + return # we are the only one and we keep the lock! 
+ self._lock.release() + time.sleep(sleep) + + def release(self): + if self.is_exclusive: + self._roster.modify(EXCLUSIVE, REMOVE) + self._lock.release() + else: + with self._lock: + self._roster.modify(SHARED, REMOVE) + + def upgrade(self): + if not self.is_exclusive: + self.acquire(exclusive=True, remove=SHARED) + + def downgrade(self): + if self.is_exclusive: + self.acquire(exclusive=False, remove=EXCLUSIVE) + + def break_lock(self): + self._roster.remove() + self._lock.break_lock() diff --git a/borg/repository.py b/borg/repository.py index 0ad99970b..d7db689a0 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -114,11 +114,11 @@ class Repository: self.path = path if not os.path.isdir(path): raise self.DoesNotExist(path) + self.lock = UpgradableLock(os.path.join(path, 'repo'), exclusive).acquire() self.config = RawConfigParser() self.config.read(os.path.join(self.path, 'config')) if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1: raise self.InvalidRepository(path) - self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive) self.max_segment_size = self.config.getint('repository', 'max_segment_size') self.segments_per_dir = self.config.getint('repository', 'segments_per_dir') self.id = unhexlify(self.config.get('repository', 'id').strip()) @@ -149,7 +149,7 @@ class Repository: self._active_txn = True try: self.lock.upgrade() - except UpgradableLock.WriteLockFailed: + except UpgradableLock.ExclusiveLockFailed: # if upgrading the lock to exclusive fails, we do not have an # active transaction. this is important for "serve" mode, where # the repository instance lives on - even if exceptions happened. diff --git a/borg/testsuite/locking.py b/borg/testsuite/locking.py index 6a910edb6..aae5925a6 100644 --- a/borg/testsuite/locking.py +++ b/borg/testsuite/locking.py @@ -1,24 +1,102 @@ -import os -import tempfile -import unittest +import pytest -from ..locking import UpgradableLock -from . 
import BaseTestCase +from ..locking import get_id, ExclusiveLock, UpgradableLock, LockRoster, ADD, REMOVE, SHARED, EXCLUSIVE -class UpgradableLockTestCase(BaseTestCase): +ID1 = "foo", 1, 1 +ID2 = "bar", 2, 2 - def test(self): - file = tempfile.NamedTemporaryFile() - lock = UpgradableLock(file.name) - lock.upgrade() - lock.upgrade() - lock.release() +def test_id(): + hostname, pid, tid = get_id() + assert isinstance(hostname, str) + assert isinstance(pid, int) + assert isinstance(tid, int) + assert len(hostname) > 0 + assert pid > 0 - @unittest.skipIf(os.getuid() == 0, 'Root can always open files for writing') - def test_read_only_lock_file(self): - file = tempfile.NamedTemporaryFile() - os.chmod(file.name, 0o444) - lock = UpgradableLock(file.name) - self.assert_raises(UpgradableLock.WriteLockFailed, lock.upgrade) - lock.release() + +@pytest.fixture() +def lockpath(tmpdir): + return str(tmpdir.join('lock')) + + +class TestExclusiveLock: + def test_checks(self, lockpath): + with ExclusiveLock(lockpath, timeout=1) as lock: + assert lock.is_locked() and lock.by_me() + + def test_acquire_break_reacquire(self, lockpath): + lock = ExclusiveLock(lockpath, id=ID1).acquire() + lock.break_lock() + with ExclusiveLock(lockpath, id=ID2): + pass + + def test_timeout(self, lockpath): + with ExclusiveLock(lockpath, id=ID1): + with pytest.raises(ExclusiveLock.LockTimeout): + ExclusiveLock(lockpath, id=ID2, timeout=0.1).acquire() + + +class TestUpgradableLock: + def test_shared(self, lockpath): + lock1 = UpgradableLock(lockpath, exclusive=False, id=ID1).acquire() + lock2 = UpgradableLock(lockpath, exclusive=False, id=ID2).acquire() + assert len(lock1._roster.get(SHARED)) == 2 + assert len(lock1._roster.get(EXCLUSIVE)) == 0 + lock1.release() + lock2.release() + + def test_exclusive(self, lockpath): + with UpgradableLock(lockpath, exclusive=True, id=ID1) as lock: + assert len(lock._roster.get(SHARED)) == 0 + assert len(lock._roster.get(EXCLUSIVE)) == 1 + + def test_upgrade(self, 
lockpath): + with UpgradableLock(lockpath, exclusive=False) as lock: + lock.upgrade() + lock.upgrade() # NOP + assert len(lock._roster.get(SHARED)) == 0 + assert len(lock._roster.get(EXCLUSIVE)) == 1 + + def test_downgrade(self, lockpath): + with UpgradableLock(lockpath, exclusive=True) as lock: + lock.downgrade() + lock.downgrade() # NOP + assert len(lock._roster.get(SHARED)) == 1 + assert len(lock._roster.get(EXCLUSIVE)) == 0 + + def test_break(self, lockpath): + lock = UpgradableLock(lockpath, exclusive=True, id=ID1).acquire() + lock.break_lock() + assert len(lock._roster.get(SHARED)) == 0 + assert len(lock._roster.get(EXCLUSIVE)) == 0 + with UpgradableLock(lockpath, exclusive=True, id=ID2): + pass + + +@pytest.fixture() +def rosterpath(tmpdir): + return str(tmpdir.join('roster')) + + +class TestLockRoster: + def test_empty(self, rosterpath): + roster = LockRoster(rosterpath) + empty = roster.load() + roster.save(empty) + assert empty == {} + + def test_modify_get(self, rosterpath): + roster1 = LockRoster(rosterpath, id=ID1) + assert roster1.get(SHARED) == set() + roster1.modify(SHARED, ADD) + assert roster1.get(SHARED) == {ID1, } + roster2 = LockRoster(rosterpath, id=ID2) + roster2.modify(SHARED, ADD) + assert roster2.get(SHARED) == {ID1, ID2, } + roster1 = LockRoster(rosterpath, id=ID1) + roster1.modify(SHARED, REMOVE) + assert roster1.get(SHARED) == {ID2, } + roster2 = LockRoster(rosterpath, id=ID2) + roster2.modify(SHARED, REMOVE) + assert roster2.get(SHARED) == set() diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index b32315938..1c9fd072d 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -157,9 +157,9 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): for name in os.listdir(self.repository.path): if name.startswith('index.'): os.unlink(os.path.join(self.repository.path, name)) - with patch.object(UpgradableLock, 'upgrade', side_effect=UpgradableLock.WriteLockFailed) as upgrade: + with 
patch.object(UpgradableLock, 'upgrade', side_effect=UpgradableLock.ExclusiveLockFailed) as upgrade: self.reopen() - self.assert_raises(UpgradableLock.WriteLockFailed, lambda: len(self.repository)) + self.assert_raises(UpgradableLock.ExclusiveLockFailed, lambda: len(self.repository)) upgrade.assert_called_once_with() def test_crash_before_write_index(self): From 2deb520e671cfbe0f8cff3b1152604958fe49118 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 13 Jul 2015 16:45:18 +0200 Subject: [PATCH 169/241] locking code: extract timeout/sleep code into reusable TimeoutTimer class --- borg/locking.py | 70 ++++++++++++++++++++++++++++++--------- borg/testsuite/locking.py | 21 +++++++++++- 2 files changed, 75 insertions(+), 16 deletions(-) diff --git a/borg/locking.py b/borg/locking.py index 9d59c654b..735429260 100644 --- a/borg/locking.py +++ b/borg/locking.py @@ -19,6 +19,58 @@ def get_id(): return hostname, pid, tid +class TimeoutTimer: + """ + A timer for timeout checks (can also deal with no timeout, give timeout=None [default]). + It can also compute and optionally execute a reasonable sleep time (e.g. to avoid + polling too often or to support thread/process rescheduling). + """ + def __init__(self, timeout=None, sleep=None): + """ + Initialize a timer. 
+ + :param timeout: time out interval [s] or None (no timeout) + :param sleep: sleep interval [s] (>= 0: do sleep call, <0: don't call sleep) + or None (autocompute: use 10% of timeout, or 1s for no timeout) + """ + if timeout is not None and timeout < 0: + raise ValueError("timeout must be >= 0") + self.timeout_interval = timeout + if sleep is None: + if timeout is None: + sleep = 1.0 + else: + sleep = timeout / 10.0 + self.sleep_interval = sleep + self.start_time = None + self.end_time = None + + def __repr__(self): + return "<%s: start=%r end=%r timeout=%r sleep=%r>" % ( + self.__class__.__name__, self.start_time, self.end_time, + self.timeout_interval, self.sleep_interval) + + def start(self): + self.start_time = time.time() + if self.timeout_interval is not None: + self.end_time = self.start_time + self.timeout_interval + return self + + def sleep(self): + if self.sleep_interval >= 0: + time.sleep(self.sleep_interval) + + def timed_out(self): + return self.end_time is not None and time.time() >= self.end_time + + def timed_out_or_sleep(self): + if self.timed_out(): + return True + else: + self.sleep() + return False + + class ExclusiveLock: """An exclusive Lock based on mkdir fs operation being atomic""" class LockError(Error): @@ -55,23 +107,12 @@ class ExclusiveLock: def __repr__(self): return "<%s: %r>" % (self.__class__.__name__, self.unique_name) - def _get_timing(self, timeout, sleep): + def acquire(self, timeout=None, sleep=None): if timeout is None: timeout = self.timeout - start = end = time.time() - if timeout is not None and timeout > 0: - end += timeout if sleep is None: sleep = self.sleep - if sleep is None: - if timeout is None: - sleep = 1.0 - else: - sleep = max(0, timeout / 10.0) - return start, sleep, end, timeout - - def acquire(self, timeout=None, sleep=None): - start, sleep, end, timeout = self._get_timing(timeout, sleep) + timer = TimeoutTimer(timeout, sleep).start() while True: try: os.mkdir(self.path) @@ -79,9 +120,8 @@ class 
ExclusiveLock: if err.errno == errno.EEXIST: # already locked if self.by_me(): return self - if timeout is not None and time.time() > end: + if timer.timed_out_or_sleep(): raise self.LockTimeout(self.path) - time.sleep(sleep) else: raise self.LockFailed(self.path, str(err)) else: diff --git a/borg/testsuite/locking.py b/borg/testsuite/locking.py index aae5925a6..4b36e0caa 100644 --- a/borg/testsuite/locking.py +++ b/borg/testsuite/locking.py @@ -1,6 +1,8 @@ +import time + import pytest -from ..locking import get_id, ExclusiveLock, UpgradableLock, LockRoster, ADD, REMOVE, SHARED, EXCLUSIVE +from ..locking import get_id, TimeoutTimer, ExclusiveLock , UpgradableLock, LockRoster, ADD, REMOVE, SHARED, EXCLUSIVE ID1 = "foo", 1, 1 @@ -15,6 +17,23 @@ def test_id(): assert pid > 0 +class TestTimeoutTimer: + def test_timeout(self): + timeout = 0.5 + t = TimeoutTimer(timeout).start() + assert not t.timed_out() + time.sleep(timeout * 1.5) + assert t.timed_out() + + def test_notimeout_sleep(self): + timeout, sleep = None, 0.5 + t = TimeoutTimer(timeout, sleep).start() + assert not t.timed_out_or_sleep() + assert time.time() >= t.start_time + 1 * sleep + assert not t.timed_out_or_sleep() + assert time.time() >= t.start_time + 2 * sleep + + @pytest.fixture() def lockpath(tmpdir): return str(tmpdir.join('lock')) From 43bc1d5b6afaf67b856407cbcd7813519ba34e79 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 13 Jul 2015 18:45:49 +0200 Subject: [PATCH 170/241] improved tox configuration, documented there how to invoke it the usage of some py.test features in the previous locking changesets somehow broke the test environment. it seems to help to point py.test to some specific package or module (and not just invoke "py.test" without arguments). 
--- tox.ini | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tox.ini b/tox.ini index 68d2e6a61..79603cda9 100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,27 @@ +# tox configuration - if you change anything here, run this to verify: +# fakeroot -u tox --recreate +# +# Invokation examples: +# fakeroot -u tox # run all tests +# fakeroot -u tox -e py32 # run all tests, but only on python 3.2 +# fakeroot -u tox borg.testsuite.locking # only run 1 test module +# fakeroot -u tox borg.testsuite.locking -- -k '"not Timer"' # exclude some tests +# fakeroot -u tox borg.testsuite -- -v # verbose py.test +# +# Important notes: +# Without fakeroot -u some tests will fail. +# When using -- to give options to py.test, you MUST also give borg.testsuite[.module]. + [tox] envlist = py32, py33, py34 [testenv] -# Change dir to avoid import problem -changedir = {envdir} +# Change dir to avoid import problem for cython code. The directory does +# not really matter, should be just different from the toplevel dir. +changedir = {toxworkdir} deps = pytest mock -commands = py.test -passenv = * # fakeroot -u needs some env vars +commands = py.test --pyargs {posargs:borg.testsuite} +# fakeroot -u needs some env vars: +passenv = * From b539169ec17f4a91b79b5a5bd15e7a2182eef0fe Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 13 Jul 2015 23:03:09 +0200 Subject: [PATCH 171/241] update CHANGES --- CHANGES | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/CHANGES b/CHANGES index 2acb58115..ef9376f80 100644 --- a/CHANGES +++ b/CHANGES @@ -7,31 +7,50 @@ Version 0.24.0 New features: -- borg create --chunker-params ... to configure the chunker. +- borg create --chunker-params ... to configure the chunker, fixes #16 + (attic #302, attic #300, and somehow also #41). 
+ This can be used to reduce memory usage caused by chunk management overhead, + so borg does not create a huge chunks index/repo index and eats all your RAM + if you back up lots of data in huge files (like VM disk images). See docs/misc/create_chunker-params.txt for more information. - borg info now reports chunk counts in the chunk index. +- borg create --compression 0..9 to select zlib compression level, fixes #66 + (attic #295). +- improve at-end error logging, always log exceptions and set exit_code=1 +- LoggedIO: better error checks / exceptions / exception handling Bug fixes: -- reduce memory usage, see --chunker-params, fixes #16. - This can be used to reduce chunk management overhead, so borg does not create - a huge chunks index/repo index and eats all your RAM if you back up lots of - data in huge files (like VM disk images). -- better Exception msg if there is no Borg installed on the remote repo server. +- more compatible repository locking code (based on mkdir), maybe fixes #92 + (attic #317, attic #201). +- better Exception msg if no Borg is installed on the remote repo server, #56 +- create a RepositoryCache implementation that can cope with >2GiB, + fixes attic #326. +- clarify help text, fixes #73. Other changes: -- Fedora/Fedora-based install instructions added to docs. -- added docs/misc directory for misc. writeups that won't be included "as is" - into the html docs. - - +- improved docs: + - added docs/misc directory for misc. writeups that won't be included + "as is" into the html docs. 
+ - document environment variables and return codes (attic #324, attic #52) + - web site: add related projects, fix web site url, IRC #borgbackup + - Fedora/Fedora-based install instructions added to docs + - updated AUTHORS +- use borg-tmp as prefix for temporary files / directories +- short prune options without "keep-" are deprecated, do not suggest them +- improved tox configuration, documented there how to invoke it +- remove usage of unittest.mock, always use mock from pypi +- use entrypoints instead of scripts, for better use of the wheel format and + modern installs + I forgot to list some stuff already implemented in 0.23.0, here they are: New features: - efficient archive list from manifest, meaning a big speedup for slow - repo connections and "list ", "delete ", "prune" + repo connections and "list ", "delete ", "prune" (attic #242, + attic #167) - big speedup for chunks cache sync (esp. for slow repo connections), fixes #18 - hashindex: improve error messages From b2f460d5910db54130f63825bae1e1d04cc6abaa Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 13 Jul 2015 23:20:46 +0200 Subject: [PATCH 172/241] fix filenames used for locking, update docs about locking --- borg/cache.py | 2 +- borg/locking.py | 2 +- borg/repository.py | 2 +- docs/internals.rst | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 37e066685..1b66bc1b9 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -124,7 +124,7 @@ class Cache: def open(self): if not os.path.isdir(self.path): raise Exception('%s Does not look like a Borg cache' % self.path) - self.lock = UpgradableLock(os.path.join(self.path, 'repo'), exclusive=True).acquire() + self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True).acquire() self.rollback() def close(self): diff --git a/borg/locking.py b/borg/locking.py index 735429260..9bed13e19 100644 --- a/borg/locking.py +++ b/borg/locking.py @@ -221,7 +221,7 @@ class UpgradableLock: # 
an exclusive lock, used for: # - holding while doing roster queries / updates # - holding while the UpgradableLock itself is exclusive - self._lock = ExclusiveLock(path + '.lock', id=id) + self._lock = ExclusiveLock(path + '.exclusive', id=id) def __enter__(self): return self.acquire() diff --git a/borg/repository.py b/borg/repository.py index d7db689a0..60831c8ee 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -114,7 +114,7 @@ class Repository: self.path = path if not os.path.isdir(path): raise self.DoesNotExist(path) - self.lock = UpgradableLock(os.path.join(path, 'repo'), exclusive).acquire() + self.lock = UpgradableLock(os.path.join(path, 'lock'), exclusive).acquire() self.config = RawConfigParser() self.config.read(os.path.join(self.path, 'config')) if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1: diff --git a/docs/internals.rst b/docs/internals.rst index 2928b284f..30237e61a 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -26,7 +26,7 @@ README simple text file telling that this is a |project_name| repository config - repository configuration and lock file + repository configuration data/ directory where the actual data is stored @@ -37,6 +37,9 @@ hints.%d index.%d repository index +lock.roster and lock.exclusive/* + used by the locking system to manage shared and exclusive locks + Config file ----------- @@ -55,9 +58,6 @@ identifier for repositories. It will not change if you move the repository around so you can make a local transfer then decide to move the repository to another (even remote) location at a later time. -|project_name| will do a POSIX read lock on the config file when operating -on the repository. 
- Keys ---- From b5bdb52b6a27d98aaf487309c3ff5a12620b5fe1 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 14 Jul 2015 00:43:35 +0200 Subject: [PATCH 173/241] update internals doc about chunker params, memory usage and compression --- docs/internals.rst | 59 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 30237e61a..6dfc8ba9b 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -168,13 +168,27 @@ A chunk is stored as an object as well, of course. Chunks ------ -|project_name| uses a rolling hash computed by the Buzhash_ algorithm, with a -window size of 4095 bytes (`0xFFF`), with a minimum chunk size of 1024 bytes. -It triggers (chunks) when the last 16 bits of the hash are zero, producing -chunks of 64kiB on average. +The |project_name| chunker uses a rolling hash computed by the Buzhash_ algorithm. +It triggers (chunks) when the last HASH_MASK_BITS bits of the hash are zero, +producing chunks of 2^HASH_MASK_BITS Bytes on average. + +create --chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE +can be used to tune the chunker parameters, the default is: + +- CHUNK_MIN_EXP = 10 (minimum chunk size = 2^10 B = 1 kiB) +- CHUNK_MAX_EXP = 23 (maximum chunk size = 2^23 B = 8 MiB) +- HASH_MASK_BITS = 16 (statistical medium chunk size ~= 2^16 B = 64 kiB) +- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`) + +The default parameters are OK for relatively small backup data volumes and +repository sizes and a lot of available memory (RAM) and disk space for the +chunk index. If that does not apply, you are advised to tune these parameters +to keep the chunk count lower than with the defaults. The buzhash table is altered by XORing it with a seed randomly generated once -for the archive, and stored encrypted in the keyfile. +for the archive, and stored encrypted in the keyfile. 
This is to prevent chunk +size based fingerprinting attacks on your encrypted repo contents (to guess +what files you have based on a specific set of chunk sizes). Indexes / Caches @@ -243,7 +257,7 @@ Indexes / Caches memory usage Here is the estimated memory usage of |project_name|: - chunk_count ~= total_file_size / 65536 + chunk_count ~= total_file_size / 2 ^ HASH_MASK_BITS repo_index_usage = chunk_count * 40 @@ -252,20 +266,32 @@ Here is the estimated memory usage of |project_name|: files_cache_usage = total_file_count * 240 + chunk_count * 80 mem_usage ~= repo_index_usage + chunks_cache_usage + files_cache_usage - = total_file_count * 240 + total_file_size / 400 + = chunk_count * 164 + total_file_count * 240 All units are Bytes. -It is assuming every chunk is referenced exactly once and that typical chunk size is 64kiB. +It is assuming every chunk is referenced exactly once (if you have a lot of +duplicate chunks, you will have less chunks than estimated above). + +It is also assuming that typical chunk size is 2^HASH_MASK_BITS (if you have +a lot of files smaller than this statistical medium chunk size, you will have +more chunks than estimated above, because 1 file is at least 1 chunk). If a remote repository is used the repo index will be allocated on the remote side. -E.g. backing up a total count of 1Mi files with a total size of 1TiB: +E.g. backing up a total count of 1Mi files with a total size of 1TiB. - mem_usage = 1 * 2**20 * 240 + 1 * 2**40 / 400 = 2.8GiB +a) with create --chunker-params 10,23,16,4095 (default): -Note: there is a commandline option to switch off the files cache. You'll save -some memory, but it will need to read / chunk all the files then. + mem_usage = 2.8GiB + +b) with create --chunker-params 10,23,20,4095 (custom): + + mem_usage = 0.4GiB + +Note: there is also the --no-files-cache option to switch off the files cache. 
+You'll save some memory, but it will need to read / chunk all the files then as +it can not skip unmodified files then. Encryption @@ -291,6 +317,7 @@ Encryption keys are either derived from a passphrase or kept in a key file. The passphrase is passed through the ``BORG_PASSPHRASE`` environment variable or prompted for interactive usage. + Key files --------- @@ -355,4 +382,10 @@ representation of the repository id. Compression ----------- -Currently, compression is disabled by default. Zlib compression can be enabled by passing ``--compression level`` on the command line. Level can be anything from 0 (no compression, fast) to 9 (high compression, slow). +|project_name| currently always pipes all data through a zlib compressor which +supports compression levels 0 (no compression, fast) to 9 (high compression, slow). + +See ``borg create --help`` about how to specify the compression level and its default. + +Note: zlib level 0 creates a little bit more output data than it gets as input, +due to zlib protocol overhead. From 3c34ef8145f91c97adfb1136768b42043699ad2e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 14 Jul 2015 20:25:10 +0200 Subject: [PATCH 174/241] clarify that borg extract uses the cwd as extraction target --- docs/usage.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/usage.rst b/docs/usage.rst index d5d3a94a9..a77b608e6 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -123,6 +123,9 @@ Examples # Extract the "src" directory but exclude object files $ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o' +Note: currently, extract always writes into the current working directory ("."), + so make sure you ``cd`` to the right place before calling ``borg extract``. + .. include:: usage/check.rst.inc .. 
include:: usage/delete.rst.inc From f330b4bd2afae73fdc4cf18c10bb1ff005e66c3a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 14 Jul 2015 21:31:35 +0200 Subject: [PATCH 175/241] some doc updates, see below faq about redundancy / integrity compression is optional having borg installed on backup server is optional (but faster) cygwin installation tips do not document passphrase encryption mode example, use keyfile mode --- docs/faq.rst | 19 +++++++++++++++++-- docs/foreword.rst | 7 +++++-- docs/index.rst | 4 ++-- docs/installation.rst | 32 ++++++++++++++++++++++++++++++-- docs/usage.rst | 4 ++-- 5 files changed, 56 insertions(+), 10 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index ddfb0c834..06ea63bd2 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -70,8 +70,9 @@ When backing up to remote encrypted repos, is encryption done locally? When backing up to remote servers, do I have to trust the remote server? Yes and No. - No, as far as data confidentiality is concerned - all your files/dirs data - and metadata are stored in their encrypted form into the repository. + No, as far as data confidentiality is concerned - if you use encryption, + all your files/dirs data and metadata are stored in their encrypted form + into the repository. Yes, as an attacker with access to the remote server could delete (or otherwise make unavailable) all your backups. @@ -90,6 +91,20 @@ If I want to run |project_name| on a ARM CPU older than ARM v6? echo "2" > /proc/cpu/alignment +Can |project_name| add redundancy to the backup data to deal with hardware malfunction? + No, it can't. While that at first sounds like a good idea to defend against some + defect HDD sectors or SSD flash blocks, dealing with this in a reliable way needs a lot + of low-level storage layout information and control which we do not have (and also can't + get, even if we wanted). + + So, if you need that, consider RAID1 or a filesystems that offers redundant storage.
+ +Can |project_name| verify data integrity of a backup archive? + Yes, if you want to detect accidental data damage (like bit rot), use the ``check`` + operation. It will notice corruption using CRCs and hashes. + If you want to be able to detect malicious tampering also, use a encrypted repo. + It will then be able to check using CRCs and HMACs. + Why was Borg forked from Attic? Borg was created in May 2015 in response to the difficulty of getting new code or larger changes incorporated into Attic and diff --git a/docs/foreword.rst b/docs/foreword.rst index dc81e493e..c3f70c42e 100644 --- a/docs/foreword.rst +++ b/docs/foreword.rst @@ -15,7 +15,7 @@ Space efficient storage Variable block size `deduplication`_ is used to reduce the number of bytes stored by detecting redundant data. Each file is split into a number of variable length chunks and only chunks that have never been seen before - are compressed and added to the repository. + are added to the repository (and optionally compressed). Optional data encryption All data can be protected using 256-bit AES_ encryption and data integrity @@ -23,7 +23,10 @@ Optional data encryption Off-site backups |project_name| can store data on any remote host accessible over SSH as - long as |project_name| is installed. + long as |project_name| is installed. If you don't have |project_name| + installed there, you can use some network filesytem (sshfs, nfs, ...) + to mount a filesystem located on your remote host and use it like it was + local (but that will be slower). Backups mountable as filesystems Backup archives are :ref:`mountable ` as diff --git a/docs/index.rst b/docs/index.rst index fad266ed3..8ca4fe092 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,8 +2,8 @@ Welcome to Borg ================ -|project_name| is a deduplicating and compressing backup program. -Optionally, it also supports authenticated encryption. +|project_name| is a deduplicating backup program. 
+Optionally, it also supports compression and authenticated encryption. The main goal of |project_name| is to provide an efficient and secure way to backup data. The data deduplication technique used makes |project_name| diff --git a/docs/installation.rst b/docs/installation.rst index 11f13fc7a..b1a20d8f2 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -24,7 +24,7 @@ archive as a FUSE filesystem. Only FUSE >= 2.8.0 can support llfuse. You only need Cython to compile the .pyx files to the respective .c files when using |project_name| code from git. For |project_name| releases, the .c -files will be bundled. +files will be bundled, so you won't need Cython to install a release. Platform notes -------------- @@ -32,7 +32,7 @@ FreeBSD: You may need to get a recent enough OpenSSL version from FreeBSD ports. Mac OS X: You may need to get a recent enough OpenSSL version from homebrew_. -Mac OS X: A recent enough FUSE implementation might be unavailable. +Mac OS X: You need OS X FUSE >= 3.0. Debian / Ubuntu installation (from git) @@ -119,3 +119,31 @@ Some of the steps detailled below might be useful also for non-git installs. # optional: run all the tests, on all supported Python versions fakeroot -u tox + + +Cygwin (from git) +----------------- +Please note that running under cygwin is rather experimental. + +You'll need at least (use the cygwin installer to fetch/install these): + +:: + python3 + python3-setuptools + python3-cython + binutils + gcc-core + git + libopenssl + make + openssh + openssl-devel + +You can then install ``pip`` and ``virtualenv``: + +:: + + easy_install pip + pip install virtualenv + +And now continue as for Linux (see above). 
diff --git a/docs/usage.rst b/docs/usage.rst index a77b608e6..c13e4a158 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -78,8 +78,8 @@ Examples # Remote repository (accesses a remote borg via ssh) $ borg init user@hostname:backup - # Encrypted remote repository - $ borg init --encryption=passphrase user@hostname:backup + # Encrypted remote repository, using a local key file + $ borg init --encryption=keyfile user@hostname:backup .. include:: usage/create.rst.inc From b6445655460e8c61326410d024a30a377e8cd882 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 15 Jul 2015 00:01:07 +0200 Subject: [PATCH 176/241] repo key mode (and deprecate passphrase mode), fixes #85 see usage.rst change for a description and why this is needed --- borg/archiver.py | 2 +- borg/key.py | 274 +++++++++++++++++++++++++------------- borg/remote.py | 11 ++ borg/repository.py | 19 ++- borg/testsuite/archive.py | 4 +- docs/usage.rst | 39 +++++- 6 files changed, 249 insertions(+), 100 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 8ddebd210..6275edf22 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -539,7 +539,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") type=location_validator(archive=False), help='repository to create') subparser.add_argument('-e', '--encryption', dest='encryption', - choices=('none', 'passphrase', 'keyfile'), default='none', + choices=('none', 'passphrase', 'keyfile', 'repokey'), default='none', help='select encryption method') check_epilog = textwrap.dedent(""" diff --git a/borg/key.py b/borg/key.py index 31267d0d9..fabdae5b3 100644 --- a/borg/key.py +++ b/borg/key.py @@ -1,5 +1,6 @@ from binascii import hexlify, a2b_base64, b2a_base64 -from getpass import getpass +import configparser +import getpass import os import msgpack import textwrap @@ -23,6 +24,11 @@ class KeyfileNotFoundError(Error): """ +class RepoKeyNotFoundError(Error): + """No key entry found in the config of repository {}. 
+ """ + + class HMAC(hmac.HMAC): """Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews """ @@ -33,27 +39,35 @@ class HMAC(hmac.HMAC): def key_creator(repository, args): if args.encryption == 'keyfile': return KeyfileKey.create(repository, args) - elif args.encryption == 'passphrase': + elif args.encryption == 'repokey': + return RepoKey.create(repository, args) + elif args.encryption == 'passphrase': # deprecated, kill in 1.x return PassphraseKey.create(repository, args) else: return PlaintextKey.create(repository, args) def key_factory(repository, manifest_data): - if manifest_data[0] == KeyfileKey.TYPE: + key_type = manifest_data[0] + if key_type == KeyfileKey.TYPE: return KeyfileKey.detect(repository, manifest_data) - elif manifest_data[0] == PassphraseKey.TYPE: + elif key_type == RepoKey.TYPE: + return RepoKey.detect(repository, manifest_data) + elif key_type == PassphraseKey.TYPE: # deprecated, kill in 1.x return PassphraseKey.detect(repository, manifest_data) - elif manifest_data[0] == PlaintextKey.TYPE: + elif key_type == PlaintextKey.TYPE: return PlaintextKey.detect(repository, manifest_data) else: - raise UnsupportedPayloadError(manifest_data[0]) + raise UnsupportedPayloadError(key_type) class KeyBase: + TYPE = None # override in subclasses - def __init__(self): + def __init__(self, repository): self.TYPE_STR = bytes([self.TYPE]) + self.repository = repository + self.target = None # key location file path / repo obj self.compression_level = 0 def id_hash(self, data): @@ -74,12 +88,12 @@ class PlaintextKey(KeyBase): @classmethod def create(cls, repository, args): - print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.') - return cls() + print('Encryption NOT enabled.\nUse the "--encryption=repokey|keyfile|passphrase" to enable encryption.') + return cls(repository) @classmethod def detect(cls, repository, manifest_data): - return cls() + return cls(repository) def id_hash(self, data): return 
sha256(data).digest() @@ -155,38 +169,65 @@ class AESKeyBase(KeyBase): self.dec_cipher = AES(is_encrypt=False, key=self.enc_key) +class Passphrase(str): + @classmethod + def env_passphrase(cls, default=None): + passphrase = os.environ.get('BORG_PASSPHRASE', default) + if passphrase is not None: + return cls(passphrase) + + @classmethod + def getpass(cls, prompt): + return cls(getpass.getpass(prompt)) + + @classmethod + def new(cls, allow_empty=False): + passphrase = cls.env_passphrase() + if passphrase is not None: + return passphrase + while True: + passphrase = cls.getpass('Enter new passphrase: ') + if allow_empty or passphrase: + passphrase2 = cls.getpass('Enter same passphrase again: ') + if passphrase == passphrase2: + print('Remember your passphrase. Your data will be inaccessible without it.') + return passphrase + else: + print('Passphrases do not match') + else: + print('Passphrase must not be blank') + + def __repr__(self): + return '' + + def kdf(self, salt, iterations, length): + return pbkdf2_sha256(self.encode('utf-8'), salt, iterations, length) + + class PassphraseKey(AESKeyBase): + # This mode is DEPRECATED and will be killed at 1.0 release. + # With this mode: + # - you can never ever change your passphrase for existing repos. + # - you can never ever use a different iterations count for existing repos. TYPE = 0x01 - iterations = 100000 + iterations = 100000 # must not be changed ever! 
@classmethod def create(cls, repository, args): - key = cls() - passphrase = os.environ.get('BORG_PASSPHRASE') - if passphrase is not None: - passphrase2 = passphrase - else: - passphrase, passphrase2 = 1, 2 - while passphrase != passphrase2: - passphrase = getpass('Enter passphrase: ') - if not passphrase: - print('Passphrase must not be blank') - continue - passphrase2 = getpass('Enter same passphrase again: ') - if passphrase != passphrase2: - print('Passphrases do not match') + key = cls(repository) + print('WARNING: "passphrase" mode is deprecated and will be removed in 1.0.') + print('If you want something similar (but with less issues), use "repokey" mode.') + passphrase = Passphrase.new(allow_empty=False) key.init(repository, passphrase) - if passphrase: - print('Remember your passphrase. Your data will be inaccessible without it.') return key @classmethod def detect(cls, repository, manifest_data): prompt = 'Enter passphrase for %s: ' % repository._location.orig - key = cls() - passphrase = os.environ.get('BORG_PASSPHRASE') + key = cls(repository) + passphrase = Passphrase.env_passphrase() if passphrase is None: - passphrase = getpass(prompt) + passphrase = Passphrase.getpass(prompt) while True: key.init(repository, passphrase) try: @@ -195,7 +236,7 @@ class PassphraseKey(AESKeyBase): key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) return key except IntegrityError: - passphrase = getpass(prompt) + passphrase = Passphrase.getpass(prompt) def change_passphrase(self): class ImmutablePassphraseError(Error): @@ -204,41 +245,31 @@ class PassphraseKey(AESKeyBase): raise ImmutablePassphraseError def init(self, repository, passphrase): - self.init_from_random_data(pbkdf2_sha256(passphrase.encode('utf-8'), repository.id, self.iterations, 100)) + self.init_from_random_data(passphrase.kdf(repository.id, self.iterations, 100)) self.init_ciphers() -class KeyfileKey(AESKeyBase): - FILE_ID = 'BORG_KEY' - TYPE = 0x00 - +class 
KeyfileKeyBase(AESKeyBase): @classmethod def detect(cls, repository, manifest_data): - key = cls() - path = cls.find_key_file(repository) - prompt = 'Enter passphrase for key file %s: ' % path - passphrase = os.environ.get('BORG_PASSPHRASE', '') - while not key.load(path, passphrase): - passphrase = getpass(prompt) + key = cls(repository) + target = key.find_key() + prompt = 'Enter passphrase for key %s: ' % target + passphrase = Passphrase.env_passphrase(default='') + while not key.load(target, passphrase): + passphrase = Passphrase.getpass(prompt) num_blocks = num_aes_blocks(len(manifest_data) - 41) key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks)) return key - @classmethod - def find_key_file(cls, repository): - id = hexlify(repository.id).decode('ascii') - keys_dir = get_keys_dir() - for name in os.listdir(keys_dir): - filename = os.path.join(keys_dir, name) - with open(filename, 'r') as fd: - line = fd.readline().strip() - if line and line.startswith(cls.FILE_ID) and line[len(cls.FILE_ID)+1:] == id: - return filename - raise KeyfileNotFoundError(repository._location.canonical_path(), get_keys_dir()) + def find_key(self): + raise NotImplementedError - def load(self, filename, passphrase): - with open(filename, 'r') as fd: - cdata = a2b_base64(''.join(fd.readlines()[1:]).encode('ascii')) # .encode needed for Python 3.[0-2] + def load(self, target, passphrase): + raise NotImplementedError + + def _load(self, key_data, passphrase): + cdata = a2b_base64(key_data.encode('ascii')) # .encode needed for Python 3.[0-2] data = self.decrypt_key_file(cdata, passphrase) if data: key = msgpack.unpackb(data) @@ -249,23 +280,22 @@ class KeyfileKey(AESKeyBase): self.enc_hmac_key = key[b'enc_hmac_key'] self.id_key = key[b'id_key'] self.chunk_seed = key[b'chunk_seed'] - self.path = filename return True + return False def decrypt_key_file(self, data, passphrase): d = msgpack.unpackb(data) assert d[b'version'] == 1 assert d[b'algorithm'] == 
b'sha256' - key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32) + key = passphrase.kdf(d[b'salt'], d[b'iterations'], 32) data = AES(is_encrypt=False, key=key).decrypt(d[b'data']) - if HMAC(key, data, sha256).digest() != d[b'hash']: - return None - return data + if HMAC(key, data, sha256).digest() == d[b'hash']: + return data def encrypt_key_file(self, data, passphrase): salt = get_random_bytes(32) iterations = 100000 - key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32) + key = passphrase.kdf(salt, iterations, 32) hash = HMAC(key, data, sha256).digest() cdata = AES(is_encrypt=True, key=key).encrypt(data) d = { @@ -278,7 +308,7 @@ class KeyfileKey(AESKeyBase): } return msgpack.packb(d) - def save(self, path, passphrase): + def _save(self, passphrase): key = { 'version': 1, 'repository_id': self.repository_id, @@ -288,45 +318,101 @@ class KeyfileKey(AESKeyBase): 'chunk_seed': self.chunk_seed, } data = self.encrypt_key_file(msgpack.packb(key), passphrase) - with open(path, 'w') as fd: - fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.repository_id).decode('ascii'))) - fd.write('\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii')))) - fd.write('\n') - self.path = path + key_data = '\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii'))) + return key_data def change_passphrase(self): - passphrase, passphrase2 = 1, 2 - while passphrase != passphrase2: - passphrase = getpass('New passphrase: ') - passphrase2 = getpass('Enter same passphrase again: ') - if passphrase != passphrase2: - print('Passphrases do not match') - self.save(self.path, passphrase) - print('Key file "%s" updated' % self.path) + passphrase = Passphrase.new(allow_empty=True) + self.save(self.target, passphrase) + print('Key updated') @classmethod def create(cls, repository, args): + passphrase = Passphrase.new(allow_empty=True) + key = cls(repository) + key.repository_id = repository.id + key.init_from_random_data(get_random_bytes(100)) + 
key.init_ciphers() + target = key.get_new_target(args) + key.save(target, passphrase) + print('Key in "%s" created.' % target) + print('Keep this key safe. Your data will be inaccessible without it.') + return key + + def save(self, target, passphrase): + raise NotImplementedError + + def get_new_target(self, args): + raise NotImplementedError + + +class KeyfileKey(KeyfileKeyBase): + TYPE = 0x00 + FILE_ID = 'BORG_KEY' + + def find_key(self): + id = hexlify(self.repository.id).decode('ascii') + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line.startswith(self.FILE_ID) and line[len(self.FILE_ID)+1:] == id: + return filename + raise KeyfileNotFoundError(self.repository._location.canonical_path(), get_keys_dir()) + + def get_new_target(self, args): filename = args.repository.to_key_filename() path = filename i = 1 while os.path.exists(path): i += 1 path = filename + '.%d' % i - passphrase = os.environ.get('BORG_PASSPHRASE') - if passphrase is not None: - passphrase2 = passphrase - else: - passphrase, passphrase2 = 1, 2 - while passphrase != passphrase2: - passphrase = getpass('Enter passphrase (empty for no passphrase):') - passphrase2 = getpass('Enter same passphrase again: ') - if passphrase != passphrase2: - print('Passphrases do not match') - key = cls() - key.repository_id = repository.id - key.init_from_random_data(get_random_bytes(100)) - key.init_ciphers() - key.save(path, passphrase) - print('Key file "%s" created.' % key.path) - print('Keep this file safe. 
Your data will be inaccessible without it.') - return key + return path + + def load(self, target, passphrase): + with open(target, 'r') as fd: + key_data = ''.join(fd.readlines()[1:]) + success = self._load(key_data, passphrase) + if success: + self.target = target + return success + + def save(self, target, passphrase): + key_data = self._save(passphrase) + with open(target, 'w') as fd: + fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.repository_id).decode('ascii'))) + fd.write(key_data) + fd.write('\n') + self.target = target + + +class RepoKey(KeyfileKeyBase): + TYPE = 0x03 + + def find_key(self): + loc = self.repository._location.canonical_path() + try: + self.repository.load_key() + return loc + except configparser.NoOptionError: + raise RepoKeyNotFoundError(loc) + + def get_new_target(self, args): + return self.repository + + def load(self, target, passphrase): + # what we get in target is just a repo location, but we already have the repo obj: + target = self.repository + key_data = target.load_key() + key_data = key_data.decode('utf-8') # remote repo: msgpack issue #99, getting bytes + success = self._load(key_data, passphrase) + if success: + self.target = target + return success + + def save(self, target, passphrase): + key_data = self._save(passphrase) + key_data = key_data.encode('utf-8') # remote repo: msgpack issue #99, giving bytes + target.save_key(key_data) + self.target = target diff --git a/borg/remote.py b/borg/remote.py index 0ad91d76e..fe859ac1f 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -41,6 +41,8 @@ class RepositoryServer: 'put', 'repair', 'rollback', + 'save_key', + 'load_key', ) def __init__(self, restrict_to_paths): @@ -151,6 +153,9 @@ class RemoteRepository: def __del__(self): self.close() + def __repr__(self): + return '<%s %s>' % (self.__class__.__name__, self.location.canonical_path()) + def call(self, cmd, *args, **kw): for resp in self.call_many(cmd, [args], **kw): return resp @@ -276,6 +281,12 @@ class 
RemoteRepository: def delete(self, id_, wait=True): return self.call('delete', id_, wait=wait) + def save_key(self, keydata): + return self.call('save_key', keydata) + + def load_key(self): + return self.call('load_key') + def close(self): if self.p: self.p.stdin.close() diff --git a/borg/repository.py b/borg/repository.py index 60831c8ee..97cdeac04 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -62,6 +62,9 @@ class Repository: def __del__(self): self.close() + def __repr__(self): + return '<%s %s>' % (self.__class__.__name__, self.path) + def create(self, path): """Create a new empty repository at `path` """ @@ -78,9 +81,23 @@ class Repository: config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR) config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE) config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii')) - with open(os.path.join(path, 'config'), 'w') as fd: + self.save_config(path, config) + + def save_config(self, path, config): + config_path = os.path.join(path, 'config') + with open(config_path, 'w') as fd: config.write(fd) + def save_key(self, keydata): + assert self.config + keydata = keydata.decode('utf-8') # remote repo: msgpack issue #99, getting bytes + self.config.set('repository', 'key', keydata) + self.save_config(self.path, self.config) + + def load_key(self): + keydata = self.config.get('repository', 'key') + return keydata.encode('utf-8') # remote repo: msgpack issue #99, returning bytes + def destroy(self): """Destroy the repository at `self.path` """ diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index abb5bccb9..9a20e9f6e 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -23,7 +23,7 @@ class ArchiveTimestampTestCase(BaseTestCase): def _test_timestamp_parsing(self, isoformat, expected): repository = Mock() - key = PlaintextKey() + key = PlaintextKey(repository) manifest = Manifest(repository, key) a = Archive(repository, key, 
manifest, 'test', create=True) a.metadata = {b'time': isoformat} @@ -45,7 +45,7 @@ class ChunkBufferTestCase(BaseTestCase): def test(self): data = [{b'foo': 1}, {b'bar': 2}] cache = MockCache() - key = PlaintextKey() + key = PlaintextKey(None) chunks = CacheChunkBuffer(cache, key, None) for d in data: chunks.add(d) diff --git a/docs/usage.rst b/docs/usage.rst index d5d3a94a9..c90cf97bc 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -78,8 +78,43 @@ Examples # Remote repository (accesses a remote borg via ssh) $ borg init user@hostname:backup - # Encrypted remote repository - $ borg init --encryption=passphrase user@hostname:backup + # Encrypted remote repository, store the key in the repo + $ borg init --encryption=repokey user@hostname:backup + + # Encrypted remote repository, store the key your home dir + $ borg init --encryption=keyfile user@hostname:backup + +Important notes about encryption: + +Use encryption! Repository encryption protects you e.g. against the case that +an attacker has access to your backup repository. + +But be careful with the key / the passphrase: + +``--encryption=passphrase`` is DEPRECATED and will be removed in next major release. +This mode has very fundamental, unfixable problems (like you can never change +your passphrase or the pbkdf2 iteration count for an existing repository, because +the encryption / decryption key is directly derived from the passphrase). + +If you want "passphrase-only" security, just use the ``repokey`` mode. The key will +be stored inside the repository (in its "config" file). In above mentioned +attack scenario, the attacker will have the key (but not the passphrase). + +If you want "passphrase and having-the-key" security, use the ``keyfile`` mode. +The key will be stored in your home directory (in ``.borg/keys``). In the attack +scenario, the attacker who has just access to your repo won't have the key (and +also not the passphrase). 
+ +Make a backup copy of the key file (``keyfile`` mode) or repo config file +(``repokey`` mode) and keep it at a safe place, so you still have the key in +case it gets corrupted or lost. +The backup that is encrypted with that key won't help you with that, of course. + +Make sure you use a good passphrase. Not too short, not too simple. The real +encryption / decryption key is encrypted with / locked by your passphrase. +If an attacker gets your key, he can't unlock and use it without knowing the +passphrase. In ``repokey`` and ``keyfile`` modes, you can change your passphrase +for existing repos. .. include:: usage/create.rst.inc From cc88d174af5ad400e312be97eb49c819a95d21c3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 15 Jul 2015 11:14:53 +0200 Subject: [PATCH 177/241] fix typos --- borg/remote.py | 2 +- borg/repository.py | 2 +- borg/testsuite/archiver.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/borg/remote.py b/borg/remote.py index fe859ac1f..5da5f9cf8 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -203,7 +203,7 @@ class RemoteRepository: break r, w, x = select.select(self.r_fds, w_fds, self.x_fds, 1) if x: - raise Exception('FD exception occured') + raise Exception('FD exception occurred') if r: data = os.read(self.stdout_fd, BUFSIZE) if not data: diff --git a/borg/repository.py b/borg/repository.py index 97cdeac04..b760ec0cd 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -413,7 +413,7 @@ class Repository: self.segments.setdefault(segment, 0) def preload(self, ids): - """Preload objects (only applies to remote repositories + """Preload objects (only applies to remote repositories) """ diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index e16687ff8..29c6ac1f7 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -83,7 +83,7 @@ class ArchiverTestCaseBase(BaseTestCase): os.mkdir(self.keys_path) os.mkdir(self.cache_path) with open(self.exclude_file_path, 'wb') as 
fd: - fd.write(b'input/file2\n# A commment line, then a blank line\n\n') + fd.write(b'input/file2\n# A comment line, then a blank line\n\n') self._old_wd = os.getcwd() os.chdir(self.tmpdir) From 9b9c8087133c102cb13e9f40302e0cfb73e9590d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 15 Jul 2015 11:30:25 +0200 Subject: [PATCH 178/241] fixed some minor issues found by pycharm/pytest-flakes --- borg/archive.py | 2 +- docs/conf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index f4e21a9f0..886060d1d 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -544,7 +544,7 @@ class Archive: return Archive._open_rb(path, st) -class RobustUnpacker(): +class RobustUnpacker: """A restartable/robust version of the streaming msgpack unpacker """ item_keys = [msgpack.packb(name) for name in ('path', 'mode', 'source', 'chunks', 'rdev', 'xattrs', 'user', 'group', 'uid', 'gid', 'mtime')] diff --git a/docs/conf.py b/docs/conf.py index d51da9207..27eba5b76 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,12 +11,12 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os from borg import __version__ as sw_version # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. +#import sys, os #sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- From e4a41c89810f0abd84977aa72967c42b4025db82 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 15 Jul 2015 13:32:05 +0200 Subject: [PATCH 179/241] fix Traceback when running check --repair, attic issue #232 This fix is maybe not perfect yet, but maybe better than nothing. 
A comment by Ernest0x (see https://github.com/jborg/attic/issues/232 ): @ThomasWaldmann your patch did the job. attic check --repair did the repairing and attic delete deleted the archive. Thanks. That said, however, I am not sure if the best place to put the check is where you put it in the patch. For example, the check operation uses a custom msgpack unpacker class named "RobustUnpacker", which it does try to check for correct format (see the comment: "Abort early if the data does not look like a serialized dict"), but it seems it does not catch my case. The relevant code in 'cache.py', on the other hand, uses msgpack's Unpacker class. --- borg/archive.py | 4 ++++ borg/cache.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/borg/archive.py b/borg/archive.py index 886060d1d..a133af7bb 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -755,6 +755,10 @@ class ArchiveChecker: for chunk_id, cdata in zip(items, repository.get_many(items)): unpacker.feed(self.key.decrypt(chunk_id, cdata)) for item in unpacker: + if not isinstance(item, dict): + self.report_progress('Did not get expected metadata dict - archive corrupted!', + error=True) + continue yield item repository = cache_if_remote(self.repository) diff --git a/borg/cache.py b/borg/cache.py index 1b66bc1b9..d64cdfb14 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -287,6 +287,9 @@ class Cache: add(chunk_idx, item_id, len(data), len(chunk)) unpacker.feed(data) for item in unpacker: + if not isinstance(item, dict): + print('Error: Did not get expected metadata dict - archive corrupted!') + continue if b'chunks' in item: for chunk_id, size, csize in item[b'chunks']: add(chunk_idx, chunk_id, size, csize) From e17ca5123efa9f80aa93f611de2b02695f14ac43 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 15 Jul 2015 13:54:25 +0200 Subject: [PATCH 180/241] Update CHANGES --- CHANGES | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGES b/CHANGES index ef9376f80..aa7af3247 100644 --- a/CHANGES 
+++ b/CHANGES @@ -16,6 +16,8 @@ New features: - borg info now reports chunk counts in the chunk index. - borg create --compression 0..9 to select zlib compression level, fixes #66 (attic #295). +- borg init --encryption repokey (to store the encryption key into the repo), + deprecate --encryption passphrase, fixes #85 - improve at-end error logging, always log exceptions and set exit_code=1 - LoggedIO: better error checks / exceptions / exception handling @@ -26,6 +28,7 @@ Bug fixes: - better Exception msg if no Borg is installed on the remote repo server, #56 - create a RepositoryCache implementation that can cope with >2GiB, fixes attic #326. +- fix Traceback when running check --repair, attic #232 - clarify help text, fixes #73. Other changes: @@ -36,7 +39,12 @@ Other changes: - document environment variables and return codes (attic #324, attic #52) - web site: add related projects, fix web site url, IRC #borgbackup - Fedora/Fedora-based install instructions added to docs + - Cygwin-based install instructions added to docs - updated AUTHORS + - add FAQ entries about redundancy / integrity + - clarify that borg extract uses the cwd as extraction target + - update internals doc about chunker params, memory usage and compression + - use borg-tmp as prefix for temporary files / directories - short prune options without "keep-" are deprecated, do not suggest them - improved tox configuration, documented there how to invoke it From e3472a248ad30b509ba3aa3ebb901ba391882b4f Mon Sep 17 00:00:00 2001 From: Thomas Harold Date: Thu, 16 Jul 2015 16:40:33 -0400 Subject: [PATCH 181/241] Fix format issue in installation.rst for Cygwin Fixed formatting issue in installation.rst where packages were listed all on one line. 
--- docs/installation.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/installation.rst b/docs/installation.rst index b1a20d8f2..132043b11 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -128,6 +128,7 @@ Please note that running under cygwin is rather experimental. You'll need at least (use the cygwin installer to fetch/install these): :: + python3 python3-setuptools python3-cython From 2907dd0094c281147382bf085a4d201c97105bf8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 17 Jul 2015 22:55:28 +0200 Subject: [PATCH 182/241] add BountySource link --- docs/_themes/local/sidebarusefullinks.html | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/_themes/local/sidebarusefullinks.html b/docs/_themes/local/sidebarusefullinks.html index 2f35c2695..2f71b275d 100644 --- a/docs/_themes/local/sidebarusefullinks.html +++ b/docs/_themes/local/sidebarusefullinks.html @@ -7,6 +7,7 @@
  • PyPI packages
  • GitHub
  • Issue Tracker
  • +
  • Bounties & Fundraisers
  • Mailing List
  • From ed2548ca027b4fd062a10ddf2ce359d9115f40a4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 20 Jul 2015 16:16:32 +0200 Subject: [PATCH 183/241] add a __main__.py so nuitka works --- borg/__main__.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 borg/__main__.py diff --git a/borg/__main__.py b/borg/__main__.py new file mode 100644 index 000000000..b38dc4e9e --- /dev/null +++ b/borg/__main__.py @@ -0,0 +1,3 @@ +from borg.archiver import main +main() + From 59c519b3bbcb9cf098a7bb1c467d0a021e19ebc4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 25 Jul 2015 15:37:30 +0200 Subject: [PATCH 184/241] remove outdated locking problem warning --- docs/quickstart.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index d0881fb95..fcb223503 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -159,6 +159,3 @@ mounting the remote filesystem, for example, using sshfs:: $ borg init /mnt/backup $ fusermount -u /mnt -However, be aware that sshfs doesn't fully implement POSIX locks, so -you must be sure to not have two processes trying to access the same -repository at the same time. From 0b226aec7a6ae6a60337cb2bc8cc7cf80d3bfff5 Mon Sep 17 00:00:00 2001 From: Roberto Polli Date: Sat, 25 Jul 2015 15:39:42 +0200 Subject: [PATCH 185/241] add development.txt and modify tox.ini --- requirements.d/development.txt | 4 ++++ tox.ini | 4 +--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 requirements.d/development.txt diff --git a/requirements.d/development.txt b/requirements.d/development.txt new file mode 100644 index 000000000..6d2928a92 --- /dev/null +++ b/requirements.d/development.txt @@ -0,0 +1,4 @@ +tox +mock +pytest +Cython diff --git a/tox.ini b/tox.ini index 79603cda9..fdf91a2db 100644 --- a/tox.ini +++ b/tox.ini @@ -19,9 +19,7 @@ envlist = py32, py33, py34 # Change dir to avoid import problem for cython code.
The directory does # not really matter, should be just different from the toplevel dir. changedir = {toxworkdir} -deps = - pytest - mock +deps = -rrequirements.d/development.txt commands = py.test --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * From 12a50bc6fe71d0c08f79b993aa0ae9465444a155 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 25 Jul 2015 18:38:16 +0200 Subject: [PATCH 186/241] tested and updated cygwin docs, thanks to fvia --- docs/installation.rst | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 132043b11..54895527a 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -123,7 +123,8 @@ Some of the steps detailled below might be useful also for non-git installs. Cygwin (from git) ----------------- -Please note that running under cygwin is rather experimental. +Please note that running under cygwin is rather experimental, stuff has been +tested with CygWin (x86-64) v2.1.0. You'll need at least (use the cygwin installer to fetch/install these): @@ -144,7 +145,14 @@ You can then install ``pip`` and ``virtualenv``: :: - easy_install pip + easy_install-3.4 pip pip install virtualenv And now continue as for Linux (see above). + +In case that creation of the virtual env fails, try deleting this file: + +:: + + /usr/lib/python3.4/__pycache__/platform.cpython-34.pyc + From 859c33d42b846efa7b290007f6762d366fe4bf98 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 25 Jul 2015 19:07:24 +0200 Subject: [PATCH 187/241] docs: add solutions for (ll)fuse installation problems --- docs/installation.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/installation.rst b/docs/installation.rst index 54895527a..985f43eba 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -57,6 +57,11 @@ Some of the steps detailled below might be useful also for non-git installs. 
apt-get install build-essential # optional: lowlevel FUSE py binding - to mount backup archives + # in case you get complaints about permission denied on /etc/fuse.conf: + # on ubuntu this means your user is not in the "fuse" group. just add + # yourself there, log out and log in again. + # if it complains about not being able to find llfuse: make a symlink + # borg-env/lib/python3.4/site-packages/llfuse -> /usr/lib/python3/dist-packages/llfuse apt-get install python3-llfuse fuse # optional: for unit testing From fb998cbd66c78a88b51e58bac2042454c99280cf Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 26 Jul 2015 14:47:18 +0200 Subject: [PATCH 188/241] docs: add note about how to run borg from virtual env --- docs/installation.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/installation.rst b/docs/installation.rst index 985f43eba..90bd33f84 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -19,6 +19,12 @@ usually available as an optional install. Virtualenv_ can be used to build and install |project_name| without affecting the system Python or requiring root access. +Important: +if you install into a virtual environment, you need to activate +the virtual env first (``source borg-env/bin/activate``). +Alternatively, directly run ``borg-env/bin/borg`` (or symlink that into some +directory that is in your PATH so you can just run ``borg``). + The llfuse_ python package is also required if you wish to mount an archive as a FUSE filesystem. Only FUSE >= 2.8.0 can support llfuse. 
From fde952a6d96a6c25868bdeb946552a3e9a4cb2f9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 26 Jul 2015 15:37:36 +0200 Subject: [PATCH 189/241] .gitignore: add nuitka and cache stuff --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 1e38a1479..f3564a429 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,7 @@ platform_linux.c *.so docs/usage/*.inc .idea/ +.cache/ +borg.build/ +borg.dist/ +borg.exe From 195545075ae7492cfc07fd531daf294fce2bfffe Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 26 Jul 2015 17:38:16 +0200 Subject: [PATCH 190/241] repo delete: add destroy to allowed rpc methods, fixes issue #114 also: add test, automate YES confirmation for testing --- borg/archiver.py | 12 +++++++----- borg/remote.py | 1 + borg/testsuite/archiver.py | 10 ++++++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 6275edf22..84e568e73 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -288,11 +288,13 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") print("You requested to completely DELETE the repository *including* all archives it contains:") for archive_info in manifest.list_archive_infos(sort_by='ts'): print(format_archive(archive_info)) - print("""Type "YES" if you understand this and want to continue.\n""") - if input('Do you want to continue? ') == 'YES': - repository.destroy() - cache.destroy() - print("Repository and corresponding cache were deleted.") + while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): + print("""Type "YES" if you understand this and want to continue.\n""") + if input('Do you want to continue? 
') == 'YES': + break + repository.destroy() + cache.destroy() + print("Repository and corresponding cache were deleted.") return self.exit_code def do_mount(self, args): diff --git a/borg/remote.py b/borg/remote.py index 5da5f9cf8..afec54710 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -34,6 +34,7 @@ class RepositoryServer: 'check', 'commit', 'delete', + 'destroy', 'get', 'list', 'negotiate', diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 29c6ac1f7..35f8171d9 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -394,6 +394,16 @@ class ArchiverTestCase(ArchiverTestCaseBase): repository = Repository(self.repository_path) self.assert_equal(len(repository), 1) + def test_delete_repo(self): + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('dir2/file2', size=1024 * 80) + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + self.cmd('create', self.repository_location + '::test.2', 'input') + self.cmd('delete', self.repository_location) + # Make sure the repo is gone + self.assertFalse(os.path.exists(self.repository_path)) + def test_corrupted_repository(self): self.cmd('init', self.repository_location) self.create_src_archive('test') From bcdfda7ef083d77f5f05258d5d3f522790bd49f1 Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 27 Jul 2015 14:38:03 +0200 Subject: [PATCH 191/241] Linked "issue #1" to issue #1. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 3668d38cf..e65dda433 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES. THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. -Read issue #1 on the issue tracker, goals are being defined there. +Read `issue #1 `_ on the issue tracker, goals are being defined there. 
Please also see the LICENSE for more informations. From 30d47cb68ab3ac58966b84dca1e8b7d5a4e05ee5 Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 27 Jul 2015 14:41:43 +0200 Subject: [PATCH 192/241] Fixed *ALL* the links! --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index e65dda433..fe523575e 100644 --- a/README.rst +++ b/README.rst @@ -7,7 +7,7 @@ an efficient and secure way to backup data. The data deduplication technique used makes Borg suitable for daily backups since only changes are stored. -Borg is a fork of Attic and maintained by "The Borg Collective" (see AUTHORS file). +Borg is a fork of `Attic `_ and maintained by "`The Borg Collective `_". BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER @@ -19,7 +19,7 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. Read `issue #1 `_ on the issue tracker, goals are being defined there. -Please also see the LICENSE for more informations. +Please also see the `LICENSE `_ for more informations. Easy to use ~~~~~~~~~~~ From 60dfde2dc6236f30f312d2deb42dc94390221c06 Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 27 Jul 2015 14:45:32 +0200 Subject: [PATCH 193/241] Reference to BorgWeb --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index fe523575e..a11a29ca9 100644 --- a/README.rst +++ b/README.rst @@ -28,6 +28,8 @@ Initialize backup repository and create a backup archive:: $ borg init /mnt/backup $ borg create -v /mnt/backup::documents ~/Documents +For a graphical frontend refer to our complementary project `BorgWeb `_. 
+ Main features ~~~~~~~~~~~~~ Space efficient storage From 729cc4d82d9597fdf56a81fc8a5bcd6228850fce Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 27 Jul 2015 22:27:13 +0200 Subject: [PATCH 194/241] document how to backup raw disk --- docs/usage.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/usage.rst b/docs/usage.rst index 46c56c9e0..f983ff662 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -139,6 +139,9 @@ Examples # Backup huge files with little chunk management overhead $ borg create --chunker-params 19,23,21,4095 /mnt/backup::VMs /srv/VMs + # Backup a raw device (must not be active/in use/mounted at that time) + $ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda - + .. include:: usage/extract.rst.inc From 300c7351e70e487775967c464751111320f8225c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 27 Jul 2015 23:02:52 +0200 Subject: [PATCH 195/241] be more verbose about the great deduplication algorithm --- README.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.rst b/README.rst index a11a29ca9..c9cf90706 100644 --- a/README.rst +++ b/README.rst @@ -38,6 +38,23 @@ Space efficient storage variable length chunks and only chunks that have never been seen before are compressed and added to the repository. 
+ The content-defined chunking based deduplication is applied to remove + duplicate chunks within: + + * the current backup data set (even inside single files / streams) + * current and previous backups of same machine + * all the chunks in the same repository, even if coming from other machines + + This advanced deduplication method does NOT depend on: + + * file/directory names staying the same (so you can move your stuff around + without killing the deduplication, even between machines sharing a repo) + * complete files or time stamps staying the same (if a big file changes a + little, only a few new chunks will be stored - this is great for VMs or + raw disks) + * the absolute position of a data chunk inside a file (stuff may get shifted + and will still be found by the deduplication algorithm) + Optional data encryption All data can be protected using 256-bit AES encryption and data integrity and authenticity is verified using HMAC-SHA256. From 81191243596aa1bfce143b062b4fbdf188be1e8f Mon Sep 17 00:00:00 2001 From: Jeff Rizzo Date: Tue, 28 Jul 2015 11:39:00 -0700 Subject: [PATCH 196/241] Don't process an entry if the nodump flag is set. --- attic/archiver.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/attic/archiver.py b/attic/archiver.py index 5731ffb4c..8d63c0714 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -158,6 +158,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # Ignore unix sockets if stat.S_ISSOCK(st.st_mode): return + # Ignore if nodump flag set + if st.st_flags and stat.UF_NODUMP(st.st_flags): + return self.print_verbose(remove_surrogates(path)) if stat.S_ISREG(st.st_mode): try: From e11a4a5d3a912596d8db9d85f721699231982526 Mon Sep 17 00:00:00 2001 From: Jeff Rizzo Date: Tue, 28 Jul 2015 12:30:25 -0700 Subject: [PATCH 197/241] Check the UF_NODUMP flag properly. 
--- attic/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index 8d63c0714..335012477 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -159,7 +159,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") if stat.S_ISSOCK(st.st_mode): return # Ignore if nodump flag set - if st.st_flags and stat.UF_NODUMP(st.st_flags): + if st.st_flags and (st.st_flags & stat.UF_NODUMP): return self.print_verbose(remove_surrogates(path)) if stat.S_ISREG(st.st_mode): From ebc04b0ebffe82036670409148ef856f83226be8 Mon Sep 17 00:00:00 2001 From: Jeff Rizzo Date: Tue, 28 Jul 2015 15:01:42 -0700 Subject: [PATCH 198/241] Check for lchflags properly. --- attic/archiver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index 335012477..05076fc0e 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -21,6 +21,7 @@ from attic.helpers import Error, location_validator, format_time, \ is_cachedir, bigint_to_int from attic.remote import RepositoryServer, RemoteRepository +has_lchflags = hasattr(os, 'lchflags') class Archiver: @@ -159,7 +160,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") if stat.S_ISSOCK(st.st_mode): return # Ignore if nodump flag set - if st.st_flags and (st.st_flags & stat.UF_NODUMP): + if has_lchflags and (st.st_flags & stat.UF_NODUMP): return self.print_verbose(remove_surrogates(path)) if stat.S_ISREG(st.st_mode): From 1e097bfd6b9904954be2f739cbb67ac478df4ce5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 29 Jul 2015 21:39:20 +0200 Subject: [PATCH 199/241] docs: add some words about resource usage --- docs/usage.rst | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/docs/usage.rst b/docs/usage.rst index f983ff662..d80d5a5e2 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -53,11 +53,15 @@ Environment Variables Directories: 
BORG_KEYS_DIR : Default to '~/.borg/keys'. This directory contains keys for encrypted repositories. - BORG_CACHE_DIR : Default to '~/.cache/borg'. This directory contains the local cache. + BORG_CACHE_DIR : Default to '~/.cache/borg'. This directory contains the local cache and might need a lot + of space for dealing with big repositories). Building: BORG_OPENSSL_PREFIX : Adds given OpenSSL header file directory to the default locations (setup.py). + General: + TMPDIR : where temporary files are stored (might need a lot of temporary space for some operations) + Please note: @@ -66,6 +70,33 @@ Please note: (e.g. mode 600, root:root). +Resource Usage +-------------- + +|project_name| might use a lot of resources depending on the size of the data set it is dealing with. + +CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded. + +Memory (RAM): the chunks index and files index is read into memory for performance reasons. + +Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the + deduplicated chunks used to represent them in the repository. + +Cache files: chunks index and files index (plus a collection of single-archive chunk indexes). + +Chunks index: proportional to the amount of data chunks in your repo. lots of small chunks in your repo implies a big + chunks index. you may need to tweak the chunker params (see create options) if you have a lot of data and + you want to keep the chunks index at some reasonable size. + +Files index: proportional to the amount of files in your last backup. can be switched off (see create options), but + next backup will be much slower if you do. + +Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) of course have to go over + the connection (ssh: repo url). if you use a locally mounted network filesystem, additional some copy + operations used for transaction support go over the connection additionally. 
if you backup multiple sources to + one target repository, additional traffic happens for cache resynchronization. + + .. include:: usage/init.rst.inc Examples From 9d21e4ad69189a39bacef8ed7e2a32093dcb0398 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 29 Jul 2015 21:48:57 +0200 Subject: [PATCH 200/241] docs: add some words about resource usage (fixed wording) --- docs/usage.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index d80d5a5e2..fcbee5fef 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -77,12 +77,12 @@ Resource Usage CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded. -Memory (RAM): the chunks index and files index is read into memory for performance reasons. +Memory (RAM): the chunks index and the files index are read into memory for performance reasons. Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the deduplicated chunks used to represent them in the repository. -Cache files: chunks index and files index (plus a collection of single-archive chunk indexes). +Cache files: chunks index and files index (plus a compressed collection of single-archive chunk indexes). Chunks index: proportional to the amount of data chunks in your repo. lots of small chunks in your repo implies a big chunks index. you may need to tweak the chunker params (see create options) if you have a lot of data and @@ -91,10 +91,12 @@ Chunks index: proportional to the amount of data chunks in your repo. lots of sm Files index: proportional to the amount of files in your last backup. can be switched off (see create options), but next backup will be much slower if you do. -Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) of course have to go over - the connection (ssh: repo url). 
if you use a locally mounted network filesystem, additional some copy - operations used for transaction support go over the connection additionally. if you backup multiple sources to - one target repository, additional traffic happens for cache resynchronization. +Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) data of course has to go + over the connection (ssh: repo url). if you use a locally mounted network filesystem, additionally some copy + operations used for transaction support also go over the connection. if you backup multiple sources to one + target repository, additional traffic happens for cache resynchronization. + +In case you are interested in more details, please read the internals documentation. .. include:: usage/init.rst.inc From 3be55bedd3a44206c5398931cdebdc7cda4b94f5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 30 Jul 2015 15:21:13 +0200 Subject: [PATCH 201/241] chunker: n needs to be a signed size_t ... as it is also used for the read() return value, which can be negative in case of errors. 
--- borg/_chunker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 20461e7c6..4db21b75b 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -127,7 +127,7 @@ chunker_free(Chunker *c) static int chunker_fill(Chunker *c) { - size_t n; + ssize_t n; PyObject *data; memmove(c->data, c->data + c->last, c->position + c->remaining - c->last); c->position -= c->last; From 9f1d92c993a5c2219f8b9e6b9b2b8474cb0b630b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 3 Aug 2015 23:48:56 +0200 Subject: [PATCH 202/241] implement --umask M affects local and remote umask, secure by default M == 077 --- borg/archiver.py | 6 ++++-- borg/helpers.py | 10 ++++++++++ borg/remote.py | 8 +++++--- borg/testsuite/archiver.py | 7 +++++++ 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 84e568e73..b8faa62f3 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -21,7 +21,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - is_cachedir, bigint_to_int, ChunkerParams + is_cachedir, bigint_to_int, ChunkerParams, set_umask from .remote import RepositoryServer, RemoteRepository @@ -220,7 +220,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # be restrictive when restoring files, restore permissions later if sys.getfilesystemencoding() == 'ascii': print('Warning: File system encoding is "ascii", extracting non-ascii filenames will not be supported.') - os.umask(0o077) repository = self.open_repository(args.archive) manifest, key = Manifest.load(repository) archive = Archive(repository, key, manifest, args.archive.archive, @@ -511,6 +510,8 @@ Type "Yes I 
am sure" if you understand this and want to continue.\n""") default=False, help='verbose output') common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false') + common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M', + help='set umask to M (local and remote, default: 0o077)') # We can't use argparse for "serve" since we don't want it to show up in "Available commands" if args: @@ -821,6 +822,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") args = parser.parse_args(args or ['-h']) self.verbose = args.verbose + set_umask(args.umask) update_excludes(args) return args.func(args) diff --git a/borg/helpers.py b/borg/helpers.py index d20532723..7043822b7 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -605,3 +605,13 @@ def int_to_bigint(value): if value.bit_length() > 63: return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True) return value + + +def set_umask(umask): + return os.umask(umask) + + +def get_umask(): + umask = set_umask(0) + set_umask(umask) + return umask diff --git a/borg/remote.py b/borg/remote.py index afec54710..aede16d9e 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -10,7 +10,7 @@ import traceback from . 
import __version__ -from .helpers import Error, IntegrityError +from .helpers import Error, IntegrityError, get_umask from .repository import Repository BUFSIZE = 10 * 1024 * 1024 @@ -124,8 +124,10 @@ class RemoteRepository: self.responses = {} self.unpacker = msgpack.Unpacker(use_list=False) self.p = None + # use local umask also for the remote process + umask = ['--umask', '%03o' % get_umask()] if location.host == '__testsuite__': - args = [sys.executable, '-m', 'borg.archiver', 'serve'] + self.extra_test_args + args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args else: args = ['ssh'] if location.port: @@ -134,7 +136,7 @@ class RemoteRepository: args.append('%s@%s' % (location.user, location.host)) else: args.append('%s' % location.host) - args += ['borg', 'serve'] + args += ['borg', 'serve'] + umask self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE) self.stdin_fd = self.p.stdin.fileno() self.stdout_fd = self.p.stdout.fileno() diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 35f8171d9..b466d6ad6 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -425,6 +425,13 @@ class ArchiverTestCase(ArchiverTestCaseBase): # Restore permissions so shutil.rmtree is able to delete it os.system('chmod -R u+w ' + self.repository_path) + def test_umask(self): + self.create_regular_file('file1', size=1024 * 80) + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + mode = os.stat(self.repository_path).st_mode + self.assertEqual(stat.S_IMODE(mode), 0o700) + def test_cmdline_compatibility(self): self.create_regular_file('file1', size=1024 * 80) self.cmd('init', self.repository_location) From 71646249cb8167efb502f6bc0f3a1565eea2c598 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 4 Aug 2015 09:53:26 +0200 Subject: [PATCH 203/241] implement --remote-path to allow non-default-path borg locations --- borg/archiver.py | 3 +++ 
borg/remote.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index b8faa62f3..818955e0b 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -512,6 +512,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false') common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M', help='set umask to M (local and remote, default: 0o077)') + common_parser.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH', + help='set remote path to executable (default: "borg")') # We can't use argparse for "serve" since we don't want it to show up in "Available commands" if args: @@ -823,6 +825,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") args = parser.parse_args(args or ['-h']) self.verbose = args.verbose set_umask(args.umask) + RemoteRepository.remote_path = args.remote_path update_excludes(args) return args.func(args) diff --git a/borg/remote.py b/borg/remote.py index aede16d9e..fad036ccb 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -108,9 +108,9 @@ class RepositoryServer: class RemoteRepository: extra_test_args = [] + remote_path = None class RPCError(Exception): - def __init__(self, name): self.name = name @@ -136,7 +136,7 @@ class RemoteRepository: args.append('%s@%s' % (location.user, location.host)) else: args.append('%s' % location.host) - args += ['borg', 'serve'] + umask + args += [self.remote_path, 'serve'] + umask self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE) self.stdin_fd = self.p.stdin.fileno() self.stdout_fd = self.p.stdout.fileno() From 175a6d7b0418f35527e588b9d4bbb5a4ca5013db Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 4 Aug 2015 12:31:06 +0200 Subject: [PATCH 204/241] simplify umask code in a similar way as the remote_path code was implemented: just patch the 
RemoteRepository class object --- borg/archiver.py | 5 +++-- borg/helpers.py | 10 ---------- borg/remote.py | 5 +++-- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 818955e0b..8230677da 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -21,7 +21,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - is_cachedir, bigint_to_int, ChunkerParams, set_umask + is_cachedir, bigint_to_int, ChunkerParams from .remote import RepositoryServer, RemoteRepository @@ -824,8 +824,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") args = parser.parse_args(args or ['-h']) self.verbose = args.verbose - set_umask(args.umask) + os.umask(args.umask) RemoteRepository.remote_path = args.remote_path + RemoteRepository.umask = args.umask update_excludes(args) return args.func(args) diff --git a/borg/helpers.py b/borg/helpers.py index 7043822b7..d20532723 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -605,13 +605,3 @@ def int_to_bigint(value): if value.bit_length() > 63: return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True) return value - - -def set_umask(umask): - return os.umask(umask) - - -def get_umask(): - umask = set_umask(0) - set_umask(umask) - return umask diff --git a/borg/remote.py b/borg/remote.py index fad036ccb..1d7ae84e2 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -10,7 +10,7 @@ import traceback from . 
import __version__ -from .helpers import Error, IntegrityError, get_umask +from .helpers import Error, IntegrityError from .repository import Repository BUFSIZE = 10 * 1024 * 1024 @@ -109,6 +109,7 @@ class RepositoryServer: class RemoteRepository: extra_test_args = [] remote_path = None + umask = None class RPCError(Exception): def __init__(self, name): @@ -125,7 +126,7 @@ class RemoteRepository: self.unpacker = msgpack.Unpacker(use_list=False) self.p = None # use local umask also for the remote process - umask = ['--umask', '%03o' % get_umask()] + umask = ['--umask', '%03o' % self.umask] if location.host == '__testsuite__': args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args else: From 8e717c55e689031796da42eb9193bceff580e8ab Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 4 Aug 2015 12:49:13 +0200 Subject: [PATCH 205/241] updated CHANGES --- CHANGES | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGES b/CHANGES index aa7af3247..d7080b15d 100644 --- a/CHANGES +++ b/CHANGES @@ -20,9 +20,14 @@ New features: deprecate --encryption passphrase, fixes #85 - improve at-end error logging, always log exceptions and set exit_code=1 - LoggedIO: better error checks / exceptions / exception handling +- implement --remote-path to allow non-default-path borg locations, #125 +- implement --umask M and use 077 as default umask, #117 Bug fixes: +- fix segfault that happened for unreadable files (chunker: n needs to be a + signed size_t), #116 +- repo delete: add destroy to allowed rpc methods, fixes issue #114 - more compatible repository locking code (based on mkdir), maybe fixes #92 (attic #317, attic #201). - better Exception msg if no Borg is installed on the remote repo server, #56 @@ -34,6 +39,7 @@ Bug fixes: Other changes: - improved docs: + - added docs/misc directory for misc. writeups that won't be included "as is" into the html docs. 
- document environment variables and return codes (attic #324, attic #52) @@ -44,6 +50,11 @@ Other changes: - add FAQ entries about redundancy / integrity - clarify that borg extract uses the cwd as extraction target - update internals doc about chunker params, memory usage and compression + - add some words about resource usage + - document how to backup raw disk + - add note about how to run borg from virtual env + - add solutions for (ll)fuse installation problems + - tested and updated cygwin docs - use borg-tmp as prefix for temporary files / directories - short prune options without "keep-" are deprecated, do not suggest them @@ -51,6 +62,7 @@ Other changes: - remove usage of unittest.mock, always use mock from pypi - use entrypoints instead of scripts, for better use of the wheel format and modern installs +- add requirements.d/development.txt and modify tox.ini I forgot to list some stuff already implemented in 0.23.0, here they are: From 45e3c3d04e880606902c7c53950b3192c43bef18 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 4 Aug 2015 13:22:04 +0200 Subject: [PATCH 206/241] add some compatibility notes about the umask --- CHANGES | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index d7080b15d..f19b7f8aa 100644 --- a/CHANGES +++ b/CHANGES @@ -5,6 +5,17 @@ Borg Changelog Version 0.24.0 -------------- +Incompatible changes (compared to 0.23): + +- borg now always issues --umask NNN option when invoking another borg via ssh + on the repository server. By that, it's making sure it uses the same umask + for remote repos as for local ones. Because of this, you must upgrade both + server and client(s) to 0.24. +- the default umask is 077 now (if you do not specify via --umask) which might + be a different one as you used previously. The default umask avoids that + you accidentially give access permissions for group and/or others to files + created by borg (e.g. the repository). 
+ New features: - borg create --chunker-params ... to configure the chunker, fixes #16 @@ -21,7 +32,7 @@ New features: - improve at-end error logging, always log exceptions and set exit_code=1 - LoggedIO: better error checks / exceptions / exception handling - implement --remote-path to allow non-default-path borg locations, #125 -- implement --umask M and use 077 as default umask, #117 +- implement --umask M and use 077 as default umask for better security, #117 Bug fixes: From 5b441f78014b45dac6d49f09048475fbe7cdc3af Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 4 Aug 2015 13:30:35 +0200 Subject: [PATCH 207/241] some small Cython code improvements, thanks to Stefan Behnel --- borg/hashindex.pyx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index d5d4b6f45..c44fe3947 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -24,15 +24,18 @@ cdef extern from "_hashindex.c": int _le32toh(int v) -_NoDefault = object() +cdef _NoDefault = object() +cimport cython + +@cython.internal cdef class IndexBase: cdef HashIndex *index key_size = 32 def __cinit__(self, capacity=0, path=None): if path: - self.index = hashindex_read(os.fsencode(path)) + self.index = hashindex_read(os.fsencode(path)) if not self.index: raise Exception('hashindex_read failed') else: @@ -49,7 +52,7 @@ cdef class IndexBase: return cls(path=path) def write(self, path): - if not hashindex_write(self.index, os.fsencode(path)): + if not hashindex_write(self.index, os.fsencode(path)): raise Exception('hashindex_write failed') def clear(self): From d65ca51d54d9eaac9fa2b35a9dcda5ebb1556b8c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 12:59:51 +0200 Subject: [PATCH 208/241] deduplicate and refactor the docs README.rst (shown on github and also at the start of the html docs) shall be like an elevator speech - convince readers in a very short time. 
this is most important, everything else can come after we got the reader's interest. include README into docs to avoid duplication. also include CHANGES into docs. add developer docs, move examples from tox.ini there add separate support docs remove glossary, most of what was there can be understood by an admin from context move attic and compatibility note to the end --- CHANGES => CHANGES.rst | 0 README.rst | 128 +++++++++++++++++++++++++---------------- docs/changes.rst | 4 ++ docs/development.rst | 67 +++++++++++++++++++++ docs/foreword.rst | 65 --------------------- docs/index.rst | 75 ++---------------------- docs/intro.rst | 7 +++ docs/support.rst | 34 +++++++++++ tox.ini | 11 ---- 9 files changed, 195 insertions(+), 196 deletions(-) rename CHANGES => CHANGES.rst (100%) create mode 100644 docs/changes.rst create mode 100644 docs/development.rst delete mode 100644 docs/foreword.rst create mode 100644 docs/intro.rst create mode 100644 docs/support.rst diff --git a/CHANGES b/CHANGES.rst similarity index 100% rename from CHANGES rename to CHANGES.rst diff --git a/README.rst b/README.rst index c9cf90706..1e1920410 100644 --- a/README.rst +++ b/README.rst @@ -1,52 +1,31 @@ -|build| - What is Borg? ------------- -Borg is a deduplicating backup program. The main goal of Borg is to provide -an efficient and secure way to backup data. The data deduplication -technique used makes Borg suitable for daily backups since only changes -are stored. +Borg is a deduplicating backup program. +Optionally, it also supports compression and authenticated encryption. -Borg is a fork of `Attic `_ and maintained by "`The Borg Collective `_". - -BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. -EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER -CHANGES (like when going from 0.x.y to 1.0.0). Please read CHANGES document. - -NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES. 
- -THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. - -Read `issue #1 `_ on the issue tracker, goals are being defined there. - -Please also see the `LICENSE `_ for more informations. - -Easy to use -~~~~~~~~~~~ -Initialize backup repository and create a backup archive:: - - $ borg init /mnt/backup - $ borg create -v /mnt/backup::documents ~/Documents - -For a graphical frontend refer to our complementary project `BorgWeb `_. +The main goal of Borg is to provide an efficient and secure way to backup data. +The data deduplication technique used makes Borg suitable for daily backups +since only changes are stored. +The authenticated encryption technique makes it suitable for backups to not +fully trusted targets. Main features ~~~~~~~~~~~~~ Space efficient storage - Variable block size deduplication is used to reduce the number of bytes + Variable block size deduplication is used to reduce the number of bytes stored by detecting redundant data. Each file is split into a number of variable length chunks and only chunks that have never been seen before are - compressed and added to the repository. + added to the repository. The content-defined chunking based deduplication is applied to remove - duplicate chunks within: + duplicate chunks within: * the current backup data set (even inside single files / streams) * current and previous backups of same machine * all the chunks in the same repository, even if coming from other machines This advanced deduplication method does NOT depend on: - + * file/directory names staying the same (so you can move your stuff around without killing the deduplication, even between machines sharing a repo) * complete files or time stamps staying the same (if a big file changes a @@ -59,37 +38,84 @@ Optional data encryption All data can be protected using 256-bit AES encryption and data integrity and authenticity is verified using HMAC-SHA256. 
+Optional compression + All data can be compressed (by zlib, level 0-9). + Off-site backups Borg can store data on any remote host accessible over SSH. This is - most efficient if Borg is also installed on the remote host. + most efficient if Borg is also installed on the remote host. If you can't + install Borg there, you can also use some network filesystem (sshfs, nfs, + ...), but it will be less efficient. Backups mountable as filesystems Backup archives are mountable as userspace filesystems for easy backup verification and restores. -What do I need? ---------------- -Borg requires Python 3.2 or above to work. -Borg also requires a sufficiently recent OpenSSL (>= 1.0.0). -In order to mount archives as filesystems, llfuse is required. +Platforms Borg works on + * Linux + * FreeBSD + * Mac OS X + * Cygwin (unsupported) -How do I install it? --------------------- -:: - $ pip3 install borgbackup +Easy to use +~~~~~~~~~~~ +Initialize a new backup repository and create a backup archive:: -Where are the docs? -------------------- -Go to https://borgbackup.github.io/ for a prebuilt version of the documentation. -You can also build it yourself from the docs folder. + $ borg init /mnt/backup + $ borg create /mnt/backup::Monday ~/Documents -Where are the tests? --------------------- -The tests are in the borg/testsuite package. To run the test suite use the -following command:: +Now doing another backup, just to show off the great deduplication:: + + $ borg create --stats /mnt/backup::Tuesday ~/Documents + + Archive name: Tuesday + Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a + Start time: Tue Mar 25 12:00:10 2014 + End time: Tue Mar 25 12:00:10 2014 + Duration: 0.08 seconds + Number of files: 358 + Original size Compressed size Deduplicated size + This archive: 57.16 MB 46.78 MB 151.67 kB + All archives: 114.02 MB 93.46 MB 44.81 MB + +For a graphical frontend refer to our complementary project +`BorgWeb `_. 
+ + +How to proceed from here +------------------------ +Everything about requirements, installation, getting a quick start, usage +reference, FAQ, support info, internals and developer infos is in our +documentation: + +See `our online documentation `_ +or alternatively read it in raw text form in the `docs/*.rst` files. + + +Notes +----- + +Build status: +|build| + +Borg is a fork of `Attic `_ and maintained by +"`The Borg Collective `_". + +BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. +EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER +CHANGES (like when going from 0.x.y to 1.0.0). Please read CHANGES document. + +NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES. + +THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. + +Read `issue #1 `_ on the issue +tracker, goals are being defined there. + +For more information, please also see the +`LICENSE `_. - $ fakeroot -u tox # you need to have tox and pytest installed .. |build| image:: https://travis-ci.org/borgbackup/borg.svg :alt: Build Status diff --git a/docs/changes.rst b/docs/changes.rst new file mode 100644 index 000000000..5e859ecc3 --- /dev/null +++ b/docs/changes.rst @@ -0,0 +1,4 @@ +.. include:: global.rst.inc +.. _changelog: + +.. include:: ../CHANGES.rst diff --git a/docs/development.rst b/docs/development.rst new file mode 100644 index 000000000..6c06eeb9e --- /dev/null +++ b/docs/development.rst @@ -0,0 +1,67 @@ +.. include:: global.rst.inc +.. _development: + +Development +=========== + +This chapter will get you started with |project_name|' development. + +|project_name| is written in Python (with a little bit of Cython and C for +the performance critical parts). + + +Building a development environment +---------------------------------- + +First, just install borg into a virtual env as described before. 
+ +To install some additional packages needed for running the tests, activate your +virtual env and run:: + + pip install -r requirements.d/development.txt + + +Running the tests +----------------- + +The tests are in the borg/testsuite package. + +To run them, you need to have fakeroot, tox and pytest installed. + +To run the test suite use the following command:: + + fakeroot -u tox # run all tests + +Some more advanced examples:: + + # verify a changed tox.ini (run this after any change to tox.ini): + fakeroot -u tox --recreate + + fakeroot -u tox -e py32 # run all tests, but only on python 3.2 + + fakeroot -u tox borg.testsuite.locking # only run 1 test module + + fakeroot -u tox borg.testsuite.locking -- -k '"not Timer"' # exclude some tests + + fakeroot -u tox borg.testsuite -- -v # verbose py.test + +Important notes: + +- Without fakeroot -u some tests will fail. +- When using -- to give options to py.test, you MUST also give borg.testsuite[.module]. + +Building the docs with Sphinx +----------------------------- + +The documentation (in reStructuredText format, .rst) is in docs/. + +To build the html version of it, you need to have sphinx installed:: + + pip3 install sphinx + +Now run:: + + cd docs/ + make html + +Then point a web browser at docs/_build/html/index.html. diff --git a/docs/foreword.rst b/docs/foreword.rst deleted file mode 100644 index c3f70c42e..000000000 --- a/docs/foreword.rst +++ /dev/null @@ -1,65 +0,0 @@ -.. include:: global.rst.inc -.. _foreword: - -Foreword -======== - -|project_name| is a secure backup program for Linux, FreeBSD and Mac OS X. -|project_name| is designed for efficient data storage where only new or -modified data is stored. - -Features --------- - -Space efficient storage - Variable block size `deduplication`_ is used to reduce the number of bytes - stored by detecting redundant data. 
Each file is split into a number of - variable length chunks and only chunks that have never been seen before - are added to the repository (and optionally compressed). - -Optional data encryption - All data can be protected using 256-bit AES_ encryption and data integrity - and authenticity is verified using `HMAC-SHA256`_. - -Off-site backups - |project_name| can store data on any remote host accessible over SSH as - long as |project_name| is installed. If you don't have |project_name| - installed there, you can use some network filesytem (sshfs, nfs, ...) - to mount a filesystem located on your remote host and use it like it was - local (but that will be slower). - -Backups mountable as filesystems - Backup archives are :ref:`mountable ` as - `userspace filesystems`_ for easy backup verification and restores. - - -Glossary --------- - -.. _deduplication_def: - -Deduplication - Deduplication is a technique for improving storage utilization by - eliminating redundant data. - -.. _archive_def: - -Archive - An archive is a collection of files along with metadata that include file - permissions, directory structure and various file attributes. - Since each archive in a repository must have a unique name a good naming - convention is ``hostname-YYYY-MM-DD``. - -.. _repository_def: - -Repository - A repository is a filesystem directory storing data from zero or more - archives. The data in a repository is both deduplicated and - optionally encrypted making it both efficient and safe. Repositories are - created using :ref:`borg_init` and the contents can be listed using - :ref:`borg_list`. - -Key file - When a repository is initialized a key file containing a password - protected encryption key is created. It is vital to keep this file safe - since the repository data is totally inaccessible without it. diff --git a/docs/index.rst b/docs/index.rst index 8ca4fe092..a871ef353 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,81 +1,18 @@ .. 
include:: global.rst.inc -Welcome to Borg -================ -|project_name| is a deduplicating backup program. -Optionally, it also supports compression and authenticated encryption. -The main goal of |project_name| is to provide an efficient and secure way -to backup data. The data deduplication technique used makes |project_name| -suitable for daily backups since only the changes are stored. The authenticated -encryption makes it suitable for backups to not fully trusted targets. - -|project_name| is written in Python (with a little bit of Cython and C for -the performance critical parts). - - -Easy to use ------------ -Initialize a new backup :ref:`repository ` and create your -first backup :ref:`archive ` in two lines:: - - $ borg init /mnt/backup - $ borg create /mnt/backup::Monday ~/Documents - $ borg create --stats /mnt/backup::Tuesday ~/Documents - Archive name: Tuesday - Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a - Start time: Tue Mar 25 12:00:10 2014 - End time: Tue Mar 25 12:00:10 2014 - Duration: 0.08 seconds - Number of files: 358 - Original size Compressed size Deduplicated size - This archive: 57.16 MB 46.78 MB 151.67 kB - All archives: 114.02 MB 93.46 MB 44.81 MB - -See the :ref:`quickstart` chapter for a more detailed example. - -Easy installation ------------------ -You can use pip to install |project_name| quickly and easily:: - - $ pip3 install borgbackup - -Need more help with installing? See :ref:`installation`. - -User's Guide -============ +Borg Documentation +================== .. toctree:: :maxdepth: 2 - foreword + intro installation quickstart usage faq + support + changes internals - -Getting help -============ - -If you've found a bug or have a concrete feature request, please create a new -ticket on the project's `issue tracker`_ (after checking whether someone else -already has reported the same thing). - -For more general questions or discussions, IRC or mailing list are preferred. 
- -IRC ---- -Join us on channel #borgbackup on chat.freenode.net. As usual on IRC, just -ask or tell directly and then patiently wait for replies. Stay connected. - -Mailing list ------------- - -There is a mailing list for Borg on librelist_ that you can use for feature -requests and general discussions about Borg. A mailing list archive is -available `here `_. - -To subscribe to the list, send an email to borgbackup@librelist.com and reply -to the confirmation mail. Likewise, to unsubscribe, send an email to -borgbackup-unsubscribe@librelist.com and reply to the confirmation mail. + development diff --git a/docs/intro.rst b/docs/intro.rst new file mode 100644 index 000000000..7e7759c7d --- /dev/null +++ b/docs/intro.rst @@ -0,0 +1,7 @@ +.. include:: global.rst.inc +.. _foreword: + +Introduction +============ + +.. include:: ../README.rst diff --git a/docs/support.rst b/docs/support.rst new file mode 100644 index 000000000..5e953f202 --- /dev/null +++ b/docs/support.rst @@ -0,0 +1,34 @@ +.. include:: global.rst.inc +.. _support: + +Support +======= + +Issue Tracker +------------- + +If you've found a bug or have a concrete feature request, please create a new +ticket on the project's `issue tracker`_ (after checking whether someone else +already has reported the same thing). + +For more general questions or discussions, IRC or mailing list are preferred. + +IRC +--- +Join us on channel #borgbackup on chat.freenode.net. + +As usual on IRC, just ask or tell directly and then patiently wait for replies. +Stay connected. + +Mailing list +------------ + +There is a mailing list for Borg on librelist_ that you can use for feature +requests and general discussions about Borg. A mailing list archive is +available `here `_. + +To subscribe to the list, send an email to borgbackup@librelist.com and reply +to the confirmation mail. + +To unsubscribe, send an email to borgbackup-unsubscribe@librelist.com and reply +to the confirmation mail. 
diff --git a/tox.ini b/tox.ini index fdf91a2db..c1a9e019f 100644 --- a/tox.ini +++ b/tox.ini @@ -1,16 +1,5 @@ # tox configuration - if you change anything here, run this to verify: # fakeroot -u tox --recreate -# -# Invokation examples: -# fakeroot -u tox # run all tests -# fakeroot -u tox -e py32 # run all tests, but only on python 3.2 -# fakeroot -u tox borg.testsuite.locking # only run 1 test module -# fakeroot -u tox borg.testsuite.locking -- -k '"not Timer"' # exclude some tests -# fakeroot -u tox borg.testsuite -- -v # verbose py.test -# -# Important notes: -# Without fakeroot -u some tests will fail. -# When using -- to give options to py.test, you MUST also give borg.testsuite[.module]. [tox] envlist = py32, py33, py34 From e3baeefa1b8ae9ce864c8a9dbc720e64134405ef Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 13:10:56 +0200 Subject: [PATCH 209/241] docs: reorganize sidebar, prev/next at top --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 27eba5b76..027fd0d4d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -134,7 +134,7 @@ html_static_path = [] # Custom sidebar templates, maps document names to template names. html_sidebars = { 'index': ['sidebarlogo.html', 'sidebarusefullinks.html', 'searchbox.html'], - '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sidebarusefullinks.html', 'searchbox.html'] + '**': ['sidebarlogo.html', 'relations.html', 'searchbox.html', 'localtoc.html', 'sidebarusefullinks.html'] } # Additional templates that should be rendered to pages, maps page names to # template names. 
From dcdcbda87d20822359c0f0f3b80596a0533beaad Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 15:17:07 +0200 Subject: [PATCH 210/241] try if readthedocs finds the borg package this way --- docs/conf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 027fd0d4d..9c0e84cb8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,13 +11,13 @@ # All configuration values have a default; values that are commented out # serve to show the default. -from borg import __version__ as sw_version - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#import sys, os -#sys.path.insert(0, os.path.abspath('.')) +import sys, os +sys.path.insert(0, os.path.abspath('..')) + +from borg import __version__ as sw_version # -- General configuration ----------------------------------------------------- From 58d57df46d53eb90000fa70555856a5f508e4ea5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 16:39:50 +0200 Subject: [PATCH 211/241] improve README.rst --- README.rst | 115 +++++++++++++++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/README.rst b/README.rst index 1e1920410..0d4ee1dac 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ -What is Borg? -------------- -Borg is a deduplicating backup program. -Optionally, it also supports compression and authenticated encryption. +What is BorgBackup? +------------------- +BorgBackup (short: Borg) is a deduplicating backup program. +Optionally, it supports compression and authenticated encryption. The main goal of Borg is to provide an efficient and secure way to backup data. The data deduplication technique used makes Borg suitable for daily backups @@ -9,54 +9,69 @@ since only changes are stored. 
The authenticated encryption technique makes it suitable for backups to not fully trusted targets. +`Borg Installation docs `_ + + Main features ~~~~~~~~~~~~~ -Space efficient storage - Variable block size deduplication is used to reduce the number of bytes - stored by detecting redundant data. Each file is split into a number of - variable length chunks and only chunks that have never been seen before are - added to the repository. +**Space efficient storage** + Deduplication based on content-defined chunking is used to reduce the number + of bytes stored: each file is split into a number of variable length chunks + and only chunks that have never been seen before are added to the repository. - The content-defined chunking based deduplication is applied to remove - duplicate chunks within: + To deduplicate, all the chunks in the same repository are considered, no + matter whether they come from different machines, from previous backups, + from the same backup or even from the same single file. - * the current backup data set (even inside single files / streams) - * current and previous backups of same machine - * all the chunks in the same repository, even if coming from other machines + Compared to other deduplication approaches, this method does NOT depend on: - This advanced deduplication method does NOT depend on: + * file/directory names staying the same - * file/directory names staying the same (so you can move your stuff around - without killing the deduplication, even between machines sharing a repo) - * complete files or time stamps staying the same (if a big file changes a - little, only a few new chunks will be stored - this is great for VMs or - raw disks) - * the absolute position of a data chunk inside a file (stuff may get shifted - and will still be found by the deduplication algorithm) + So you can move your stuff around without killing the deduplication, + even between machines sharing a repo. 
-Optional data encryption - All data can be protected using 256-bit AES encryption and data integrity - and authenticity is verified using HMAC-SHA256. + * complete files or time stamps staying the same -Optional compression - All data can be compressed (by zlib, level 0-9). + If a big file changes a little, only a few new chunks will be stored - + this is great for VMs or raw disks. -Off-site backups - Borg can store data on any remote host accessible over SSH. This is - most efficient if Borg is also installed on the remote host. If you can't - install Borg there, you can also use some network filesystem (sshfs, nfs, - ...), but it will be less efficient. + * the absolute position of a data chunk inside a file -Backups mountable as filesystems - Backup archives are mountable as userspace filesystems for easy backup - verification and restores. + Stuff may get shifted and will still be found by the deduplication + algorithm. -Platforms Borg works on +**Speed** + * performance critical code (chunking, compression, encryption) is + implemented in C/Cython + * local caching of files/chunks index data + * quick detection of unmodified files + +**Data encryption** + All data can be protected using 256-bit AES encryption, data integrity and + authenticity is verified using HMAC-SHA256. + +**Compression** + All data can be compressed by zlib, level 0-9. + +**Off-site backups** + Borg can store data on any remote host accessible over SSH. If Borg is + installed on the remote host, big performance gains can be achieved + compared to using a network filesystem (sshfs, nfs, ...). + +**Backups mountable as filesystems** + Backup archives are mountable as userspace filesystems for easy interactive + backup examination and restores (e.g. by using a regular file manager). 
+ +**Platforms Borg works on** * Linux * FreeBSD * Mac OS X * Cygwin (unsupported) +**Free and Open Source Software** + * security and functionality can be audited independently + * licensed under the BSD (3-clause) license + Easy to use ~~~~~~~~~~~ @@ -70,38 +85,28 @@ Now doing another backup, just to show off the great deduplication:: $ borg create --stats /mnt/backup::Tuesday ~/Documents Archive name: Tuesday - Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a + Archive fingerprint: 387a5e3f9b0e792e91c... Start time: Tue Mar 25 12:00:10 2014 End time: Tue Mar 25 12:00:10 2014 Duration: 0.08 seconds Number of files: 358 - Original size Compressed size Deduplicated size - This archive: 57.16 MB 46.78 MB 151.67 kB - All archives: 114.02 MB 93.46 MB 44.81 MB + Original size Compressed size Deduplicated size + This archive: 57.16 MB 46.78 MB 151.67 kB <--- ! + All archives: 114.02 MB 93.46 MB 44.81 MB For a graphical frontend refer to our complementary project `BorgWeb `_. -How to proceed from here ------------------------- -Everything about requirements, installation, getting a quick start, usage -reference, FAQ, support info, internals and developer infos is in our -documentation: - -See `our online documentation `_ -or alternatively read it in raw text form in the `docs/*.rst` files. - - Notes ----- -Build status: -|build| - Borg is a fork of `Attic `_ and maintained by "`The Borg Collective `_". +Read `issue #1 `_ about the initial +considerations regarding project goals and policy of the Borg project. + BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER CHANGES (like when going from 0.x.y to 1.0.0). Please read CHANGES document. @@ -110,12 +115,10 @@ NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES. THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. 
-Read `issue #1 `_ on the issue -tracker, goals are being defined there. - For more information, please also see the `LICENSE `_. +|build| .. |build| image:: https://travis-ci.org/borgbackup/borg.svg :alt: Build Status From 7e21d95deddc2030d0f00063013121dc7a4568a8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 16:40:38 +0200 Subject: [PATCH 212/241] fix CHANGES.rst filename in MANIFEST.in --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 480b1088a..d74d9e2c4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include README.rst AUTHORS LICENSE CHANGES MANIFEST.in versioneer.py +include README.rst AUTHORS LICENSE CHANGES.rst MANIFEST.in versioneer.py recursive-include borg *.pyx recursive-include docs * recursive-exclude docs *.pyc From a1e039ba215b09f68b75d4727a67a144382ccbe3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 23:32:53 +0200 Subject: [PATCH 213/241] reimplement the chunk index merging in C the python code could take a rather long time and likely most of it was converting stuff from python to C and back. 
--- borg/_hashindex.c | 19 +++++++++++++++++++ borg/cache.py | 3 +-- borg/hashindex.pyx | 4 ++++ borg/testsuite/hashindex.py | 22 ++++++++++++++++++++++ 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index 2eebd09d9..128ab5b2f 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -385,3 +385,22 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs *total_unique_chunks = unique_chunks; *total_chunks = chunks; } + +static void +hashindex_merge(HashIndex *index, HashIndex *other) +{ + int32_t key_size = index->key_size; + const int32_t *other_values; + int32_t *my_values; + void *key = NULL; + + while((key = hashindex_next_key(other, key))) { + other_values = key + key_size; + my_values = hashindex_get(index, key); + if(my_values == NULL) { + hashindex_set(index, key, other_values); + } else { + *my_values += *other_values; + } + } +} diff --git a/borg/cache.py b/borg/cache.py index d64cdfb14..f50b456eb 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -309,8 +309,7 @@ class Cache: tf_in.extract(archive_id_hex, tmp_dir) chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') archive_chunk_idx = ChunkIndex.read(chunk_idx_path) - for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems(): - add(chunk_idx, chunk_id, size, csize, incr=count) + chunk_idx.merge(archive_chunk_idx) os.unlink(chunk_idx_path) self.begin_txn() diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index c44fe3947..83416bcdf 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -14,6 +14,7 @@ cdef extern from "_hashindex.c": void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize, long long *total_unique_chunks, long long *total_chunks) + void hashindex_merge(HashIndex *index, HashIndex *other) int hashindex_get_size(HashIndex *index) int hashindex_write(HashIndex *index, char *path) void 
*hashindex_get(HashIndex *index, void *key) @@ -190,6 +191,9 @@ cdef class ChunkIndex(IndexBase): &total_unique_chunks, &total_chunks) return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks + def merge(self, ChunkIndex other): + hashindex_merge(self.index, other.index) + cdef class ChunkKeyIterator: cdef ChunkIndex idx diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py index 41c019d61..bbefeb05e 100644 --- a/borg/testsuite/hashindex.py +++ b/borg/testsuite/hashindex.py @@ -6,6 +6,11 @@ from ..hashindex import NSIndex, ChunkIndex from . import BaseTestCase +def H(x): + # make some 32byte long thing that depends on x + return bytes('%-0.32d' % x, 'ascii') + + class HashIndexTestCase(BaseTestCase): def _generic_test(self, cls, make_value, sha): @@ -78,3 +83,20 @@ class HashIndexTestCase(BaseTestCase): second_half = list(idx.iteritems(marker=all[49][0])) self.assert_equal(len(second_half), 50) self.assert_equal(second_half, all[50:]) + + def test_chunkindex_merge(self): + idx1 = ChunkIndex() + idx1[H(1)] = 1, 100, 100 + idx1[H(2)] = 2, 200, 200 + idx1[H(3)] = 3, 300, 300 + # no H(4) entry + idx2 = ChunkIndex() + idx2[H(1)] = 4, 100, 100 + idx2[H(2)] = 5, 200, 200 + # no H(3) entry + idx2[H(4)] = 6, 400, 400 + idx1.merge(idx2) + assert idx1[H(1)] == (5, 100, 100) + assert idx1[H(2)] == (7, 200, 200) + assert idx1[H(3)] == (3, 300, 300) + assert idx1[H(4)] == (6, 400, 400) From ba753563141d4cf2ecf0beb141cf37d1538bb0e3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 7 Aug 2015 15:17:07 +0200 Subject: [PATCH 214/241] add OS X to travis --- .travis.yml | 55 +++++++++++++++++++++++++++++++------- .travis/install.sh | 43 +++++++++++++++++++++++++++++ .travis/run.sh | 23 ++++++++++++++++ .travis/upload_coverage.sh | 10 +++++++ 4 files changed, 121 insertions(+), 10 deletions(-) create mode 100755 .travis/install.sh create mode 100755 .travis/run.sh create mode 100755 .travis/upload_coverage.sh diff --git 
a/.travis.yml b/.travis.yml index 87d3afb02..89fdbbff1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,47 @@ +sudo: required + language: python -python: - - "3.2" - - "3.3" - - "3.4" -# command to install dependencies + +cache: + directories: + - $HOME/.cache/pip + +matrix: + include: + - python: 3.2 + os: linux + env: TOXENV=py32 + - python: 3.3 + os: linux + env: TOXENV=py33 + - python: 3.4 + os: linux + env: TOXENV=py34 + - language: generic + os: osx + osx_image: beta-xcode6.3 + env: TOXENV=py32 + - language: generic + os: osx + osx_image: beta-xcode6.3 + env: TOXENV=py33 + - language: generic + os: osx + osx_image: beta-xcode6.3 + env: TOXENV=py34 + install: - - "sudo apt-get install -y libacl1-dev" - - "pip install --use-mirrors Cython" - - "pip install -e ." -# command to run tests -script: fakeroot -u py.test + - ./.travis/install.sh + +script: + - ./.travis/run.sh + +after_success: + - ./.travis/upload_coverage.sh + +notifications: + irc: + channels: + - "irc.freenode.org#borgbackup" + use_notice: true + skip_join: true diff --git a/.travis/install.sh b/.travis/install.sh new file mode 100755 index 000000000..21ff76000 --- /dev/null +++ b/.travis/install.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +set -e +set -x + +if [[ "$(uname -s)" == 'Darwin' ]]; then + brew update || brew update + + if [[ "${OPENSSL}" != "0.9.8" ]]; then + brew outdated openssl || brew upgrade openssl + fi + + if which pyenv > /dev/null; then + eval "$(pyenv init -)" + fi + + brew outdated pyenv || brew upgrade pyenv + + case "${TOXENV}" in + py32) + pyenv install 3.2.6 + pyenv global 3.2.6 + ;; + py33) + pyenv install 3.3.6 + pyenv global 3.3.6 + ;; + py34) + pyenv install 3.4.3 + pyenv global 3.4.3 + ;; + esac + pyenv rehash + python -m pip install --user virtualenv +else + pip install virtualenv + sudo apt-get install -y libacl1-dev +fi + +python -m virtualenv ~/.venv +source ~/.venv/bin/activate +pip install tox pytest codecov Cython +pip install -e . 
diff --git a/.travis/run.sh b/.travis/run.sh new file mode 100755 index 000000000..cf504ac51 --- /dev/null +++ b/.travis/run.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -e +set -x + +if [[ "$(uname -s)" == "Darwin" ]]; then + eval "$(pyenv init -)" + if [[ "${OPENSSL}" != "0.9.8" ]]; then + # set our flags to use homebrew openssl + export ARCHFLAGS="-arch x86_64" + export LDFLAGS="-L/usr/local/opt/openssl/lib" + export CFLAGS="-I/usr/local/opt/openssl/include" + fi +fi + +source ~/.venv/bin/activate + +if [[ "$(uname -s)" == "Darwin" ]]; then + # no fakeroot on OS X + sudo tox -e $TOXENV +else + fakeroot -u tox +fi diff --git a/.travis/upload_coverage.sh b/.travis/upload_coverage.sh new file mode 100755 index 000000000..73584acfb --- /dev/null +++ b/.travis/upload_coverage.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e +set -x + +NO_COVERAGE_TOXENVS=(pep8) +if ! [[ "${NO_COVERAGE_TOXENVS[*]}" =~ "${TOXENV}" ]]; then + source ~/.venv/bin/activate + bash <(curl -s https://codecov.io/bash) -e TRAVIS_OS_NAME,TOXENV +fi From 5864bd76ebc2985f82fd49d69aa58895cb0698a8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 01:55:46 +0200 Subject: [PATCH 215/241] fix test coverage / codecov.io, use xcode6.4 --- .coveragerc | 13 +++++++++++++ .gitignore | 1 + .travis.yml | 6 +++--- .travis/install.sh | 2 +- .travis/upload_coverage.sh | 3 ++- requirements.d/development.txt | 1 + tox.ini | 2 +- 7 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 000000000..9056361b9 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,13 @@ +[run] +branch = True +source = borg + +[report] +exclude_lines = + pragma: no cover + def __repr__ + raise AssertionError + raise NotImplementedError + if 0: + if __name__ == .__main__.: +ignore_errors = True diff --git a/.gitignore b/.gitignore index f3564a429..97df7c610 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ docs/usage/*.inc borg.build/ 
borg.dist/ borg.exe +.coverage diff --git a/.travis.yml b/.travis.yml index 89fdbbff1..497bc7c04 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,15 +19,15 @@ matrix: env: TOXENV=py34 - language: generic os: osx - osx_image: beta-xcode6.3 + osx_image: xcode6.4 env: TOXENV=py32 - language: generic os: osx - osx_image: beta-xcode6.3 + osx_image: xcode6.4 env: TOXENV=py33 - language: generic os: osx - osx_image: beta-xcode6.3 + osx_image: xcode6.4 env: TOXENV=py34 install: diff --git a/.travis/install.sh b/.travis/install.sh index 21ff76000..80b39226f 100755 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -39,5 +39,5 @@ fi python -m virtualenv ~/.venv source ~/.venv/bin/activate -pip install tox pytest codecov Cython +pip install tox pytest pytest-cov codecov Cython pip install -e . diff --git a/.travis/upload_coverage.sh b/.travis/upload_coverage.sh index 73584acfb..c2aa91bd2 100755 --- a/.travis/upload_coverage.sh +++ b/.travis/upload_coverage.sh @@ -6,5 +6,6 @@ set -x NO_COVERAGE_TOXENVS=(pep8) if ! [[ "${NO_COVERAGE_TOXENVS[*]}" =~ "${TOXENV}" ]]; then source ~/.venv/bin/activate - bash <(curl -s https://codecov.io/bash) -e TRAVIS_OS_NAME,TOXENV + ln .tox/.coverage .coverage + codecov -e TRAVIS_OS_NAME,TOXENV fi diff --git a/requirements.d/development.txt b/requirements.d/development.txt index 6d2928a92..37677a00f 100644 --- a/requirements.d/development.txt +++ b/requirements.d/development.txt @@ -1,4 +1,5 @@ tox mock pytest +pytest-cov<2.0.0 Cython diff --git a/tox.ini b/tox.ini index c1a9e019f..a120a237a 100644 --- a/tox.ini +++ b/tox.ini @@ -9,6 +9,6 @@ envlist = py32, py33, py34 # not really matter, should be just different from the toplevel dir. 
changedir = {toxworkdir} deps = -rrequirements.d/development.txt -commands = py.test --pyargs {posargs:borg.testsuite} +commands = py.test --cov=borg --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * From 6164640ecce359d5303d78e7b984619afea53666 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 02:34:42 +0200 Subject: [PATCH 216/241] add codecov.io badge --- README.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 0d4ee1dac..22320d3fe 100644 --- a/README.rst +++ b/README.rst @@ -118,8 +118,12 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. For more information, please also see the `LICENSE `_. -|build| +|build| |coverage| .. |build| image:: https://travis-ci.org/borgbackup/borg.svg :alt: Build Status :target: https://travis-ci.org/borgbackup/borg + +.. |coverage| image:: http://codecov.io/github/borgbackup/borg/coverage.svg?branch=master + :alt: Test Coverage + :target: http://codecov.io/github/borgbackup/borg?branch=master From 40801d74a6a05f6a47ace486022f0f17f2b0629c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 19:03:37 +0200 Subject: [PATCH 217/241] remove old unittest discover / runner code, we use py.test now --- borg/testsuite/__init__.py | 27 +-------------------------- borg/testsuite/run.py | 11 ----------- 2 files changed, 1 insertion(+), 37 deletions(-) delete mode 100644 borg/testsuite/run.py diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index e1eb37eaa..9872edeb6 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -73,7 +73,7 @@ class BaseTestCase(unittest.TestCase): d1 = [filename] + [getattr(s1, a) for a in attrs] d2 = [filename] + [getattr(s2, a) for a in attrs] if not os.path.islink(path1) or utime_supports_fd: - # Older versions of llfuse does not support ns precision properly + # Older versions of llfuse do not support ns precision properly if 
fuse and not have_fuse_mtime_ns: d1.append(round(st_mtime_ns(s1), -4)) d2.append(round(st_mtime_ns(s2), -4)) @@ -94,28 +94,3 @@ class BaseTestCase(unittest.TestCase): return time.sleep(.1) raise Exception('wait_for_mount(%s) timeout' % path) - - -def get_tests(suite): - """Generates a sequence of tests from a test suite - """ - for item in suite: - try: - # TODO: This could be "yield from..." with Python 3.3+ - for i in get_tests(item): - yield i - except TypeError: - yield item - - -class TestLoader(unittest.TestLoader): - """A customized test loader that properly detects and filters our test cases - """ - - def loadTestsFromName(self, pattern, module=None): - suite = self.discover('borg.testsuite', '*.py') - tests = unittest.TestSuite() - for test in get_tests(suite): - if pattern.lower() in test.id().lower(): - tests.addTest(test) - return tests diff --git a/borg/testsuite/run.py b/borg/testsuite/run.py deleted file mode 100644 index 19d87699b..000000000 --- a/borg/testsuite/run.py +++ /dev/null @@ -1,11 +0,0 @@ -import unittest - -from . 
import TestLoader - - -def main(): - unittest.main(testLoader=TestLoader(), defaultTest='') - - -if __name__ == '__main__': - main() From a9027a033d21d69b67553c896e66c12b8ddcb5e7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 19:12:14 +0200 Subject: [PATCH 218/241] coverage: omit some infrastructure / generated files --- .coveragerc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.coveragerc b/.coveragerc index 9056361b9..620f29fef 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,6 +1,10 @@ [run] branch = True source = borg +omit = + borg/__init__.py + borg/__main__.py + borg/_version.py [report] exclude_lines = From 60e34968b023a3411941fa9f17f3bd2ac332fb92 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 19:22:25 +0200 Subject: [PATCH 219/241] codecov: fixes for env vars and osx --- .travis/upload_coverage.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis/upload_coverage.sh b/.travis/upload_coverage.sh index c2aa91bd2..4cb8273cf 100755 --- a/.travis/upload_coverage.sh +++ b/.travis/upload_coverage.sh @@ -7,5 +7,7 @@ NO_COVERAGE_TOXENVS=(pep8) if ! 
[[ "${NO_COVERAGE_TOXENVS[*]}" =~ "${TOXENV}" ]]; then source ~/.venv/bin/activate ln .tox/.coverage .coverage - codecov -e TRAVIS_OS_NAME,TOXENV + # on osx, tests run as root, need access to .coverage + sudo chmod 666 .coverage + codecov -e TRAVIS_OS_NAME TOXENV fi From 616d16a9b028bdeff8a9c5f6d0d2e63566095059 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 20:50:21 +0200 Subject: [PATCH 220/241] add help string for --no-files-cache, fixes #140 --- borg/archiver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index 8230677da..393609df2 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -509,7 +509,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', default=False, help='verbose output') - common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false') + common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false', + help='do not load/update the file metadata cache used to detect unchanged files') common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M', help='set umask to M (local and remote, default: 0o077)') common_parser.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH', From cce0d20dad2ef3ca3fe6473786027877c8d483b3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 20:52:05 +0200 Subject: [PATCH 221/241] test whether borg extract can process unusual filenames --- borg/testsuite/archiver.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index b466d6ad6..7a2b75780 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -243,6 +243,19 @@ class ArchiverTestCase(ArchiverTestCaseBase): if sparse_support and hasattr(st, 'st_blocks'): 
self.assert_true(st.st_blocks * 512 < total_len / 10) # is output sparse? + def test_unusual_filenames(self): + filenames = ['normal', 'with some blanks', '(with_parens)', ] + for filename in filenames: + filename = os.path.join(self.input_path, filename) + with open(filename, 'wb') as fd: + pass + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + for filename in filenames: + with changedir('output'): + self.cmd('extract', self.repository_location + '::test', os.path.join('input', filename)) + assert os.path.exists(os.path.join('output', 'input', filename)) + def test_repository_swap_detection(self): self.create_test_files() os.environ['BORG_PASSPHRASE'] = 'passphrase' From 35b0f38f5ce94cb2b6ef7bfcce94fe224b6b0566 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 21:14:13 +0200 Subject: [PATCH 222/241] cache sync: show progress indication sync can take quite long, so show what we are doing. --- borg/cache.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/borg/cache.py b/borg/cache.py index f50b456eb..a480d708f 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -306,10 +306,15 @@ class Cache: chunk_idx.clear() for tarinfo in tf_in: archive_id_hex = tarinfo.name + archive_name = tarinfo.pax_headers['archive_name'] + print("- processing archive: %s -> extract, " % archive_name, end='') ; sys.stdout.flush() tf_in.extract(archive_id_hex, tmp_dir) chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') + print("read, ", end='') ; sys.stdout.flush() archive_chunk_idx = ChunkIndex.read(chunk_idx_path) + print("merge, ", end='') ; sys.stdout.flush() chunk_idx.merge(archive_chunk_idx) + print("done.") os.unlink(chunk_idx_path) self.begin_txn() From 03f39c2663745e54a720191e9ca6d280e4c02720 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 22:11:40 +0200 Subject: [PATCH 223/241] borg check: give a named single archive to it, fixes #139 --- borg/archive.py | 
23 +++++++++++++++-------- borg/archiver.py | 13 ++++++++----- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index a133af7bb..8798c4fdb 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -609,7 +609,7 @@ class ArchiveChecker: self.error_found = False self.possibly_superseded = set() - def check(self, repository, repair=False, last=None): + def check(self, repository, repair=False, archive=None, last=None): self.report_progress('Starting archive consistency check...') self.repair = repair self.repository = repository @@ -619,8 +619,8 @@ class ArchiveChecker: self.manifest = self.rebuild_manifest() else: self.manifest, _ = Manifest.load(repository, key=self.key) - self.rebuild_refcounts(last=last) - if last is None: + self.rebuild_refcounts(archive=archive, last=last) + if last is None and archive is None: self.verify_chunks() else: self.report_progress('Orphaned objects check skipped (needs all archives checked)') @@ -680,7 +680,7 @@ class ArchiveChecker: self.report_progress('Manifest rebuild complete', error=True) return manifest - def rebuild_refcounts(self, last=None): + def rebuild_refcounts(self, archive=None, last=None): """Rebuild object reference counts by walking the metadata Missing and/or incorrect data is repaired when detected @@ -762,10 +762,17 @@ class ArchiveChecker: yield item repository = cache_if_remote(self.repository) - num_archives = len(self.manifest.archives) - archive_items = sorted(self.manifest.archives.items(), reverse=True, - key=lambda name_info: name_info[1][b'time']) - end = None if last is None else min(num_archives, last) + if archive is None: + # we need last N or all archives + archive_items = sorted(self.manifest.archives.items(), reverse=True, + key=lambda name_info: name_info[1][b'time']) + num_archives = len(self.manifest.archives) + end = None if last is None else min(num_archives, last) + else: + # we only want one specific archive + archive_items = [item for item in 
self.manifest.archives.items() if item[0] == archive] + num_archives = 1 + end = 1 for i, (name, info) in enumerate(archive_items[:end]): self.report_progress('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives)) archive_id = info[b'id'] diff --git a/borg/archiver.py b/borg/archiver.py index 393609df2..9e2917ea3 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -85,8 +85,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") print('Repository check complete, no problems found.') else: return 1 - if not args.repo_only and not ArchiveChecker().check(repository, repair=args.repair, last=args.last): - return 1 + if not args.repo_only and not ArchiveChecker().check( + repository, repair=args.repair, archive=args.repository.archive, last=args.last): + return 1 return 0 def do_change_passphrase(self, args): @@ -554,6 +555,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") and other types of damage. After that the consistency and correctness of the archive metadata is verified. + By giving an archive name, you can specifically check that archive. + The archive metadata checks can be time consuming and requires access to the key file and/or passphrase if encryption is enabled. These checks can be skipped using the --repository-only option. 
@@ -563,9 +566,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=check_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_check) - subparser.add_argument('repository', metavar='REPOSITORY', - type=location_validator(archive=False), - help='repository to check consistency of') + subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', + type=location_validator(), + help='repository or archive to check consistency of') subparser.add_argument('--repository-only', dest='repo_only', action='store_true', default=False, help='only perform repository checks') From 4f6c43baecb3dabc68070f22123861d3de415e19 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 00:36:17 +0200 Subject: [PATCH 224/241] document what borg check does, fixes #138 --- borg/archive.py | 2 +- borg/archiver.py | 39 +++++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 8798c4fdb..82fd57cb8 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -631,7 +631,7 @@ class ArchiveChecker: def init_chunks(self): """Fetch a list of all object keys from repository """ - # Explicity set the initial hash table capacity to avoid performance issues + # Explicitly set the initial hash table capacity to avoid performance issues # due to hash table "resonance" capacity = int(len(self.repository) * 1.2) self.chunks = ChunkIndex(capacity) diff --git a/borg/archiver.py b/borg/archiver.py index 9e2917ea3..0b9fa0432 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -550,16 +550,39 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='select encryption method') check_epilog = textwrap.dedent(""" - The check command verifies the consistency of a repository and the corresponding - archives. The underlying repository data files are first checked to detect bit rot - and other types of damage. 
After that the consistency and correctness of the archive - metadata is verified. + The check command verifies the consistency of a repository and the corresponding archives. - By giving an archive name, you can specifically check that archive. + First, the underlying repository data files are checked: + - For all segments the segment magic (header) is checked + - For all objects stored in the segments, all metadata (e.g. crc and size) and + all data is read. The read data is checked by size and CRC. Bit rot and other + types of accidental damage can be detected this way. + - If we are in repair mode and a integrity error is detected for a segment, + we try to recover as many objects from the segment as possible. + - In repair mode, it makes sure that the index is consistent with the data + stored in the segments. + - If you use a remote repo server via ssh:, the repo check is executed on the + repo server without causing significant network traffic. + - The repository check can be skipped using the --archives-only option. - The archive metadata checks can be time consuming and requires access to the key - file and/or passphrase if encryption is enabled. These checks can be skipped using - the --repository-only option. + Second, the consistency and correctness of the archive metadata is verified: + - Is the repo manifest present? If not, it is rebuilt from archive metadata + chunks. + - Check if archive metadata chunk is present. if not, remove archive from + manifest. + - For all files (items) in the archive, for all chunks referenced by these + files, check if chunk is present (if not and we are in repair mode, replace + it with a chunk of zeros). + - Rebuild the chunks cache (refcounts) within the given archives in memory. 
+ - If we are in repair mode and we checked all the archives: delete orphaned + chunks from the repo, write the repo manifest + - if you use a remote repo server via ssh:, the archive check is executed on + the client machine (because if encryption is enabled, the checks will require + decryption and this is always done client-side, because key access will be + required). Archive and file (item) metadata will get fetched over the network, + but not content data. + - The archive checks can be time consuming, they can be skipped using the + --repository-only option. """) subparser = subparsers.add_parser('check', parents=[common_parser], description=self.do_check.__doc__, From 80ee8b98af6be14131d56673165e0cefb266d6ff Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 12:43:57 +0200 Subject: [PATCH 225/241] fix the repair mode if one used --last (or, more recently, gave an archive name), verify_chunks (old method name) was not called because it requires all archives having been checked. the problem was that the final manifest.write() and repository.commit() were also done in that method, so all other repair work did not get committed in that case. I moved these calls to a separate finish() method.
--- borg/archive.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 82fd57cb8..e214c7857 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -611,6 +611,7 @@ class ArchiveChecker: def check(self, repository, repair=False, archive=None, last=None): self.report_progress('Starting archive consistency check...') + self.check_all = archive is None and last is None self.repair = repair self.repository = repository self.init_chunks() @@ -620,10 +621,8 @@ class ArchiveChecker: else: self.manifest, _ = Manifest.load(repository, key=self.key) self.rebuild_refcounts(archive=archive, last=last) - if last is None and archive is None: - self.verify_chunks() - else: - self.report_progress('Orphaned objects check skipped (needs all archives checked)') + self.orphan_chunks_check() + self.finish() if not self.error_found: self.report_progress('Archive consistency check complete, no problems found.') return self.repair or not self.error_found @@ -803,16 +802,22 @@ class ArchiveChecker: add_reference(new_archive_id, len(data), len(cdata), cdata) info[b'id'] = new_archive_id - def verify_chunks(self): - unused = set() - for id_, (count, size, csize) in self.chunks.iteritems(): - if count == 0: - unused.add(id_) - orphaned = unused - self.possibly_superseded - if orphaned: - self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True) + def orphan_chunks_check(self): + if self.check_all: + unused = set() + for id_, (count, size, csize) in self.chunks.iteritems(): + if count == 0: + unused.add(id_) + orphaned = unused - self.possibly_superseded + if orphaned: + self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True) + if self.repair: + for id_ in unused: + self.repository.delete(id_) + else: + self.report_progress('Orphaned objects check skipped (needs all archives checked)') + + def finish(self): if self.repair: - for id_ in unused: - 
self.repository.delete(id_) self.manifest.write() self.repository.commit() From e74c87d5b54556875786232bf9d96866db02e4b8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 12:52:39 +0200 Subject: [PATCH 226/241] update borg check help --- borg/archiver.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 0b9fa0432..af1e1446f 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -567,20 +567,19 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") Second, the consistency and correctness of the archive metadata is verified: - Is the repo manifest present? If not, it is rebuilt from archive metadata - chunks. + chunks (this requires reading and decrypting of all metadata and data). - Check if archive metadata chunk is present. if not, remove archive from manifest. - For all files (items) in the archive, for all chunks referenced by these files, check if chunk is present (if not and we are in repair mode, replace - it with a chunk of zeros). - - Rebuild the chunks cache (refcounts) within the given archives in memory. + it with a same-size chunk of zeros). This requires reading of archive and + file metadata, but not data. - If we are in repair mode and we checked all the archives: delete orphaned - chunks from the repo, write the repo manifest + chunks from the repo. - if you use a remote repo server via ssh:, the archive check is executed on the client machine (because if encryption is enabled, the checks will require decryption and this is always done client-side, because key access will be - required). Archive and file (item) metadata will get fetched over the network, - but not content data. + required). - The archive checks can be time consuming, they can be skipped using the --repository-only option. 
""") From 74e586050861c42192a1e3e1e097051ca6768335 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 13:47:36 +0200 Subject: [PATCH 227/241] document that passphrase(-only) mode is deprecated --- CHANGES.rst | 7 ++++++- borg/archiver.py | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index f19b7f8aa..8235de3aa 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -16,6 +16,11 @@ Incompatible changes (compared to 0.23): you accidentially give access permissions for group and/or others to files created by borg (e.g. the repository). +Deprecations: + +- "--encryption passphrase" mode is deprecated, see #85 and #97. + See the new "--encryption repokey" mode for a replacement. + New features: - borg create --chunker-params ... to configure the chunker, fixes #16 @@ -28,7 +33,7 @@ New features: - borg create --compression 0..9 to select zlib compression level, fixes #66 (attic #295). - borg init --encryption repokey (to store the encryption key into the repo), - deprecate --encryption passphrase, fixes #85 + fixes #85 - improve at-end error logging, always log exceptions and set exit_code=1 - LoggedIO: better error checks / exceptions / exception handling - implement --remote-path to allow non-default-path borg locations, #125 diff --git a/borg/archiver.py b/borg/archiver.py index af1e1446f..38d270647 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -537,6 +537,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") This command initializes an empty repository. A repository is a filesystem directory containing the deduplicated data from zero or more archives. Encryption can be enabled at repository init time. + Please note that the 'passphrase' encryption mode is DEPRECATED (instead of it, + consider using 'repokey'). 
""") subparser = subparsers.add_parser('init', parents=[common_parser], description=self.do_init.__doc__, epilog=init_epilog, @@ -546,8 +548,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") type=location_validator(archive=False), help='repository to create') subparser.add_argument('-e', '--encryption', dest='encryption', - choices=('none', 'passphrase', 'keyfile', 'repokey'), default='none', - help='select encryption method') + choices=('none', 'keyfile', 'repokey', 'passphrase'), default='none', + help='select encryption key mode') check_epilog = textwrap.dedent(""" The check command verifies the consistency of a repository and the corresponding archives. From 7ffdfe1716f38233fba834c656d721d2d06191a0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 14:10:53 +0200 Subject: [PATCH 228/241] update CHANGES --- CHANGES.rst | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 8235de3aa..199bea31b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -13,7 +13,7 @@ Incompatible changes (compared to 0.23): server and client(s) to 0.24. - the default umask is 077 now (if you do not specify via --umask) which might be a different one as you used previously. The default umask avoids that - you accidentially give access permissions for group and/or others to files + you accidentally give access permissions for group and/or others to files created by borg (e.g. the repository). 
Deprecations: @@ -38,11 +38,15 @@ New features: - LoggedIO: better error checks / exceptions / exception handling - implement --remote-path to allow non-default-path borg locations, #125 - implement --umask M and use 077 as default umask for better security, #117 +- borg check: give a named single archive to it, fixes #139 +- cache sync: show progress indication +- cache sync: reimplement the chunk index merging in C Bug fixes: - fix segfault that happened for unreadable files (chunker: n needs to be a signed size_t), #116 +- fix the repair mode, #144 - repo delete: add destroy to allowed rpc methods, fixes issue #114 - more compatible repository locking code (based on mkdir), maybe fixes #92 (attic #317, attic #201). @@ -51,6 +55,7 @@ Bug fixes: fixes attic #326. - fix Traceback when running check --repair, attic #232 - clarify help text, fixes #73. +- add help string for --no-files-cache, fixes #140 Other changes: @@ -66,11 +71,13 @@ Other changes: - add FAQ entries about redundancy / integrity - clarify that borg extract uses the cwd as extraction target - update internals doc about chunker params, memory usage and compression - - add some words about resource usage - - document how to backup raw disk + - add some words about resource usage in general + - document how to backup a raw disk - add note about how to run borg from virtual env - add solutions for (ll)fuse installation problems - - tested and updated cygwin docs + - document what borg check does, fixes #138 + - reorganize borgbackup.github.io sidebar, prev/next at top + - deduplicate and refactor the docs / README.rst - use borg-tmp as prefix for temporary files / directories - short prune options without "keep-" are deprecated, do not suggest them @@ -79,7 +86,9 @@ Other changes: - use entrypoints instead of scripts, for better use of the wheel format and modern installs - add requirements.d/development.txt and modify tox.ini - +- use travis-ci for linux and OS X testing +- use coverage.py, pytest-cov 
and codecov.io for test coverage support + I forgot to list some stuff already implemented in 0.23.0, here they are: New features: From 4c668a85b63955ccd41b8f7b1c151373da1a4924 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 14:16:56 +0200 Subject: [PATCH 229/241] update docs copyright (to be same as project copyright) --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 9c0e84cb8..5962d1cab 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. project = 'Borg - Deduplicating Archiver' -copyright = '2010-2014, Jonas Borgström' +copyright = '2010-2014, Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the From 955ac9c44c36cfe5163f9f9b78578190843a0666 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 14:26:54 +0200 Subject: [PATCH 230/241] get rid of testsuite.mock, directly import from mock this was left over from times when we either used mock from stdlib or pypi mock. but as we only use pypi mock now, the indirection is not needed any more. --- borg/testsuite/archive.py | 2 +- borg/testsuite/archiver.py | 3 ++- borg/testsuite/mock.py | 14 -------------- borg/testsuite/repository.py | 3 ++- 4 files changed, 5 insertions(+), 17 deletions(-) delete mode 100644 borg/testsuite/mock.py diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 9a20e9f6e..a963573ec 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -1,12 +1,12 @@ from datetime import datetime, timezone import msgpack +from mock import Mock from ..archive import Archive, CacheChunkBuffer, RobustUnpacker from ..key import PlaintextKey from ..helpers import Manifest from . 
import BaseTestCase -from .mock import Mock class MockCache: diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 7a2b75780..20e76a7e0 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -11,6 +11,8 @@ import time import unittest from hashlib import sha256 +from mock import patch + from .. import xattr from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP from ..archiver import Archiver @@ -20,7 +22,6 @@ from ..helpers import Manifest from ..remote import RemoteRepository, PathNotAllowed from ..repository import Repository from . import BaseTestCase -from .mock import patch try: import llfuse diff --git a/borg/testsuite/mock.py b/borg/testsuite/mock.py deleted file mode 100644 index bdd030b10..000000000 --- a/borg/testsuite/mock.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -Mocking - -Note: unittest.mock is broken on at least python 3.3.6 and 3.4.0. - it silently ignores mistyped method names starting with assert_..., - does nothing and just succeeds. - The issue was fixed in the separately distributed "mock" lib, you - get an AttributeError there. So, always use that one! - -Details: - -http://engineeringblog.yelp.com/2015/02/assert_called_once-threat-or-menace.html -""" -from mock import * diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 1c9fd072d..74996b717 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -2,13 +2,14 @@ import os import shutil import tempfile +from mock import patch + from ..hashindex import NSIndex from ..helpers import Location, IntegrityError from ..locking import UpgradableLock from ..remote import RemoteRepository, InvalidRPCMethod from ..repository import Repository from . 
import BaseTestCase -from .mock import patch class RepositoryTestCaseBase(BaseTestCase): From 197ca9c0d30f77bf77e48b51754384455255ddfe Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 16:19:53 +0200 Subject: [PATCH 231/241] C merge code: cast to correct pointer type, silences warning --- borg/_hashindex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index 128ab5b2f..33d12ca03 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -396,7 +396,7 @@ hashindex_merge(HashIndex *index, HashIndex *other) while((key = hashindex_next_key(other, key))) { other_values = key + key_size; - my_values = hashindex_get(index, key); + my_values = (int32_t *)hashindex_get(index, key); if(my_values == NULL) { hashindex_set(index, key, other_values); } else { From 69456e07c46b09d50cbde363d6c5eb1625df3fdb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 19:02:35 +0200 Subject: [PATCH 232/241] cache sync: change progress output to separate lines printing without \n plus sys.stdout.flush() didn't work as expected. --- borg/cache.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index a480d708f..2391be275 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -307,14 +307,13 @@ class Cache: for tarinfo in tf_in: archive_id_hex = tarinfo.name archive_name = tarinfo.pax_headers['archive_name'] - print("- processing archive: %s -> extract, " % archive_name, end='') ; sys.stdout.flush() + print("- extracting archive %s ..." 
% archive_name) tf_in.extract(archive_id_hex, tmp_dir) chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') - print("read, ", end='') ; sys.stdout.flush() + print("- reading archive ...") archive_chunk_idx = ChunkIndex.read(chunk_idx_path) - print("merge, ", end='') ; sys.stdout.flush() + print("- merging archive ...") chunk_idx.merge(archive_chunk_idx) - print("done.") os.unlink(chunk_idx_path) self.begin_txn() From 1e35f5ce4a7f38917afdaddeebf61a94bc7478aa Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 21:22:55 +0200 Subject: [PATCH 233/241] minor fixes to CHANGES --- CHANGES.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 199bea31b..b333ba48a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -71,6 +71,7 @@ Other changes: - add FAQ entries about redundancy / integrity - clarify that borg extract uses the cwd as extraction target - update internals doc about chunker params, memory usage and compression + - added docs about development - add some words about resource usage in general - document how to backup a raw disk - add note about how to run borg from virtual env @@ -81,12 +82,12 @@ Other changes: - use borg-tmp as prefix for temporary files / directories - short prune options without "keep-" are deprecated, do not suggest them -- improved tox configuration, documented there how to invoke it +- improved tox configuration - remove usage of unittest.mock, always use mock from pypi - use entrypoints instead of scripts, for better use of the wheel format and modern installs - add requirements.d/development.txt and modify tox.ini -- use travis-ci for linux and OS X testing +- use travis-ci for testing based on Linux and (new) OS X - use coverage.py, pytest-cov and codecov.io for test coverage support I forgot to list some stuff already implemented in 0.23.0, here they are: From 822379048f0a0fdec0bc8b541b1a113e2b9bba01 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: 
Sun, 9 Aug 2015 22:32:14 +0200 Subject: [PATCH 234/241] added some sidebar links --- docs/_themes/local/sidebarusefullinks.html | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/_themes/local/sidebarusefullinks.html b/docs/_themes/local/sidebarusefullinks.html index 2f71b275d..368dee25f 100644 --- a/docs/_themes/local/sidebarusefullinks.html +++ b/docs/_themes/local/sidebarusefullinks.html @@ -5,6 +5,8 @@
    • Main Web Site
    • PyPI packages
    • +
    • Binary Packages
    • +
    • Current ChangeLog
    • GitHub
    • Issue Tracker
    • Bounties & Fundraisers
    • From e06b0b36129d42f979a09456ed66a6e9b2d9a8ad Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 01:04:03 +0200 Subject: [PATCH 235/241] use C99's uintmax_t and %ju format whatever size_t and off_t is, should even fit in there --- borg/_hashindex.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index 33d12ca03..aa1881f18 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -145,10 +145,12 @@ hashindex_read(const char *path) bytes_read = fread(&header, 1, sizeof(HashHeader), fd); if(bytes_read != sizeof(HashHeader)) { if(ferror(fd)) { - EPRINTF_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read); + EPRINTF_PATH(path, "fread header failed (expected %ju, got %ju)", + (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read); } else { - EPRINTF_MSG_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read); + EPRINTF_MSG_PATH(path, "fread header failed (expected %ju, got %ju)", + (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read); } goto fail; } @@ -170,7 +172,8 @@ hashindex_read(const char *path) } buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size); if(length != sizeof(HashHeader) + buckets_length) { - EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ld, got %ld)", sizeof(HashHeader) + buckets_length, length); + EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ju, got %ju)", + (uintmax_t) sizeof(HashHeader) + buckets_length, (uintmax_t) length); goto fail; } if(!(index = malloc(sizeof(HashIndex)))) { @@ -186,10 +189,12 @@ hashindex_read(const char *path) bytes_read = fread(index->buckets, 1, buckets_length, fd); if(bytes_read != buckets_length) { if(ferror(fd)) { - EPRINTF_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read); + EPRINTF_PATH(path, "fread buckets failed (expected %ju, got %ju)", + 
(uintmax_t) buckets_length, (uintmax_t) bytes_read); } else { - EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read); + EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ju, got %ju)", + (uintmax_t) buckets_length, (uintmax_t) bytes_read); } free(index->buckets); free(index); From feff0f0c9421c7487e618eb7f771bbf1a2568603 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 03:15:44 +0200 Subject: [PATCH 236/241] install docs: replace hack for llfuse with proper solution found out why it could not install llfuse into virtual env: it always complained about not being able to find fuse.pc - which is part of libfuse-dev / fuse-devel and was missing. once one adds the fuse dev stuff, llfuse installs to virtual env without problems. --- docs/installation.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 90bd33f84..3cd4e13b6 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -62,13 +62,11 @@ Some of the steps detailled below might be useful also for non-git installs. # if you do not have gcc / make / etc. yet apt-get install build-essential - # optional: lowlevel FUSE py binding - to mount backup archives + # optional: FUSE support - to mount backup archives # in case you get complaints about permission denied on /etc/fuse.conf: # on ubuntu this means your user is not in the "fuse" group. just add # yourself there, log out and log in again. - # if it complains about not being able to find llfuse: make a symlink - # borg-env/lib/python3.4/site-packages/llfuse -> /usr/lib/python3/dist-packages/llfuse - apt-get install python3-llfuse fuse + apt-get install libfuse-dev fuse # optional: for unit testing apt-get install fakeroot @@ -84,6 +82,7 @@ Some of the steps detailled below might be useful also for non-git installs. 
pip install cython # compile .pyx -> .c pip install tox pytest # optional, for running unit tests pip install sphinx # optional, to build the docs + pip install llfuse # optional, for FUSE support cd borg pip install -e . # in-place editable mode @@ -108,8 +107,8 @@ Some of the steps detailled below might be useful also for non-git installs. # ACL support Headers + Library sudo dnf install libacl-devel libacl - # optional: lowlevel FUSE py binding - to mount backup archives - sudo dnf install python3-llfuse fuse + # optional: FUSE support - to mount backup archives + sudo dnf install fuse-devel fuse # optional: for unit testing sudo dnf install fakeroot @@ -125,6 +124,7 @@ Some of the steps detailled below might be useful also for non-git installs. pip install cython # compile .pyx -> .c pip install tox pytest # optional, for running unit tests pip install sphinx # optional, to build the docs + pip install llfuse # optional, for FUSE support cd borg pip install -e . # in-place editable mode From 4d8949e66a6f0183e50b07d7f68827b86f22641b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 04:09:36 +0200 Subject: [PATCH 237/241] archiver: more tests --- borg/archiver.py | 6 +++--- borg/testsuite/archiver.py | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 38d270647..deed03786 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -859,7 +859,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") return args.func(args) -def sig_info_handler(signum, stack): +def sig_info_handler(signum, stack): # pragma: no cover """search the stack for infos about the currently processed file and print them""" for frame in inspect.getouterframes(stack): func, loc = frame[3], frame[0].f_locals @@ -882,7 +882,7 @@ def sig_info_handler(signum, stack): break -def setup_signal_handlers(): +def setup_signal_handlers(): # pragma: no cover sigs = [] if 
hasattr(signal, 'SIGUSR1'): sigs.append(signal.SIGUSR1) # kill -USR1 pid @@ -892,7 +892,7 @@ def setup_signal_handlers(): signal.signal(sig, sig_info_handler) -def main(): +def main(): # pragma: no cover # Make sure stdout and stderr have errors='replace') to avoid unicode # issues when print()-ing unicode file names sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 20e76a7e0..489f3f69f 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -183,7 +183,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.create_test_files() self.cmd('init', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') - self.cmd('create', self.repository_location + '::test.2', 'input') + self.cmd('create', '--stats', self.repository_location + '::test.2', 'input') with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) @@ -403,7 +403,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('extract', '--dry-run', self.repository_location + '::test.2') self.cmd('delete', self.repository_location + '::test') self.cmd('extract', '--dry-run', self.repository_location + '::test.2') - self.cmd('delete', self.repository_location + '::test.2') + self.cmd('delete', '--stats', self.repository_location + '::test.2') # Make sure all data except the manifest has been deleted repository = Repository(self.repository_path) self.assert_equal(len(repository), 1) @@ -470,10 +470,38 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_not_in('test1', output) self.assert_in('test2', output) + def test_prune_repository_prefix(self): + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir) + self.cmd('create', self.repository_location + 
'::foo-2015-08-12-20:00', src_dir) + self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir) + self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir) + output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-') + self.assert_in('Keeping archive: foo-2015-08-12-20:00', output) + self.assert_in('Would prune: foo-2015-08-12-10:00', output) + output = self.cmd('list', self.repository_location) + self.assert_in('foo-2015-08-12-10:00', output) + self.assert_in('foo-2015-08-12-20:00', output) + self.assert_in('bar-2015-08-12-10:00', output) + self.assert_in('bar-2015-08-12-20:00', output) + self.cmd('prune', self.repository_location, '--keep-daily=2', '--prefix=foo-') + output = self.cmd('list', self.repository_location) + self.assert_not_in('foo-2015-08-12-10:00', output) + self.assert_in('foo-2015-08-12-20:00', output) + self.assert_in('bar-2015-08-12-10:00', output) + self.assert_in('bar-2015-08-12-20:00', output) + def test_usage(self): self.assert_raises(SystemExit, lambda: self.cmd()) self.assert_raises(SystemExit, lambda: self.cmd('-h')) + def test_help(self): + assert 'Borg' in self.cmd('help') + assert 'patterns' in self.cmd('help', 'patterns') + assert 'Initialize' in self.cmd('help', 'init') + assert 'positional arguments' not in self.cmd('help', 'init', '--epilog-only') + assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only') + @unittest.skipUnless(has_llfuse, 'llfuse not installed') def test_fuse_mount_repository(self): mountpoint = os.path.join(self.tmpdir, 'mountpoint') From 8300efb1dbfe17d9964c68fe790480acbc453e51 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 04:28:31 +0200 Subject: [PATCH 238/241] remote: pragma: no cover for the stuff we can't test --- borg/remote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/remote.py b/borg/remote.py index 1d7ae84e2..3a274b214 100644 --- 
a/borg/remote.py +++ b/borg/remote.py @@ -28,7 +28,7 @@ class InvalidRPCMethod(Error): """RPC method is not valid""" -class RepositoryServer: +class RepositoryServer: # pragma: no cover rpc_methods = ( '__len__', 'check', @@ -129,7 +129,7 @@ class RemoteRepository: umask = ['--umask', '%03o' % self.umask] if location.host == '__testsuite__': args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args - else: + else: # pragma: no cover args = ['ssh'] if location.port: args += ['-p', str(location.port)] From 2194d9837e4021370402ead33d5724ceb78b0735 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 16:04:41 +0200 Subject: [PATCH 239/241] update CHANGES --- CHANGES.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index b333ba48a..13dfdb4ce 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,30 @@ Borg Changelog ============== +Version 0.25.0 (not released yet) +--------------------------------- + +Incompatible changes (compared to 0.24): + +- none yet + +Deprecations: + +- none yet + +New features: + +- honor the nodump flag (UF_NODUMP) and do not backup such items + +Bug fixes: + +- close fds of segments we delete (during compaction) + +Other changes: + +- none yet + + Version 0.24.0 -------------- From 04814241289c4febdfb9c497783d1cf3d7a51538 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 16:41:30 +0200 Subject: [PATCH 240/241] fix archiver test to not expect backup of the UF_NODUMP file --- borg/testsuite/archiver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 489f3f69f..eb707ade1 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -187,7 +187,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(len(self.cmd('list', 
self.repository_location).splitlines()), 2) - self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), 11) + file_count = 10 if has_lchflags else 11 # one file is UF_NODUMP + self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), file_count) self.assert_dirs_equal('input', 'output/input') info_output = self.cmd('info', self.repository_location + '::test') self.assert_in('Number of files: 4', info_output) From 3100fac3617851d4d67096df31f74a96f9fd2e86 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 17:03:30 +0200 Subject: [PATCH 241/241] fix archiver test to not expect backup of the UF_NODUMP file, try 2 --- borg/testsuite/archiver.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index eb707ade1..2ed2f7821 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -187,11 +187,15 @@ class ArchiverTestCase(ArchiverTestCaseBase): with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) - file_count = 10 if has_lchflags else 11 # one file is UF_NODUMP - self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), file_count) + item_count = 10 if has_lchflags else 11 # one file is UF_NODUMP + self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), item_count) + if has_lchflags: + # remove the file we did not backup, so input and output become equal + os.remove(os.path.join('input', 'flagfile')) self.assert_dirs_equal('input', 'output/input') info_output = self.cmd('info', self.repository_location + '::test') - self.assert_in('Number of files: 4', info_output) + item_count = 3 if has_lchflags else 4 # one file is UF_NODUMP + self.assert_in('Number of files: %d' % item_count, info_output) 
shutil.rmtree(self.cache_path) with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'): info_output2 = self.cmd('info', self.repository_location + '::test')