From e3472a248ad30b509ba3aa3ebb901ba391882b4f Mon Sep 17 00:00:00 2001 From: Thomas Harold Date: Thu, 16 Jul 2015 16:40:33 -0400 Subject: [PATCH 001/142] Fix format issue in installation.rst for Cygwin Fixed formatting issue in installation.rst where packages were listed all on one line. --- docs/installation.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/installation.rst b/docs/installation.rst index b1a20d8f2..132043b11 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -128,6 +128,7 @@ Please note that running under cygwin is rather experimental. You'll need at least (use the cygwin installer to fetch/install these): :: + python3 python3-setuptools python3-cython From 2907dd0094c281147382bf085a4d201c97105bf8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 17 Jul 2015 22:55:28 +0200 Subject: [PATCH 002/142] add BountySource link --- docs/_themes/local/sidebarusefullinks.html | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/_themes/local/sidebarusefullinks.html b/docs/_themes/local/sidebarusefullinks.html index 2f35c2695..2f71b275d 100644 --- a/docs/_themes/local/sidebarusefullinks.html +++ b/docs/_themes/local/sidebarusefullinks.html @@ -7,6 +7,7 @@
  • PyPI packages
  • GitHub
  • Issue Tracker
  • +
  • Bounties & Fundraisers
  • Mailing List
  • From ed2548ca027b4fd062a10ddf2ce359d9115f40a4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 20 Jul 2015 16:16:32 +0200 Subject: [PATCH 003/142] add a __main__.py to nuitka works --- borg/__main__.py | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 borg/__main__.py diff --git a/borg/__main__.py b/borg/__main__.py new file mode 100644 index 000000000..b38dc4e9e --- /dev/null +++ b/borg/__main__.py @@ -0,0 +1,3 @@ +from borg.archiver import main +main() + From 59c519b3bbcb9cf098a7bb1c467d0a021e19ebc4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 25 Jul 2015 15:37:30 +0200 Subject: [PATCH 004/142] remove outdated locking problem warning --- docs/quickstart.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index d0881fb95..fcb223503 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -159,6 +159,3 @@ mounting the remote filesystem, for example, using sshfs:: $ borg init /mnt/backup $ fusermount -u /mnt -However, be aware that sshfs doesn't fully implement POSIX locks, so -you must be sure to not have two processes trying to access the same -repository at the same time. From 0b226aec7a6ae6a60337cb2bc8cc7cf80d3bfff5 Mon Sep 17 00:00:00 2001 From: Roberto Polli Date: Sat, 25 Jul 2015 15:39:42 +0200 Subject: [PATCH 005/142] add development.txt and modify tox.ini --- requirements.d/development.txt | 4 ++++ tox.ini | 4 +--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 requirements.d/development.txt diff --git a/requirements.d/development.txt b/requirements.d/development.txt new file mode 100644 index 000000000..6d2928a92 --- /dev/null +++ b/requirements.d/development.txt @@ -0,0 +1,4 @@ +tox +mock +pytest +Cython diff --git a/tox.ini b/tox.ini index 79603cda9..fdf91a2db 100644 --- a/tox.ini +++ b/tox.ini @@ -19,9 +19,7 @@ envlist = py32, py33, py34 # Change dir to avoid import problem for cython code. 
The directory does # not really matter, should be just different from the toplevel dir. changedir = {toxworkdir} -deps = - pytest - mock +deps = -rrequirements.d/development.txt commands = py.test --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * From 12a50bc6fe71d0c08f79b993aa0ae9465444a155 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 25 Jul 2015 18:38:16 +0200 Subject: [PATCH 006/142] tested and updated cygwin docs, thanks to fvia --- docs/installation.rst | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 132043b11..54895527a 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -123,7 +123,8 @@ Some of the steps detailled below might be useful also for non-git installs. Cygwin (from git) ----------------- -Please note that running under cygwin is rather experimental. +Please note that running under cygwin is rather experimental, stuff has been +tested with CygWin (x86-64) v2.1.0. You'll need at least (use the cygwin installer to fetch/install these): @@ -144,7 +145,14 @@ You can then install ``pip`` and ``virtualenv``: :: - easy_install pip + easy_install-3.4 pip pip install virtualenv And now continue as for Linux (see above). + +In case that creation of the virtual env fails, try deleting this file: + +:: + + /usr/lib/python3.4/__pycache__/platform.cpython-34.pyc + From 859c33d42b846efa7b290007f6762d366fe4bf98 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 25 Jul 2015 19:07:24 +0200 Subject: [PATCH 007/142] docs: add solutions for (ll)fuse installation problems --- docs/installation.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/installation.rst b/docs/installation.rst index 54895527a..985f43eba 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -57,6 +57,11 @@ Some of the steps detailled below might be useful also for non-git installs. 
apt-get install build-essential # optional: lowlevel FUSE py binding - to mount backup archives + # in case you get complaints about permission denied on /etc/fuse.conf: + # on ubuntu this means your user is not in the "fuse" group. just add + # yourself there, log out and log in again. + # if it complains about not being able to find llfuse: make a symlink + # borg-env/lib/python3.4/site-packages/llfuse -> /usr/lib/python3/dist-packages/llfuse apt-get install python3-llfuse fuse # optional: for unit testing From fb998cbd66c78a88b51e58bac2042454c99280cf Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 26 Jul 2015 14:47:18 +0200 Subject: [PATCH 008/142] docs: add note about how to run borg from virtual env --- docs/installation.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/installation.rst b/docs/installation.rst index 985f43eba..90bd33f84 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -19,6 +19,12 @@ usually available as an optional install. Virtualenv_ can be used to build and install |project_name| without affecting the system Python or requiring root access. +Important: +if you install into a virtual environment, you need to activate +the virtual env first (``source borg-env/bin/activate``). +Alternatively, directly run ``borg-env/bin/borg`` (or symlink that into some +directory that is in your PATH so you can just run ``borg``). + The llfuse_ python package is also required if you wish to mount an archive as a FUSE filesystem. Only FUSE >= 2.8.0 can support llfuse. 
From fde952a6d96a6c25868bdeb946552a3e9a4cb2f9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 26 Jul 2015 15:37:36 +0200 Subject: [PATCH 009/142] .gitignore: add nuitka and cache stuff --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 1e38a1479..f3564a429 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,7 @@ platform_linux.c *.so docs/usage/*.inc .idea/ +.cache/ +borg.build/ +borg.dist/ +borg.exe From 195545075ae7492cfc07fd531daf294fce2bfffe Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 26 Jul 2015 17:38:16 +0200 Subject: [PATCH 010/142] repo delete: add destroy to allowed rpc methods, fixes issue #114 also: add test, automate YES confirmation for testing --- borg/archiver.py | 12 +++++++----- borg/remote.py | 1 + borg/testsuite/archiver.py | 10 ++++++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 6275edf22..84e568e73 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -288,11 +288,13 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") print("You requested to completely DELETE the repository *including* all archives it contains:") for archive_info in manifest.list_archive_infos(sort_by='ts'): print(format_archive(archive_info)) - print("""Type "YES" if you understand this and want to continue.\n""") - if input('Do you want to continue? ') == 'YES': - repository.destroy() - cache.destroy() - print("Repository and corresponding cache were deleted.") + while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): + print("""Type "YES" if you understand this and want to continue.\n""") + if input('Do you want to continue? 
') == 'YES': + break + repository.destroy() + cache.destroy() + print("Repository and corresponding cache were deleted.") return self.exit_code def do_mount(self, args): diff --git a/borg/remote.py b/borg/remote.py index 5da5f9cf8..afec54710 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -34,6 +34,7 @@ class RepositoryServer: 'check', 'commit', 'delete', + 'destroy', 'get', 'list', 'negotiate', diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 29c6ac1f7..35f8171d9 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -394,6 +394,16 @@ class ArchiverTestCase(ArchiverTestCaseBase): repository = Repository(self.repository_path) self.assert_equal(len(repository), 1) + def test_delete_repo(self): + self.create_regular_file('file1', size=1024 * 80) + self.create_regular_file('dir2/file2', size=1024 * 80) + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + self.cmd('create', self.repository_location + '::test.2', 'input') + self.cmd('delete', self.repository_location) + # Make sure the repo is gone + self.assertFalse(os.path.exists(self.repository_path)) + def test_corrupted_repository(self): self.cmd('init', self.repository_location) self.create_src_archive('test') From bcdfda7ef083d77f5f05258d5d3f522790bd49f1 Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 27 Jul 2015 14:38:03 +0200 Subject: [PATCH 011/142] Linked "issue #1" to issue #1. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 3668d38cf..e65dda433 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES. THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. -Read issue #1 on the issue tracker, goals are being defined there. +Read `issue #1 `_ on the issue tracker, goals are being defined there. 
Please also see the LICENSE for more informations. From 30d47cb68ab3ac58966b84dca1e8b7d5a4e05ee5 Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 27 Jul 2015 14:41:43 +0200 Subject: [PATCH 012/142] Fixed *ALL* the links! --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index e65dda433..fe523575e 100644 --- a/README.rst +++ b/README.rst @@ -7,7 +7,7 @@ an efficient and secure way to backup data. The data deduplication technique used makes Borg suitable for daily backups since only changes are stored. -Borg is a fork of Attic and maintained by "The Borg Collective" (see AUTHORS file). +Borg is a fork of `Attic `_ and maintained by "`The Borg Collective `_". BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER @@ -19,7 +19,7 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. Read `issue #1 `_ on the issue tracker, goals are being defined there. -Please also see the LICENSE for more informations. +Please also see the `LICENSE `_ for more informations. Easy to use ~~~~~~~~~~~ From 60dfde2dc6236f30f312d2deb42dc94390221c06 Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 27 Jul 2015 14:45:32 +0200 Subject: [PATCH 013/142] Reference to BorgWeb --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index fe523575e..a11a29ca9 100644 --- a/README.rst +++ b/README.rst @@ -28,6 +28,8 @@ Initialize backup repository and create a backup archive:: $ borg init /mnt/backup $ borg create -v /mnt/backup::documents ~/Documents +For a graphical frontend refer to our complementary project `BorgWeb `_. 
+ Main features ~~~~~~~~~~~~~ Space efficient storage From 729cc4d82d9597fdf56a81fc8a5bcd6228850fce Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 27 Jul 2015 22:27:13 +0200 Subject: [PATCH 014/142] document how to backup raw disk --- docs/usage.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/usage.rst b/docs/usage.rst index 46c56c9e0..f983ff662 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -139,6 +139,9 @@ Examples # Backup huge files with little chunk management overhead $ borg create --chunker-params 19,23,21,4095 /mnt/backup::VMs /srv/VMs + # Backup a raw device (must not be active/in use/mounted at that time) + $ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda - + .. include:: usage/extract.rst.inc From 300c7351e70e487775967c464751111320f8225c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 27 Jul 2015 23:02:52 +0200 Subject: [PATCH 015/142] be more verbose about the great deduplication algorithm --- README.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.rst b/README.rst index a11a29ca9..c9cf90706 100644 --- a/README.rst +++ b/README.rst @@ -38,6 +38,23 @@ Space efficient storage variable length chunks and only chunks that have never been seen before are compressed and added to the repository. 
+ The content-defined chunking based deduplication is applied to remove + duplicate chunks within: + + * the current backup data set (even inside single files / streams) + * current and previous backups of same machine + * all the chunks in the same repository, even if coming from other machines + + This advanced deduplication method does NOT depend on: + + * file/directory names staying the same (so you can move your stuff around + without killing the deduplication, even between machines sharing a repo) + * complete files or time stamps staying the same (if a big file changes a + little, only a few new chunks will be stored - this is great for VMs or + raw disks) + * the absolute position of a data chunk inside a file (stuff may get shifted + and will still be found by the deduplication algorithm) + Optional data encryption All data can be protected using 256-bit AES encryption and data integrity and authenticity is verified using HMAC-SHA256. From 81191243596aa1bfce143b062b4fbdf188be1e8f Mon Sep 17 00:00:00 2001 From: Jeff Rizzo Date: Tue, 28 Jul 2015 11:39:00 -0700 Subject: [PATCH 016/142] Don't process an entry if the nodump flag is set. --- attic/archiver.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/attic/archiver.py b/attic/archiver.py index 5731ffb4c..8d63c0714 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -158,6 +158,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # Ignore unix sockets if stat.S_ISSOCK(st.st_mode): return + # Ignore if nodump flag set + if st.st_flags and stat.UF_NODUMP(st.st_flags): + return self.print_verbose(remove_surrogates(path)) if stat.S_ISREG(st.st_mode): try: From e11a4a5d3a912596d8db9d85f721699231982526 Mon Sep 17 00:00:00 2001 From: Jeff Rizzo Date: Tue, 28 Jul 2015 12:30:25 -0700 Subject: [PATCH 017/142] Check the UF_NODUMP flag properly. 
--- attic/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index 8d63c0714..335012477 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -159,7 +159,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") if stat.S_ISSOCK(st.st_mode): return # Ignore if nodump flag set - if st.st_flags and stat.UF_NODUMP(st.st_flags): + if st.st_flags and (st.st_flags & stat.UF_NODUMP): return self.print_verbose(remove_surrogates(path)) if stat.S_ISREG(st.st_mode): From ebc04b0ebffe82036670409148ef856f83226be8 Mon Sep 17 00:00:00 2001 From: Jeff Rizzo Date: Tue, 28 Jul 2015 15:01:42 -0700 Subject: [PATCH 018/142] Check for lchflags properly. --- attic/archiver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/attic/archiver.py b/attic/archiver.py index 335012477..05076fc0e 100644 --- a/attic/archiver.py +++ b/attic/archiver.py @@ -21,6 +21,7 @@ from attic.helpers import Error, location_validator, format_time, \ is_cachedir, bigint_to_int from attic.remote import RepositoryServer, RemoteRepository +has_lchflags = hasattr(os, 'lchflags') class Archiver: @@ -159,7 +160,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") if stat.S_ISSOCK(st.st_mode): return # Ignore if nodump flag set - if st.st_flags and (st.st_flags & stat.UF_NODUMP): + if has_lchflags and (st.st_flags & stat.UF_NODUMP): return self.print_verbose(remove_surrogates(path)) if stat.S_ISREG(st.st_mode): From 1e097bfd6b9904954be2f739cbb67ac478df4ce5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 29 Jul 2015 21:39:20 +0200 Subject: [PATCH 019/142] docs: add some words about resource usage --- docs/usage.rst | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/docs/usage.rst b/docs/usage.rst index f983ff662..d80d5a5e2 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -53,11 +53,15 @@ Environment Variables Directories: 
BORG_KEYS_DIR : Default to '~/.borg/keys'. This directory contains keys for encrypted repositories. - BORG_CACHE_DIR : Default to '~/.cache/borg'. This directory contains the local cache. + BORG_CACHE_DIR : Default to '~/.cache/borg'. This directory contains the local cache and might need a lot + of space for dealing with big repositories). Building: BORG_OPENSSL_PREFIX : Adds given OpenSSL header file directory to the default locations (setup.py). + General: + TMPDIR : where temporary files are stored (might need a lot of temporary space for some operations) + Please note: @@ -66,6 +70,33 @@ Please note: (e.g. mode 600, root:root). +Resource Usage +-------------- + +|project_name| might use a lot of resources depending on the size of the data set it is dealing with. + +CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded. + +Memory (RAM): the chunks index and files index is read into memory for performance reasons. + +Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the + deduplicated chunks used to represent them in the repository. + +Cache files: chunks index and files index (plus a collection of single-archive chunk indexes). + +Chunks index: proportional to the amount of data chunks in your repo. lots of small chunks in your repo implies a big + chunks index. you may need to tweak the chunker params (see create options) if you have a lot of data and + you want to keep the chunks index at some reasonable size. + +Files index: proportional to the amount of files in your last backup. can be switched off (see create options), but + next backup will be much slower if you do. + +Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) of course have to go over + the connection (ssh: repo url). if you use a locally mounted network filesystem, additional some copy + operations used for transaction support go over the connection additionally. 
if you backup multiple sources to + one target repository, additional traffic happens for cache resynchronization. + + .. include:: usage/init.rst.inc Examples From 9d21e4ad69189a39bacef8ed7e2a32093dcb0398 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 29 Jul 2015 21:48:57 +0200 Subject: [PATCH 020/142] docs: add some words about resource usage (fixed wording) --- docs/usage.rst | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index d80d5a5e2..fcbee5fef 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -77,12 +77,12 @@ Resource Usage CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded. -Memory (RAM): the chunks index and files index is read into memory for performance reasons. +Memory (RAM): the chunks index and the files index are read into memory for performance reasons. Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the deduplicated chunks used to represent them in the repository. -Cache files: chunks index and files index (plus a collection of single-archive chunk indexes). +Cache files: chunks index and files index (plus a compressed collection of single-archive chunk indexes). Chunks index: proportional to the amount of data chunks in your repo. lots of small chunks in your repo implies a big chunks index. you may need to tweak the chunker params (see create options) if you have a lot of data and @@ -91,10 +91,12 @@ Chunks index: proportional to the amount of data chunks in your repo. lots of sm Files index: proportional to the amount of files in your last backup. can be switched off (see create options), but next backup will be much slower if you do. -Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) of course have to go over - the connection (ssh: repo url). 
if you use a locally mounted network filesystem, additional some copy - operations used for transaction support go over the connection additionally. if you backup multiple sources to - one target repository, additional traffic happens for cache resynchronization. +Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) data of course has to go + over the connection (ssh: repo url). if you use a locally mounted network filesystem, additionally some copy + operations used for transaction support also go over the connection. if you backup multiple sources to one + target repository, additional traffic happens for cache resynchronization. + +In case you are interested in more details, please read the internals documentation. .. include:: usage/init.rst.inc From 3be55bedd3a44206c5398931cdebdc7cda4b94f5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 30 Jul 2015 15:21:13 +0200 Subject: [PATCH 021/142] chunker: n needs to be a signed size_t ... as it is also used for the read() return value, which can be negative in case of errors. 
--- borg/_chunker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 20461e7c6..4db21b75b 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -127,7 +127,7 @@ chunker_free(Chunker *c) static int chunker_fill(Chunker *c) { - size_t n; + ssize_t n; PyObject *data; memmove(c->data, c->data + c->last, c->position + c->remaining - c->last); c->position -= c->last; From 27de1b0a438d0b63299ee6b9aa973d07d5922021 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 1 Aug 2015 15:07:54 +0200 Subject: [PATCH 022/142] add a wrapper around liblz4 --- .gitignore | 1 + borg/compress.pyx | 67 +++++++++++++++++++++++++++++++++++++++++++ docs/global.rst.inc | 1 + docs/installation.rst | 14 +++++++-- setup.py | 8 +++++- 5 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 borg/compress.pyx diff --git a/.gitignore b/.gitignore index f3564a429..f6b10cf78 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ env .tox hashindex.c chunker.c +compress.c crypto.c platform_darwin.c platform_freebsd.c diff --git a/borg/compress.pyx b/borg/compress.pyx new file mode 100644 index 000000000..5bd5fdfcb --- /dev/null +++ b/borg/compress.pyx @@ -0,0 +1,67 @@ +""" +A thin liblz4 wrapper for raw LZ4 compression / decompression. + +Features: + - lz4 is super fast + - wrapper releases CPython's GIL to support multithreaded code + - helper buffer only allocated once at instance creation and then reused + +But beware: + - this is not very generic, you MUST know the maximum uncompressed input + data size you will feed into the compressor / get from the decompressor! + - you must not do method calls to the same LZ4 instance from different + threads at the same time - create one LZ4 instance per thread! + - compress returns raw compressed data without adding any frame metadata + (like checksums, magics, length of data, etc.) 
+ - decompress expects such raw compressed data as input +""" + +from libc.stdlib cimport malloc, free + + +cdef extern from "lz4.h": + int LZ4_compressBound(int inputSize) + int LZ4_compress(const char* source, char* dest, int inputSize) nogil + int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil + + +cdef class LZ4: + cdef char *buffer # helper buffer for (de)compression output + cdef int bufsize # size of this buffer + cdef int max_isize # maximum compressor input size safe for this bufsize + + def __cinit__(self, int max_isize): + self.max_isize = max_isize + # compute worst case bufsize for not compressible data: + self.bufsize = LZ4_compressBound(max_isize) + self.buffer = malloc(self.bufsize) + if not self.buffer: + raise MemoryError + + def __dealloc__(self): + free(self.buffer) + + def compress(self, idata): + cdef int isize = len(idata) + if isize > self.max_isize: + raise Exception('lz4 buffer might be too small, increase max_isize!') + cdef int osize + cdef char *source = idata + cdef char *dest = self.buffer + with nogil: + osize = LZ4_compress(source, dest, isize) + if not osize: + raise Exception('lz4 compress failed') + return dest[:osize] + + def decompress(self, idata): + cdef int isize = len(idata) + cdef int osize = self.bufsize + cdef char *source = idata # <-- does not work for memoryview idata, wants bytes + cdef char *dest = self.buffer + with nogil: + osize = LZ4_decompress_safe(source, dest, isize, osize) + if osize < 0: + # malformed input data, buffer too small, ... + raise Exception('lz4 decompress failed') + return dest[:osize] diff --git a/docs/global.rst.inc b/docs/global.rst.inc index c0629a143..c8c490498 100644 --- a/docs/global.rst.inc +++ b/docs/global.rst.inc @@ -13,6 +13,7 @@ .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list .. _libacl: http://savannah.nongnu.org/projects/acl/ +.. _liblz4: https://github.com/Cyan4973/lz4 .. 
_OpenSSL: https://www.openssl.org/ .. _Python: http://www.python.org/ .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash diff --git a/docs/installation.rst b/docs/installation.rst index 90bd33f84..5a027b2c6 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -9,6 +9,7 @@ Installation * Python_ >= 3.2 * OpenSSL_ >= 1.0.0 * libacl_ +* liblz4_ * some python dependencies, see install_requires in setup.py General notes @@ -59,6 +60,9 @@ Some of the steps detailled below might be useful also for non-git installs. # ACL support Headers + Library apt-get install libacl1-dev libacl1 + # lz4 super fast compression support Headers + Library + apt-get install liblz4-dev liblz4-1 + # if you do not have gcc / make / etc. yet apt-get install build-essential @@ -107,13 +111,16 @@ Some of the steps detailled below might be useful also for non-git installs. # ACL support Headers + Library sudo dnf install libacl-devel libacl - + + # lz4 super fast compression support Headers + Library + sudo dnf install lz4 + # optional: lowlevel FUSE py binding - to mount backup archives sudo dnf install python3-llfuse fuse - + # optional: for unit testing sudo dnf install fakeroot - + # get |project_name| from github, install it git clone |git_url| @@ -148,6 +155,7 @@ You'll need at least (use the cygwin installer to fetch/install these): gcc-core git libopenssl + liblz4_1 liblz4-devel # from cygwinports.org make openssh openssl-devel diff --git a/setup.py b/setup.py index edd75dc1a..87de52b71 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ if sys.version_info < min_python: from setuptools import setup, Extension +compress_source = 'borg/compress.pyx' crypto_source = 'borg/crypto.pyx' chunker_source = 'borg/chunker.pyx' hashindex_source = 'borg/hashindex.pyx' @@ -38,6 +39,7 @@ try: def make_distribution(self): self.filelist.extend([ + 'borg/compress.c', 'borg/crypto.c', 'borg/chunker.c', 'borg/_chunker.c', 'borg/hashindex.c', 'borg/_hashindex.c', @@ -52,6 +54,7 @@ except 
ImportError: def __init__(self, *args, **kwargs): raise Exception('Cython is required to run sdist') + compress_source = compress_source.replace('.pyx', '.c') crypto_source = crypto_source.replace('.pyx', '.c') chunker_source = chunker_source.replace('.pyx', '.c') hashindex_source = hashindex_source.replace('.pyx', '.c') @@ -59,7 +62,9 @@ except ImportError: platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c') platform_darwin_source = platform_darwin_source.replace('.pyx', '.c') from distutils.command.build_ext import build_ext - if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]): + if not all(os.path.exists(path) for path in [ + compress_source, crypto_source, chunker_source, hashindex_source, + platform_linux_source, platform_freebsd_source]): raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version') @@ -89,6 +94,7 @@ cmdclass = versioneer.get_cmdclass() cmdclass.update({'build_ext': build_ext, 'sdist': Sdist}) ext_modules = [ + Extension('borg.compress', [compress_source], libraries=['lz4']), Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs), Extension('borg.chunker', [chunker_source]), Extension('borg.hashindex', [hashindex_source]) From 746984c33b6349b051e84c58b667469ffcd903a3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 2 Aug 2015 01:21:41 +0200 Subject: [PATCH 023/142] compress: add tests, zlib and null compression, ID header and autodetection --- borg/compress.pyx | 163 ++++++++++++++++++++++++++++--------- borg/testsuite/compress.py | 81 ++++++++++++++++++ 2 files changed, 207 insertions(+), 37 deletions(-) create mode 100644 borg/testsuite/compress.py diff --git a/borg/compress.pyx b/borg/compress.pyx index 5bd5fdfcb..1ff00305f 100644 --- a/borg/compress.pyx +++ b/borg/compress.pyx @@ -1,63 +1,91 @@ -""" -A thin liblz4 
wrapper for raw LZ4 compression / decompression. - -Features: - - lz4 is super fast - - wrapper releases CPython's GIL to support multithreaded code - - helper buffer only allocated once at instance creation and then reused - -But beware: - - this is not very generic, you MUST know the maximum uncompressed input - data size you will feed into the compressor / get from the decompressor! - - you must not do method calls to the same LZ4 instance from different - threads at the same time - create one LZ4 instance per thread! - - compress returns raw compressed data without adding any frame metadata - (like checksums, magics, length of data, etc.) - - decompress expects such raw compressed data as input -""" +import zlib from libc.stdlib cimport malloc, free cdef extern from "lz4.h": - int LZ4_compressBound(int inputSize) - int LZ4_compress(const char* source, char* dest, int inputSize) nogil + int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil -cdef class LZ4: +cdef class CompressorBase: + """ + base class for all (de)compression classes, + also handles compression format auto detection and + adding/stripping the ID header (which enable auto detection). + """ + ID = b'\xFF\xFF' # reserved and not used + # overwrite with a unique 2-bytes bytestring in child classes + name = 'baseclass' + + @classmethod + def detect(cls, data): + return data.startswith(cls.ID) + + def __init__(self, **kwargs): + pass + + def compress(self, data): + # add ID bytes + return self.ID + data + + def decompress(self, data): + # strip ID bytes + return data[2:] + + +class CNULL(CompressorBase): + """ + null compression, just pass through data + """ + ID = b'\x00\x00' + name = 'null' + # base class does all we need + + +cdef class LZ4(CompressorBase): + """ + raw LZ4 compression / decompression (liblz4). 
+ + Features: + - lz4 is super fast + - wrapper releases CPython's GIL to support multithreaded code + - buffer given by caller, avoiding frequent reallocation and buffer duplication + - uses safe lz4 methods that never go beyond the end of the output buffer + + But beware: + - this is not very generic, the given buffer MUST be large enough to + handle all compression or decompression output (or it will fail). + - you must not do method calls to the same LZ4 instance from different + threads at the same time - create one LZ4 instance per thread! + """ + ID = b'\x01\x00' + name = 'lz4' + cdef char *buffer # helper buffer for (de)compression output cdef int bufsize # size of this buffer - cdef int max_isize # maximum compressor input size safe for this bufsize - def __cinit__(self, int max_isize): - self.max_isize = max_isize - # compute worst case bufsize for not compressible data: - self.bufsize = LZ4_compressBound(max_isize) - self.buffer = malloc(self.bufsize) - if not self.buffer: - raise MemoryError - - def __dealloc__(self): - free(self.buffer) + def __cinit__(self, **kwargs): + buffer = kwargs['buffer'] + self.buffer = buffer + self.bufsize = len(buffer) def compress(self, idata): cdef int isize = len(idata) - if isize > self.max_isize: - raise Exception('lz4 buffer might be too small, increase max_isize!') - cdef int osize + cdef int osize = self.bufsize cdef char *source = idata cdef char *dest = self.buffer with nogil: - osize = LZ4_compress(source, dest, isize) + osize = LZ4_compress_limitedOutput(source, dest, isize, osize) if not osize: raise Exception('lz4 compress failed') - return dest[:osize] + return super().compress(dest[:osize]) def decompress(self, idata): + idata = super().decompress(idata) cdef int isize = len(idata) cdef int osize = self.bufsize - cdef char *source = idata # <-- does not work for memoryview idata, wants bytes + cdef char *source = idata cdef char *dest = self.buffer with nogil: osize = LZ4_decompress_safe(source, dest, isize, 
osize) @@ -65,3 +93,64 @@ cdef class LZ4: # malformed input data, buffer too small, ... raise Exception('lz4 decompress failed') return dest[:osize] + + +class ZLIB(CompressorBase): + """ + zlib compression / decompression (python stdlib) + """ + ID = b'\x08\x00' # not used here, see detect() + # avoid all 0x.8.. IDs elsewhere! + name = 'zlib' + + @classmethod + def detect(cls, data): + # matches misc. patterns 0x.8.. used by zlib + cmf, flg = data[:2] + is_deflate = cmf & 0x0f == 8 + check_ok = (cmf * 256 + flg) % 31 == 0 + return check_ok and is_deflate + + def __init__(self, level=6, **kwargs): + super().__init__(**kwargs) + self.level = level + + def compress(self, data): + # note: for compatibility no super call, do not add ID bytes + return zlib.compress(data, self.level) + + def decompress(self, data): + # note: for compatibility no super call, do not strip ID bytes + return zlib.decompress(data) + + +COMPRESSOR_TABLE = { + CNULL.name: CNULL, + LZ4.name: LZ4, + ZLIB.name: ZLIB, +} +COMPRESSOR_LIST = [LZ4, CNULL, ZLIB, ] # check fast stuff first + +def get_compressor(name, **kwargs): + cls = COMPRESSOR_TABLE[name] + return cls(**kwargs) + + +class Compressor: + """ + compresses using a compressor with given name and parameters + decompresses everything we can handle (autodetect) + """ + def __init__(self, name='zlib', **kwargs): + self.params = kwargs + self.compressor = get_compressor(name, **self.params) + + def compress(self, data): + return self.compressor.compress(data) + + def decompress(self, data): + for cls in COMPRESSOR_LIST: + if cls.detect(data): + return cls(**self.params).decompress(data) + else: + raise ValueError('No decompressor for this data found: %r.', data[:2]) diff --git a/borg/testsuite/compress.py b/borg/testsuite/compress.py new file mode 100644 index 000000000..441214e7b --- /dev/null +++ b/borg/testsuite/compress.py @@ -0,0 +1,81 @@ +import zlib + +import pytest + +from ..compress import get_compressor, Compressor, CNULL, ZLIB, LZ4 
+ + +buffer = bytes(2**16) +data = b'fooooooooobaaaaaaaar' +params = dict(name='zlib', level=6, buffer=buffer) + + +def test_get_compressor(): + c = get_compressor(name='null') + assert isinstance(c, CNULL) + c = get_compressor(name='lz4', buffer=buffer) + assert isinstance(c, LZ4) + c = get_compressor(name='zlib') + assert isinstance(c, ZLIB) + with pytest.raises(KeyError): + get_compressor(name='foobar') + + +def test_cnull(): + c = get_compressor(name='null') + cdata = c.compress(data) + assert len(cdata) > len(data) + assert data in cdata # it's not compressed and just in there 1:1 + assert data == c.decompress(cdata) + assert data == Compressor(**params).decompress(cdata) # autodetect + + +def test_lz4(): + c = get_compressor(name='lz4', buffer=buffer) + cdata = c.compress(data) + assert len(cdata) < len(data) + assert data == c.decompress(cdata) + assert data == Compressor(**params).decompress(cdata) # autodetect + + +def test_zlib(): + c = get_compressor(name='zlib') + cdata = c.compress(data) + assert len(cdata) < len(data) + assert data == c.decompress(cdata) + assert data == Compressor(**params).decompress(cdata) # autodetect + + +def test_autodetect_invalid(): + with pytest.raises(ValueError): + Compressor(**params).decompress(b'\xff\xfftotalcrap') + with pytest.raises(ValueError): + Compressor(**params).decompress(b'\x08\x00notreallyzlib') + + +def test_zlib_compat(): + # for compatibility reasons, we do not add an extra header for zlib, + # nor do we expect one when decompressing / autodetecting + for level in range(10): + c = get_compressor(name='zlib', level=level) + cdata1 = c.compress(data) + cdata2 = zlib.compress(data, level) + assert cdata1 == cdata2 + data2 = c.decompress(cdata2) + assert data == data2 + data2 = Compressor(**params).decompress(cdata2) + assert data == data2 + + +def test_compressor(): + for params in [ + dict(name='null', buffer=buffer), + dict(name='lz4', buffer=buffer), + dict(name='zlib', level=0, buffer=buffer), + 
dict(name='zlib', level=6, buffer=buffer), + dict(name='zlib', level=9, buffer=buffer), + ]: + c = Compressor(**params) + assert data == c.decompress(c.compress(data)) + + From 899776620209f3707ee9c640ebec93e224a114bb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 2 Aug 2015 18:10:30 +0200 Subject: [PATCH 024/142] integrate compress code, new compression spec parser for commandline New null and lz4 compression. Giving -C 0 now uses null compression, not zlib level 0 any more (null has almost zero overhead while zlib-level0 still had to package everything into zlib frames). Giving -C 10 uses new lz4 compression, super fast compression and even faster decompression. See borg create --help (and --compression argument). fix some issues, clean up, optimize: CNULL: always return bytes LZ4: deal with getting memoryviews Compressor: give bytes to detect(), avoid memoryviews for lz4, always use same COMPR_BUFFER, avoid memory management costs. check --chunker-params CHUNK_MAX_EXP upper limit --- borg/archiver.py | 20 +++++++++++++++----- borg/compress.pyx | 27 +++++++++++++++++++++------ borg/helpers.py | 40 ++++++++++++++++++++++++++++++++++++++++ borg/key.py | 12 ++++++------ 4 files changed, 82 insertions(+), 17 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 84e568e73..032313dbf 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -14,6 +14,7 @@ import traceback from . 
import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS +from .compress import Compressor, COMPR_BUFFER from .repository import Repository from .cache import Cache from .key import key_creator @@ -21,7 +22,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - is_cachedir, bigint_to_int, ChunkerParams + is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec from .remote import RepositoryServer, RemoteRepository @@ -101,7 +102,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") t0 = datetime.now() repository = self.open_repository(args.archive, exclusive=True) manifest, key = Manifest.load(repository) - key.compression_level = args.compression + compr_args = dict(buffer=COMPR_BUFFER) + compr_args.update(args.compression) + key.compressor = Compressor(**compr_args) cache = Cache(repository, key, manifest, do_files=args.cache_files) archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, @@ -634,9 +637,16 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE', help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS) subparser.add_argument('-C', '--compression', dest='compression', - type=int, default=0, metavar='N', - help='select compression algorithm and level. 0..9 is supported and means zlib ' - 'level 0 (no compression, fast, default) .. 
zlib level 9 (high compression, slow).') + type=CompressionSpec, default=dict(name='null'), metavar='COMPRESSION', + help='select compression algorithm and level, by giving a number: ' + '0 == no compression [default], ' + '1..9 == zlib level 1..9, ' + '10 == lz4. ' + 'Alternatively, you can also give a name and optionally additional args: ' + 'null == no compression, ' + 'zlib == zlib (default level 6), ' + 'zlib,0 .. zlib,9 == zlib (with level 0..9), ' + 'lz4 == lz4.') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/compress.pyx b/borg/compress.pyx index 1ff00305f..03815b3a5 100644 --- a/borg/compress.pyx +++ b/borg/compress.pyx @@ -1,8 +1,5 @@ import zlib -from libc.stdlib cimport malloc, free - - cdef extern from "lz4.h": int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil @@ -40,7 +37,15 @@ class CNULL(CompressorBase): """ ID = b'\x00\x00' name = 'null' - # base class does all we need + + def compress(self, data): + return super().compress(data) + + def decompress(self, data): + data = super().decompress(data) + if not isinstance(data, bytes): + data = bytes(data) + return data cdef class LZ4(CompressorBase): @@ -71,6 +76,8 @@ cdef class LZ4(CompressorBase): self.bufsize = len(buffer) def compress(self, idata): + if not isinstance(idata, bytes): + idata = bytes(idata) # code below does not work with memoryview cdef int isize = len(idata) cdef int osize = self.bufsize cdef char *source = idata @@ -82,6 +89,8 @@ cdef class LZ4(CompressorBase): return super().compress(dest[:osize]) def decompress(self, idata): + if not isinstance(idata, bytes): + idata = bytes(idata) # code below does not work with memoryview idata = super().decompress(idata) cdef int isize = len(idata) cdef int osize = self.bufsize @@ -141,7 +150,7 @@ class 
Compressor: compresses using a compressor with given name and parameters decompresses everything we can handle (autodetect) """ - def __init__(self, name='zlib', **kwargs): + def __init__(self, name='null', **kwargs): self.params = kwargs self.compressor = get_compressor(name, **self.params) @@ -149,8 +158,14 @@ class Compressor: return self.compressor.compress(data) def decompress(self, data): + hdr = bytes(data[:2]) # detect() does not work with memoryview for cls in COMPRESSOR_LIST: - if cls.detect(data): + if cls.detect(hdr): return cls(**self.params).decompress(data) else: raise ValueError('No decompressor for this data found: %r.', data[:2]) + + +# a buffer used for (de)compression result, which can be slightly bigger +# than the chunk buffer in the worst (incompressible data) case, add 10%: +COMPR_BUFFER = bytes(int(1.1 * 2 ** 23)) # CHUNK_MAX_EXP == 23 diff --git a/borg/helpers.py b/borg/helpers.py index d20532723..69a6db0db 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -278,9 +278,49 @@ def timestamp(s): def ChunkerParams(s): window_size, chunk_mask, chunk_min, chunk_max = s.split(',') + if int(chunk_max) > 23: + # do not go beyond 2**23 (8MB) chunk size now, + # COMPR_BUFFER can only cope with up to this size + raise ValueError return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max) +def CompressionSpec(s): + values = s.split(',') + count = len(values) + if count < 1: + raise ValueError + compression = values[0] + try: + compression = int(compression) + if count > 1: + raise ValueError + # it is just --compression N + if compression == 0: + return dict(name='null') + if 1 <= compression <= 9: + return dict(name='zlib', level=compression) + if compression == 10: + return dict(name='lz4') + raise ValueError + except ValueError: + # --compression algo[,...] 
+ name = compression + if name in ('null', 'lz4', ): + return dict(name=name) + if name == 'zlib': + if count < 2: + level = 6 # default compression level in py stdlib + elif count == 2: + level = int(values[1]) + if not 0 <= level <= 9: + raise ValueError + else: + raise ValueError + return dict(name='zlib', level=level) + raise ValueError + + def is_cachedir(path): """Determines whether the specified path is a cache directory (and therefore should potentially be excluded from the backup) according to diff --git a/borg/key.py b/borg/key.py index fabdae5b3..fcf083586 100644 --- a/borg/key.py +++ b/borg/key.py @@ -6,9 +6,9 @@ import msgpack import textwrap import hmac from hashlib import sha256 -import zlib from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks +from .compress import Compressor, COMPR_BUFFER from .helpers import IntegrityError, get_keys_dir, Error PREFIX = b'\0' * 8 @@ -68,7 +68,7 @@ class KeyBase: self.TYPE_STR = bytes([self.TYPE]) self.repository = repository self.target = None # key location file path / repo obj - self.compression_level = 0 + self.compressor = Compressor('null', buffer=COMPR_BUFFER) def id_hash(self, data): """Return HMAC hash using the "id" HMAC key @@ -99,12 +99,12 @@ class PlaintextKey(KeyBase): return sha256(data).digest() def encrypt(self, data): - return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)]) + return b''.join([self.TYPE_STR, self.compressor.compress(data)]) def decrypt(self, id, data): if data[0] != self.TYPE: raise IntegrityError('Invalid encryption envelope') - data = zlib.decompress(memoryview(data)[1:]) + data = self.compressor.decompress(memoryview(data)[1:]) if id and sha256(data).digest() != id: raise IntegrityError('Chunk id verification failed') return data @@ -131,7 +131,7 @@ class AESKeyBase(KeyBase): return HMAC(self.id_key, data, sha256).digest() def encrypt(self, data): - data = zlib.compress(data, 
self.compression_level) + data = self.compressor.compress(data) self.enc_cipher.reset() data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data))) hmac = HMAC(self.enc_hmac_key, data, sha256).digest() @@ -144,7 +144,7 @@ class AESKeyBase(KeyBase): if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac: raise IntegrityError('Encryption envelope checksum mismatch') self.dec_cipher.reset(iv=PREFIX + data[33:41]) - data = zlib.decompress(self.dec_cipher.decrypt(data[41:])) # should use memoryview + data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:])) if id and HMAC(self.id_key, data, sha256).digest() != id: raise IntegrityError('Chunk id verification failed') return data From a15daf3b80b0f1687daebba6c062db9b65c7b202 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 2 Aug 2015 18:21:28 +0200 Subject: [PATCH 025/142] add liblz4-dev to travis installation packages --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 87d3afb02..5497cd096 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,7 @@ python: - "3.4" # command to install dependencies install: - - "sudo apt-get install -y libacl1-dev" + - "sudo apt-get install -y libacl1-dev liblz4-dev" - "pip install --use-mirrors Cython" - "pip install -e ." # command to run tests From 946507aeaf9de0c9dbd3d1af4f025e23d1cda28b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 2 Aug 2015 22:24:02 +0200 Subject: [PATCH 026/142] fix travis to install liblz4-dev from ppa it is not available in ubuntu 12.04 by default. 
--- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5497cd096..8d910c0fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,10 @@ python: - "3.4" # command to install dependencies install: - - "sudo apt-get install -y libacl1-dev liblz4-dev" + - "sudo add-apt-repository -y ppa:gezakovacs/lz4" + - "sudo apt-get update" + - "sudo apt-get install -y liblz4-dev" + - "sudo apt-get install -y libacl1-dev" - "pip install --use-mirrors Cython" - "pip install -e ." # command to run tests From 4c0012bddfc91e45167f65293369814511695de9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 3 Aug 2015 00:31:33 +0200 Subject: [PATCH 027/142] add lzma compression needs python 3.3+, on 3.2 it won't be available. --- borg/archiver.py | 6 ++++-- borg/compress.pyx | 30 +++++++++++++++++++++++++++++- borg/helpers.py | 6 ++++-- borg/testsuite/compress.py | 27 ++++++++++++++++++++++++--- 4 files changed, 61 insertions(+), 8 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 032313dbf..fb6db8a19 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -641,12 +641,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='select compression algorithm and level, by giving a number: ' '0 == no compression [default], ' '1..9 == zlib level 1..9, ' - '10 == lz4. ' + '10 == lz4, ' + '20-29 == lzma level 0..9.' 'Alternatively, you can also give a name and optionally additional args: ' 'null == no compression, ' 'zlib == zlib (default level 6), ' 'zlib,0 .. zlib,9 == zlib (with level 0..9), ' - 'lz4 == lz4.') + 'lz4 == lz4, ' + 'lzma,0 .. 
lzma,9 == lzma (with level 0..9).') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/compress.pyx b/borg/compress.pyx index 03815b3a5..c1bdeff82 100644 --- a/borg/compress.pyx +++ b/borg/compress.pyx @@ -1,4 +1,8 @@ import zlib +try: + import lzma +except ImportError: + lzma = None cdef extern from "lz4.h": int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil @@ -104,6 +108,29 @@ cdef class LZ4(CompressorBase): return dest[:osize] +class LZMA(CompressorBase): + """ + lzma compression / decompression (python 3.3+ stdlib) + """ + ID = b'\x02\x00' + name = 'lzma' + + def __init__(self, level=6, **kwargs): + super().__init__(**kwargs) + self.level = level + if lzma is None: + raise ValueError('No lzma support found.') + + def compress(self, data): + # we do not need integrity checks in lzma, we do that already + data = lzma.compress(data, preset=self.level, check=lzma.CHECK_NONE) + return super().compress(data) + + def decompress(self, data): + data = super().decompress(data) + return lzma.decompress(data) + + class ZLIB(CompressorBase): """ zlib compression / decompression (python stdlib) @@ -137,8 +164,9 @@ COMPRESSOR_TABLE = { CNULL.name: CNULL, LZ4.name: LZ4, ZLIB.name: ZLIB, + LZMA.name: LZMA, } -COMPRESSOR_LIST = [LZ4, CNULL, ZLIB, ] # check fast stuff first +COMPRESSOR_LIST = [LZ4, CNULL, ZLIB, LZMA, ] # check fast stuff first def get_compressor(name, **kwargs): cls = COMPRESSOR_TABLE[name] diff --git a/borg/helpers.py b/borg/helpers.py index 69a6db0db..020c263e7 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -302,13 +302,15 @@ def CompressionSpec(s): return dict(name='zlib', level=compression) if compression == 10: return dict(name='lz4') + if 20 <= compression <= 29: + return dict(name='lzma', level=compression-20) raise ValueError except ValueError: # --compression algo[,...] 
name = compression if name in ('null', 'lz4', ): return dict(name=name) - if name == 'zlib': + if name in ('zlib', 'lzma', ): if count < 2: level = 6 # default compression level in py stdlib elif count == 2: @@ -317,7 +319,7 @@ def CompressionSpec(s): raise ValueError else: raise ValueError - return dict(name='zlib', level=level) + return dict(name=name, level=level) raise ValueError diff --git a/borg/testsuite/compress.py b/borg/testsuite/compress.py index 441214e7b..6d7319c1b 100644 --- a/borg/testsuite/compress.py +++ b/borg/testsuite/compress.py @@ -1,4 +1,8 @@ import zlib +try: + import lzma +except ImportError: + lzma = None import pytest @@ -6,7 +10,7 @@ from ..compress import get_compressor, Compressor, CNULL, ZLIB, LZ4 buffer = bytes(2**16) -data = b'fooooooooobaaaaaaaar' +data = b'fooooooooobaaaaaaaar' * 10 params = dict(name='zlib', level=6, buffer=buffer) @@ -46,6 +50,16 @@ def test_zlib(): assert data == Compressor(**params).decompress(cdata) # autodetect +def test_lzma(): + if lzma is None: + pytest.skip("No lzma support found.") + c = get_compressor(name='lzma') + cdata = c.compress(data) + assert len(cdata) < len(data) + assert data == c.decompress(cdata) + assert data == Compressor(**params).decompress(cdata) # autodetect + + def test_autodetect_invalid(): with pytest.raises(ValueError): Compressor(**params).decompress(b'\xff\xfftotalcrap') @@ -68,13 +82,20 @@ def test_zlib_compat(): def test_compressor(): - for params in [ + params_list = [ dict(name='null', buffer=buffer), dict(name='lz4', buffer=buffer), dict(name='zlib', level=0, buffer=buffer), dict(name='zlib', level=6, buffer=buffer), dict(name='zlib', level=9, buffer=buffer), - ]: + ] + if lzma: + params_list += [ + dict(name='lzma', level=0, buffer=buffer), + dict(name='lzma', level=6, buffer=buffer), + dict(name='lzma', level=9, buffer=buffer), + ] + for params in params_list: c = Compressor(**params) assert data == c.decompress(c.compress(data)) From 
9f1d92c993a5c2219f8b9e6b9b2b8474cb0b630b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 3 Aug 2015 23:48:56 +0200 Subject: [PATCH 028/142] implement --umask M affects local and remote umask, secure by default M == 077 --- borg/archiver.py | 6 ++++-- borg/helpers.py | 10 ++++++++++ borg/remote.py | 8 +++++--- borg/testsuite/archiver.py | 7 +++++++ 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 84e568e73..b8faa62f3 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -21,7 +21,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - is_cachedir, bigint_to_int, ChunkerParams + is_cachedir, bigint_to_int, ChunkerParams, set_umask from .remote import RepositoryServer, RemoteRepository @@ -220,7 +220,6 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # be restrictive when restoring files, restore permissions later if sys.getfilesystemencoding() == 'ascii': print('Warning: File system encoding is "ascii", extracting non-ascii filenames will not be supported.') - os.umask(0o077) repository = self.open_repository(args.archive) manifest, key = Manifest.load(repository) archive = Archive(repository, key, manifest, args.archive.archive, @@ -511,6 +510,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") default=False, help='verbose output') common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false') + common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M', + help='set umask to M (local and remote, default: 0o077)') # We can't use argparse for "serve" since we don't want it to show up 
in "Available commands" if args: @@ -821,6 +822,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") args = parser.parse_args(args or ['-h']) self.verbose = args.verbose + set_umask(args.umask) update_excludes(args) return args.func(args) diff --git a/borg/helpers.py b/borg/helpers.py index d20532723..7043822b7 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -605,3 +605,13 @@ def int_to_bigint(value): if value.bit_length() > 63: return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True) return value + + +def set_umask(umask): + return os.umask(umask) + + +def get_umask(): + umask = set_umask(0) + set_umask(umask) + return umask diff --git a/borg/remote.py b/borg/remote.py index afec54710..aede16d9e 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -10,7 +10,7 @@ import traceback from . import __version__ -from .helpers import Error, IntegrityError +from .helpers import Error, IntegrityError, get_umask from .repository import Repository BUFSIZE = 10 * 1024 * 1024 @@ -124,8 +124,10 @@ class RemoteRepository: self.responses = {} self.unpacker = msgpack.Unpacker(use_list=False) self.p = None + # use local umask also for the remote process + umask = ['--umask', '%03o' % get_umask()] if location.host == '__testsuite__': - args = [sys.executable, '-m', 'borg.archiver', 'serve'] + self.extra_test_args + args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args else: args = ['ssh'] if location.port: @@ -134,7 +136,7 @@ class RemoteRepository: args.append('%s@%s' % (location.user, location.host)) else: args.append('%s' % location.host) - args += ['borg', 'serve'] + args += ['borg', 'serve'] + umask self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE) self.stdin_fd = self.p.stdin.fileno() self.stdout_fd = self.p.stdout.fileno() diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 35f8171d9..b466d6ad6 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py 
@@ -425,6 +425,13 @@ class ArchiverTestCase(ArchiverTestCaseBase): # Restore permissions so shutil.rmtree is able to delete it os.system('chmod -R u+w ' + self.repository_path) + def test_umask(self): + self.create_regular_file('file1', size=1024 * 80) + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + mode = os.stat(self.repository_path).st_mode + self.assertEqual(stat.S_IMODE(mode), 0o700) + def test_cmdline_compatibility(self): self.create_regular_file('file1', size=1024 * 80) self.cmd('init', self.repository_location) From 71646249cb8167efb502f6bc0f3a1565eea2c598 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 4 Aug 2015 09:53:26 +0200 Subject: [PATCH 029/142] implement --remote-path to allow non-default-path borg locations --- borg/archiver.py | 3 +++ borg/remote.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index b8faa62f3..818955e0b 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -512,6 +512,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false') common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M', help='set umask to M (local and remote, default: 0o077)') + common_parser.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH', + help='set remote path to executable (default: "borg")') # We can't use argparse for "serve" since we don't want it to show up in "Available commands" if args: @@ -823,6 +825,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") args = parser.parse_args(args or ['-h']) self.verbose = args.verbose set_umask(args.umask) + RemoteRepository.remote_path = args.remote_path update_excludes(args) return args.func(args) diff --git a/borg/remote.py b/borg/remote.py index aede16d9e..fad036ccb 
100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -108,9 +108,9 @@ class RepositoryServer: class RemoteRepository: extra_test_args = [] + remote_path = None class RPCError(Exception): - def __init__(self, name): self.name = name @@ -136,7 +136,7 @@ class RemoteRepository: args.append('%s@%s' % (location.user, location.host)) else: args.append('%s' % location.host) - args += ['borg', 'serve'] + umask + args += [self.remote_path, 'serve'] + umask self.p = Popen(args, bufsize=0, stdin=PIPE, stdout=PIPE) self.stdin_fd = self.p.stdin.fileno() self.stdout_fd = self.p.stdout.fileno() From 175a6d7b0418f35527e588b9d4bbb5a4ca5013db Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 4 Aug 2015 12:31:06 +0200 Subject: [PATCH 030/142] simplify umask code in a similar way as the remote_path code was implemented: just patch the RemoteRepository class object --- borg/archiver.py | 5 +++-- borg/helpers.py | 10 ---------- borg/remote.py | 5 +++-- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 818955e0b..8230677da 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -21,7 +21,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - is_cachedir, bigint_to_int, ChunkerParams, set_umask + is_cachedir, bigint_to_int, ChunkerParams from .remote import RepositoryServer, RemoteRepository @@ -824,8 +824,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") args = parser.parse_args(args or ['-h']) self.verbose = args.verbose - set_umask(args.umask) + os.umask(args.umask) RemoteRepository.remote_path = args.remote_path + RemoteRepository.umask = args.umask update_excludes(args) return args.func(args) diff 
--git a/borg/helpers.py b/borg/helpers.py index 7043822b7..d20532723 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -605,13 +605,3 @@ def int_to_bigint(value): if value.bit_length() > 63: return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True) return value - - -def set_umask(umask): - return os.umask(umask) - - -def get_umask(): - umask = set_umask(0) - set_umask(umask) - return umask diff --git a/borg/remote.py b/borg/remote.py index fad036ccb..1d7ae84e2 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -10,7 +10,7 @@ import traceback from . import __version__ -from .helpers import Error, IntegrityError, get_umask +from .helpers import Error, IntegrityError from .repository import Repository BUFSIZE = 10 * 1024 * 1024 @@ -109,6 +109,7 @@ class RepositoryServer: class RemoteRepository: extra_test_args = [] remote_path = None + umask = None class RPCError(Exception): def __init__(self, name): @@ -125,7 +126,7 @@ class RemoteRepository: self.unpacker = msgpack.Unpacker(use_list=False) self.p = None # use local umask also for the remote process - umask = ['--umask', '%03o' % get_umask()] + umask = ['--umask', '%03o' % self.umask] if location.host == '__testsuite__': args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args else: From 8e717c55e689031796da42eb9193bceff580e8ab Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 4 Aug 2015 12:49:13 +0200 Subject: [PATCH 031/142] updated CHANGES --- CHANGES | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGES b/CHANGES index aa7af3247..d7080b15d 100644 --- a/CHANGES +++ b/CHANGES @@ -20,9 +20,14 @@ New features: deprecate --encryption passphrase, fixes #85 - improve at-end error logging, always log exceptions and set exit_code=1 - LoggedIO: better error checks / exceptions / exception handling +- implement --remote-path to allow non-default-path borg locations, #125 +- implement --umask M and use 077 as default umask, #117 Bug fixes: +- fix 
segfault that happened for unreadable files (chunker: n needs to be a + signed size_t), #116 +- repo delete: add destroy to allowed rpc methods, fixes issue #114 - more compatible repository locking code (based on mkdir), maybe fixes #92 (attic #317, attic #201). - better Exception msg if no Borg is installed on the remote repo server, #56 @@ -34,6 +39,7 @@ Bug fixes: Other changes: - improved docs: + - added docs/misc directory for misc. writeups that won't be included "as is" into the html docs. - document environment variables and return codes (attic #324, attic #52) @@ -44,6 +50,11 @@ Other changes: - add FAQ entries about redundancy / integrity - clarify that borg extract uses the cwd as extraction target - update internals doc about chunker params, memory usage and compression + - add some words about resource usage + - document how to backup raw disk + - add note about how to run borg from virtual env + - add solutions for (ll)fuse installation problems + - tested and updated cygwin docs - use borg-tmp as prefix for temporary files / directories - short prune options without "keep-" are deprecated, do not suggest them @@ -51,6 +62,7 @@ Other changes: - remove usage of unittest.mock, always use mock from pypi - use entrypoints instead of scripts, for better use of the wheel format and modern installs +- add requirements.d/development.txt and modify tox.ini I forgot to list some stuff already implemented in 0.23.0, here they are: From 45e3c3d04e880606902c7c53950b3192c43bef18 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 4 Aug 2015 13:22:04 +0200 Subject: [PATCH 032/142] add some compatibility notes about the umask --- CHANGES | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index d7080b15d..f19b7f8aa 100644 --- a/CHANGES +++ b/CHANGES @@ -5,6 +5,17 @@ Borg Changelog Version 0.24.0 -------------- +Incompatible changes (compared to 0.23): + +- borg now always issues --umask NNN option when invoking 
another borg via ssh + on the repository server. By that, it's making sure it uses the same umask + for remote repos as for local ones. Because of this, you must upgrade both + server and client(s) to 0.24. +- the default umask is 077 now (if you do not specify via --umask) which might + be a different one than you used previously. The default umask avoids that + you accidentally give access permissions for group and/or others to files + created by borg (e.g. the repository). + New features: - borg create --chunker-params ... to configure the chunker, fixes #16 @@ -21,7 +32,7 @@ New features: - improve at-end error logging, always log exceptions and set exit_code=1 - LoggedIO: better error checks / exceptions / exception handling - implement --remote-path to allow non-default-path borg locations, #125 -- implement --umask M and use 077 as default umask, #117 +- implement --umask M and use 077 as default umask for better security, #117 Bug fixes: From 5b441f78014b45dac6d49f09048475fbe7cdc3af Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 4 Aug 2015 13:30:35 +0200 Subject: [PATCH 033/142] some small Cython code improvements, thanks to Stefan Behnel --- borg/hashindex.pyx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index d5d4b6f45..c44fe3947 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -24,15 +24,18 @@ cdef extern from "_hashindex.c": int _le32toh(int v) -_NoDefault = object() +cdef _NoDefault = object() +cimport cython + +@cython.internal cdef class IndexBase: cdef HashIndex *index key_size = 32 def __cinit__(self, capacity=0, path=None): if path: - self.index = hashindex_read(os.fsencode(path)) + self.index = hashindex_read(os.fsencode(path)) if not self.index: raise Exception('hashindex_read failed') else: @@ -49,7 +52,7 @@ cdef class IndexBase: return cls(path=path) def write(self, path): - if not hashindex_write(self.index, os.fsencode(path)): + if not 
hashindex_write(self.index, os.fsencode(path)): raise Exception('hashindex_write failed') def clear(self): From d65ca51d54d9eaac9fa2b35a9dcda5ebb1556b8c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 12:59:51 +0200 Subject: [PATCH 034/142] deduplicate and refactor the docs README.rst (shown on github and also at the start of the html docs) shall be like an elevator speech - convince readers in a very short time. this is most important, everything else can come after we got the reader's interest. include README into docs to avoid duplication. also include CHANGES into docs. add developer docs, move examples from tox.ini there add separate support docs remove glossary, most of what was there can be understood by an admin from context move attic and compatibility note to the end --- CHANGES => CHANGES.rst | 0 README.rst | 128 +++++++++++++++++++++++++---------------- docs/changes.rst | 4 ++ docs/development.rst | 67 +++++++++++++++++++++ docs/foreword.rst | 65 --------------------- docs/index.rst | 75 ++---------------------- docs/intro.rst | 7 +++ docs/support.rst | 34 +++++++++++ tox.ini | 11 ---- 9 files changed, 195 insertions(+), 196 deletions(-) rename CHANGES => CHANGES.rst (100%) create mode 100644 docs/changes.rst create mode 100644 docs/development.rst delete mode 100644 docs/foreword.rst create mode 100644 docs/intro.rst create mode 100644 docs/support.rst diff --git a/CHANGES b/CHANGES.rst similarity index 100% rename from CHANGES rename to CHANGES.rst diff --git a/README.rst b/README.rst index c9cf90706..1e1920410 100644 --- a/README.rst +++ b/README.rst @@ -1,52 +1,31 @@ -|build| - What is Borg? ------------- -Borg is a deduplicating backup program. The main goal of Borg is to provide -an efficient and secure way to backup data. The data deduplication -technique used makes Borg suitable for daily backups since only changes -are stored. +Borg is a deduplicating backup program. 
+Optionally, it also supports compression and authenticated encryption. -Borg is a fork of `Attic `_ and maintained by "`The Borg Collective `_". - -BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. -EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER -CHANGES (like when going from 0.x.y to 1.0.0). Please read CHANGES document. - -NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES. - -THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. - -Read `issue #1 `_ on the issue tracker, goals are being defined there. - -Please also see the `LICENSE `_ for more informations. - -Easy to use -~~~~~~~~~~~ -Initialize backup repository and create a backup archive:: - - $ borg init /mnt/backup - $ borg create -v /mnt/backup::documents ~/Documents - -For a graphical frontend refer to our complementary project `BorgWeb `_. +The main goal of Borg is to provide an efficient and secure way to backup data. +The data deduplication technique used makes Borg suitable for daily backups +since only changes are stored. +The authenticated encryption technique makes it suitable for backups to not +fully trusted targets. Main features ~~~~~~~~~~~~~ Space efficient storage - Variable block size deduplication is used to reduce the number of bytes + Variable block size deduplication is used to reduce the number of bytes stored by detecting redundant data. Each file is split into a number of variable length chunks and only chunks that have never been seen before are - compressed and added to the repository. + added to the repository. 
The content-defined chunking based deduplication is applied to remove - duplicate chunks within: + duplicate chunks within: * the current backup data set (even inside single files / streams) * current and previous backups of same machine * all the chunks in the same repository, even if coming from other machines This advanced deduplication method does NOT depend on: - + * file/directory names staying the same (so you can move your stuff around without killing the deduplication, even between machines sharing a repo) * complete files or time stamps staying the same (if a big file changes a @@ -59,37 +38,84 @@ Optional data encryption All data can be protected using 256-bit AES encryption and data integrity and authenticity is verified using HMAC-SHA256. +Optional compression + All data can be compressed (by zlib, level 0-9). + Off-site backups Borg can store data on any remote host accessible over SSH. This is - most efficient if Borg is also installed on the remote host. + most efficient if Borg is also installed on the remote host. If you can't + install Borg there, you can also use some network filesystem (sshfs, nfs, + ...), but it will be less efficient. Backups mountable as filesystems Backup archives are mountable as userspace filesystems for easy backup verification and restores. -What do I need? ---------------- -Borg requires Python 3.2 or above to work. -Borg also requires a sufficiently recent OpenSSL (>= 1.0.0). -In order to mount archives as filesystems, llfuse is required. +Platforms Borg works on + * Linux + * FreeBSD + * Mac OS X + * Cygwin (unsupported) -How do I install it? --------------------- -:: - $ pip3 install borgbackup +Easy to use +~~~~~~~~~~~ +Initialize a new backup repository and create a backup archive:: -Where are the docs? -------------------- -Go to https://borgbackup.github.io/ for a prebuilt version of the documentation. -You can also build it yourself from the docs folder. 
+ $ borg init /mnt/backup + $ borg create /mnt/backup::Monday ~/Documents -Where are the tests? --------------------- -The tests are in the borg/testsuite package. To run the test suite use the -following command:: +Now doing another backup, just to show off the great deduplication:: + + $ borg create --stats /mnt/backup::Tuesday ~/Documents + + Archive name: Tuesday + Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a + Start time: Tue Mar 25 12:00:10 2014 + End time: Tue Mar 25 12:00:10 2014 + Duration: 0.08 seconds + Number of files: 358 + Original size Compressed size Deduplicated size + This archive: 57.16 MB 46.78 MB 151.67 kB + All archives: 114.02 MB 93.46 MB 44.81 MB + +For a graphical frontend refer to our complementary project +`BorgWeb `_. + + +How to proceed from here +------------------------ +Everything about requirements, installation, getting a quick start, usage +reference, FAQ, support info, internals and developer infos is in our +documentation: + +See `our online documentation `_ +or alternatively read it in raw text form in the `docs/*.rst` files. + + +Notes +----- + +Build status: +|build| + +Borg is a fork of `Attic `_ and maintained by +"`The Borg Collective `_". + +BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. +EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER +CHANGES (like when going from 0.x.y to 1.0.0). Please read CHANGES document. + +NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES. + +THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. + +Read `issue #1 `_ on the issue +tracker, goals are being defined there. + +For more information, please also see the +`LICENSE `_. - $ fakeroot -u tox # you need to have tox and pytest installed .. 
|build| image:: https://travis-ci.org/borgbackup/borg.svg :alt: Build Status diff --git a/docs/changes.rst b/docs/changes.rst new file mode 100644 index 000000000..5e859ecc3 --- /dev/null +++ b/docs/changes.rst @@ -0,0 +1,4 @@ +.. include:: global.rst.inc +.. _changelog: + +.. include:: ../CHANGES.rst diff --git a/docs/development.rst b/docs/development.rst new file mode 100644 index 000000000..6c06eeb9e --- /dev/null +++ b/docs/development.rst @@ -0,0 +1,67 @@ +.. include:: global.rst.inc +.. _development: + +Development +=========== + +This chapter will get you started with |project_name| development. + +|project_name| is written in Python (with a little bit of Cython and C for +the performance critical parts). + + +Building a development environment +---------------------------------- + +First, just install borg into a virtual env as described before. + +To install some additional packages needed for running the tests, activate your +virtual env and run:: + + pip install -r requirements.d/development.txt + + +Running the tests +----------------- + +The tests are in the borg/testsuite package. + +To run them, you need to have fakeroot, tox and pytest installed. + +To run the test suite use the following command:: + + fakeroot -u tox # run all tests + +Some more advanced examples:: + + # verify a changed tox.ini (run this after any change to tox.ini): + fakeroot -u tox --recreate + + fakeroot -u tox -e py32 # run all tests, but only on python 3.2 + + fakeroot -u tox borg.testsuite.locking # only run 1 test module + + fakeroot -u tox borg.testsuite.locking -- -k '"not Timer"' # exclude some tests + + fakeroot -u tox borg.testsuite -- -v # verbose py.test + +Important notes: + +- Without fakeroot -u some tests will fail. +- When using -- to give options to py.test, you MUST also give borg.testsuite[.module]. + +Building the docs with Sphinx +----------------------------- + +The documentation (in reStructuredText format, .rst) is in docs/. 
+ +To build the html version of it, you need to have sphinx installed:: + + pip3 install sphinx + +Now run:: + + cd docs/ + make html + +Then point a web browser at docs/_build/html/index.html. diff --git a/docs/foreword.rst b/docs/foreword.rst deleted file mode 100644 index c3f70c42e..000000000 --- a/docs/foreword.rst +++ /dev/null @@ -1,65 +0,0 @@ -.. include:: global.rst.inc -.. _foreword: - -Foreword -======== - -|project_name| is a secure backup program for Linux, FreeBSD and Mac OS X. -|project_name| is designed for efficient data storage where only new or -modified data is stored. - -Features --------- - -Space efficient storage - Variable block size `deduplication`_ is used to reduce the number of bytes - stored by detecting redundant data. Each file is split into a number of - variable length chunks and only chunks that have never been seen before - are added to the repository (and optionally compressed). - -Optional data encryption - All data can be protected using 256-bit AES_ encryption and data integrity - and authenticity is verified using `HMAC-SHA256`_. - -Off-site backups - |project_name| can store data on any remote host accessible over SSH as - long as |project_name| is installed. If you don't have |project_name| - installed there, you can use some network filesytem (sshfs, nfs, ...) - to mount a filesystem located on your remote host and use it like it was - local (but that will be slower). - -Backups mountable as filesystems - Backup archives are :ref:`mountable ` as - `userspace filesystems`_ for easy backup verification and restores. - - -Glossary --------- - -.. _deduplication_def: - -Deduplication - Deduplication is a technique for improving storage utilization by - eliminating redundant data. - -.. _archive_def: - -Archive - An archive is a collection of files along with metadata that include file - permissions, directory structure and various file attributes. 
- Since each archive in a repository must have a unique name a good naming - convention is ``hostname-YYYY-MM-DD``. - -.. _repository_def: - -Repository - A repository is a filesystem directory storing data from zero or more - archives. The data in a repository is both deduplicated and - optionally encrypted making it both efficient and safe. Repositories are - created using :ref:`borg_init` and the contents can be listed using - :ref:`borg_list`. - -Key file - When a repository is initialized a key file containing a password - protected encryption key is created. It is vital to keep this file safe - since the repository data is totally inaccessible without it. diff --git a/docs/index.rst b/docs/index.rst index 8ca4fe092..a871ef353 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,81 +1,18 @@ .. include:: global.rst.inc -Welcome to Borg -================ -|project_name| is a deduplicating backup program. -Optionally, it also supports compression and authenticated encryption. -The main goal of |project_name| is to provide an efficient and secure way -to backup data. The data deduplication technique used makes |project_name| -suitable for daily backups since only the changes are stored. The authenticated -encryption makes it suitable for backups to not fully trusted targets. - -|project_name| is written in Python (with a little bit of Cython and C for -the performance critical parts). 
- - -Easy to use ------------ -Initialize a new backup :ref:`repository ` and create your -first backup :ref:`archive ` in two lines:: - - $ borg init /mnt/backup - $ borg create /mnt/backup::Monday ~/Documents - $ borg create --stats /mnt/backup::Tuesday ~/Documents - Archive name: Tuesday - Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a - Start time: Tue Mar 25 12:00:10 2014 - End time: Tue Mar 25 12:00:10 2014 - Duration: 0.08 seconds - Number of files: 358 - Original size Compressed size Deduplicated size - This archive: 57.16 MB 46.78 MB 151.67 kB - All archives: 114.02 MB 93.46 MB 44.81 MB - -See the :ref:`quickstart` chapter for a more detailed example. - -Easy installation ------------------ -You can use pip to install |project_name| quickly and easily:: - - $ pip3 install borgbackup - -Need more help with installing? See :ref:`installation`. - -User's Guide -============ +Borg Documentation +================== .. toctree:: :maxdepth: 2 - foreword + intro installation quickstart usage faq + support + changes internals - -Getting help -============ - -If you've found a bug or have a concrete feature request, please create a new -ticket on the project's `issue tracker`_ (after checking whether someone else -already has reported the same thing). - -For more general questions or discussions, IRC or mailing list are preferred. - -IRC ---- -Join us on channel #borgbackup on chat.freenode.net. As usual on IRC, just -ask or tell directly and then patiently wait for replies. Stay connected. - -Mailing list ------------- - -There is a mailing list for Borg on librelist_ that you can use for feature -requests and general discussions about Borg. A mailing list archive is -available `here `_. - -To subscribe to the list, send an email to borgbackup@librelist.com and reply -to the confirmation mail. Likewise, to unsubscribe, send an email to -borgbackup-unsubscribe@librelist.com and reply to the confirmation mail. 
+ development diff --git a/docs/intro.rst b/docs/intro.rst new file mode 100644 index 000000000..7e7759c7d --- /dev/null +++ b/docs/intro.rst @@ -0,0 +1,7 @@ +.. include:: global.rst.inc +.. _foreword: + +Introduction +============ + +.. include:: ../README.rst diff --git a/docs/support.rst b/docs/support.rst new file mode 100644 index 000000000..5e953f202 --- /dev/null +++ b/docs/support.rst @@ -0,0 +1,34 @@ +.. include:: global.rst.inc +.. _support: + +Support +======= + +Issue Tracker +------------- + +If you've found a bug or have a concrete feature request, please create a new +ticket on the project's `issue tracker`_ (after checking whether someone else +already has reported the same thing). + +For more general questions or discussions, IRC or mailing list are preferred. + +IRC +--- +Join us on channel #borgbackup on chat.freenode.net. + +As usual on IRC, just ask or tell directly and then patiently wait for replies. +Stay connected. + +Mailing list +------------ + +There is a mailing list for Borg on librelist_ that you can use for feature +requests and general discussions about Borg. A mailing list archive is +available `here `_. + +To subscribe to the list, send an email to borgbackup@librelist.com and reply +to the confirmation mail. + +To unsubscribe, send an email to borgbackup-unsubscribe@librelist.com and reply +to the confirmation mail. diff --git a/tox.ini b/tox.ini index fdf91a2db..c1a9e019f 100644 --- a/tox.ini +++ b/tox.ini @@ -1,16 +1,5 @@ # tox configuration - if you change anything here, run this to verify: # fakeroot -u tox --recreate -# -# Invokation examples: -# fakeroot -u tox # run all tests -# fakeroot -u tox -e py32 # run all tests, but only on python 3.2 -# fakeroot -u tox borg.testsuite.locking # only run 1 test module -# fakeroot -u tox borg.testsuite.locking -- -k '"not Timer"' # exclude some tests -# fakeroot -u tox borg.testsuite -- -v # verbose py.test -# -# Important notes: -# Without fakeroot -u some tests will fail. 
-# When using -- to give options to py.test, you MUST also give borg.testsuite[.module]. [tox] envlist = py32, py33, py34 From e3baeefa1b8ae9ce864c8a9dbc720e64134405ef Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 13:10:56 +0200 Subject: [PATCH 035/142] docs: reorganize sidebar, prev/next at top --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 27eba5b76..027fd0d4d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -134,7 +134,7 @@ html_static_path = [] # Custom sidebar templates, maps document names to template names. html_sidebars = { 'index': ['sidebarlogo.html', 'sidebarusefullinks.html', 'searchbox.html'], - '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sidebarusefullinks.html', 'searchbox.html'] + '**': ['sidebarlogo.html', 'relations.html', 'searchbox.html', 'localtoc.html', 'sidebarusefullinks.html'] } # Additional templates that should be rendered to pages, maps page names to # template names. From dcdcbda87d20822359c0f0f3b80596a0533beaad Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 15:17:07 +0200 Subject: [PATCH 036/142] try if readthedocs finds the borg package this way --- docs/conf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 027fd0d4d..9c0e84cb8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,13 +11,13 @@ # All configuration values have a default; values that are commented out # serve to show the default. -from borg import __version__ as sw_version - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-#import sys, os -#sys.path.insert(0, os.path.abspath('.')) +import sys, os +sys.path.insert(0, os.path.abspath('..')) + +from borg import __version__ as sw_version # -- General configuration ----------------------------------------------------- From 58d57df46d53eb90000fa70555856a5f508e4ea5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 16:39:50 +0200 Subject: [PATCH 037/142] improve README.rst --- README.rst | 115 +++++++++++++++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/README.rst b/README.rst index 1e1920410..0d4ee1dac 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ -What is Borg? -------------- -Borg is a deduplicating backup program. -Optionally, it also supports compression and authenticated encryption. +What is BorgBackup? +------------------- +BorgBackup (short: Borg) is a deduplicating backup program. +Optionally, it supports compression and authenticated encryption. The main goal of Borg is to provide an efficient and secure way to backup data. The data deduplication technique used makes Borg suitable for daily backups @@ -9,54 +9,69 @@ since only changes are stored. The authenticated encryption technique makes it suitable for backups to not fully trusted targets. +`Borg Installation docs `_ + + Main features ~~~~~~~~~~~~~ -Space efficient storage - Variable block size deduplication is used to reduce the number of bytes - stored by detecting redundant data. Each file is split into a number of - variable length chunks and only chunks that have never been seen before are - added to the repository. +**Space efficient storage** + Deduplication based on content-defined chunking is used to reduce the number + of bytes stored: each file is split into a number of variable length chunks + and only chunks that have never been seen before are added to the repository. 
- The content-defined chunking based deduplication is applied to remove - duplicate chunks within: + To deduplicate, all the chunks in the same repository are considered, no + matter whether they come from different machines, from previous backups, + from the same backup or even from the same single file. - * the current backup data set (even inside single files / streams) - * current and previous backups of same machine - * all the chunks in the same repository, even if coming from other machines + Compared to other deduplication approaches, this method does NOT depend on: - This advanced deduplication method does NOT depend on: + * file/directory names staying the same - * file/directory names staying the same (so you can move your stuff around - without killing the deduplication, even between machines sharing a repo) - * complete files or time stamps staying the same (if a big file changes a - little, only a few new chunks will be stored - this is great for VMs or - raw disks) - * the absolute position of a data chunk inside a file (stuff may get shifted - and will still be found by the deduplication algorithm) + So you can move your stuff around without killing the deduplication, + even between machines sharing a repo. -Optional data encryption - All data can be protected using 256-bit AES encryption and data integrity - and authenticity is verified using HMAC-SHA256. + * complete files or time stamps staying the same -Optional compression - All data can be compressed (by zlib, level 0-9). + If a big file changes a little, only a few new chunks will be stored - + this is great for VMs or raw disks. -Off-site backups - Borg can store data on any remote host accessible over SSH. This is - most efficient if Borg is also installed on the remote host. If you can't - install Borg there, you can also use some network filesystem (sshfs, nfs, - ...), but it will be less efficient. 
+ * the absolute position of a data chunk inside a file -Backups mountable as filesystems - Backup archives are mountable as userspace filesystems for easy backup - verification and restores. + Stuff may get shifted and will still be found by the deduplication + algorithm. -Platforms Borg works on +**Speed** + * performance critical code (chunking, compression, encryption) is + implemented in C/Cython + * local caching of files/chunks index data + * quick detection of unmodified files + +**Data encryption** + All data can be protected using 256-bit AES encryption, data integrity and + authenticity is verified using HMAC-SHA256. + +**Compression** + All data can be compressed by zlib, level 0-9. + +**Off-site backups** + Borg can store data on any remote host accessible over SSH. If Borg is + installed on the remote host, big performance gains can be achieved + compared to using a network filesystem (sshfs, nfs, ...). + +**Backups mountable as filesystems** + Backup archives are mountable as userspace filesystems for easy interactive + backup examination and restores (e.g. by using a regular file manager). + +**Platforms Borg works on** * Linux * FreeBSD * Mac OS X * Cygwin (unsupported) +**Free and Open Source Software** + * security and functionality can be audited independently + * licensed under the BSD (3-clause) license + Easy to use ~~~~~~~~~~~ @@ -70,38 +85,28 @@ Now doing another backup, just to show off the great deduplication:: $ borg create --stats /mnt/backup::Tuesday ~/Documents Archive name: Tuesday - Archive fingerprint: 387a5e3f9b0e792e91ce87134b0f4bfe17677d9248cb5337f3fbf3a8e157942a + Archive fingerprint: 387a5e3f9b0e792e91c... 
Start time: Tue Mar 25 12:00:10 2014 End time: Tue Mar 25 12:00:10 2014 Duration: 0.08 seconds Number of files: 358 - Original size Compressed size Deduplicated size - This archive: 57.16 MB 46.78 MB 151.67 kB - All archives: 114.02 MB 93.46 MB 44.81 MB + Original size Compressed size Deduplicated size + This archive: 57.16 MB 46.78 MB 151.67 kB <--- ! + All archives: 114.02 MB 93.46 MB 44.81 MB For a graphical frontend refer to our complementary project `BorgWeb `_. -How to proceed from here ------------------------- -Everything about requirements, installation, getting a quick start, usage -reference, FAQ, support info, internals and developer infos is in our -documentation: - -See `our online documentation `_ -or alternatively read it in raw text form in the `docs/*.rst` files. - - Notes ----- -Build status: -|build| - Borg is a fork of `Attic `_ and maintained by "`The Borg Collective `_". +Read `issue #1 `_ about the initial +considerations regarding project goals and policy of the Borg project. + BORG IS NOT COMPATIBLE WITH ORIGINAL ATTIC. EXPECT THAT WE WILL BREAK COMPATIBILITY REPEATEDLY WHEN MAJOR RELEASE NUMBER CHANGES (like when going from 0.x.y to 1.0.0). Please read CHANGES document. @@ -110,12 +115,10 @@ NOT RELEASED DEVELOPMENT VERSIONS HAVE UNKNOWN COMPATIBILITY PROPERTIES. THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. -Read `issue #1 `_ on the issue -tracker, goals are being defined there. - For more information, please also see the `LICENSE `_. +|build| .. 
|build| image:: https://travis-ci.org/borgbackup/borg.svg :alt: Build Status From 7e21d95deddc2030d0f00063013121dc7a4568a8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 16:40:38 +0200 Subject: [PATCH 038/142] fix CHANGES.rst filename in MANIFEST.in --- MANIFEST.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 480b1088a..d74d9e2c4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include README.rst AUTHORS LICENSE CHANGES MANIFEST.in versioneer.py +include README.rst AUTHORS LICENSE CHANGES.rst MANIFEST.in versioneer.py recursive-include borg *.pyx recursive-include docs * recursive-exclude docs *.pyc From a1e039ba215b09f68b75d4727a67a144382ccbe3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 6 Aug 2015 23:32:53 +0200 Subject: [PATCH 039/142] reimplement the chunk index merging in C the python code could take a rather long time and likely most of it was converting stuff from python to C and back. 
--- borg/_hashindex.c | 19 +++++++++++++++++++ borg/cache.py | 3 +-- borg/hashindex.pyx | 4 ++++ borg/testsuite/hashindex.py | 22 ++++++++++++++++++++++ 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index 2eebd09d9..128ab5b2f 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -385,3 +385,22 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs *total_unique_chunks = unique_chunks; *total_chunks = chunks; } + +static void +hashindex_merge(HashIndex *index, HashIndex *other) +{ + int32_t key_size = index->key_size; + const int32_t *other_values; + int32_t *my_values; + void *key = NULL; + + while((key = hashindex_next_key(other, key))) { + other_values = key + key_size; + my_values = hashindex_get(index, key); + if(my_values == NULL) { + hashindex_set(index, key, other_values); + } else { + *my_values += *other_values; + } + } +} diff --git a/borg/cache.py b/borg/cache.py index d64cdfb14..f50b456eb 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -309,8 +309,7 @@ class Cache: tf_in.extract(archive_id_hex, tmp_dir) chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') archive_chunk_idx = ChunkIndex.read(chunk_idx_path) - for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems(): - add(chunk_idx, chunk_id, size, csize, incr=count) + chunk_idx.merge(archive_chunk_idx) os.unlink(chunk_idx_path) self.begin_txn() diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index c44fe3947..83416bcdf 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -14,6 +14,7 @@ cdef extern from "_hashindex.c": void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize, long long *total_unique_chunks, long long *total_chunks) + void hashindex_merge(HashIndex *index, HashIndex *other) int hashindex_get_size(HashIndex *index) int hashindex_write(HashIndex *index, char *path) void 
*hashindex_get(HashIndex *index, void *key) @@ -190,6 +191,9 @@ cdef class ChunkIndex(IndexBase): &total_unique_chunks, &total_chunks) return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks + def merge(self, ChunkIndex other): + hashindex_merge(self.index, other.index) + cdef class ChunkKeyIterator: cdef ChunkIndex idx diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py index 41c019d61..bbefeb05e 100644 --- a/borg/testsuite/hashindex.py +++ b/borg/testsuite/hashindex.py @@ -6,6 +6,11 @@ from ..hashindex import NSIndex, ChunkIndex from . import BaseTestCase +def H(x): + # make some 32byte long thing that depends on x + return bytes('%-0.32d' % x, 'ascii') + + class HashIndexTestCase(BaseTestCase): def _generic_test(self, cls, make_value, sha): @@ -78,3 +83,20 @@ class HashIndexTestCase(BaseTestCase): second_half = list(idx.iteritems(marker=all[49][0])) self.assert_equal(len(second_half), 50) self.assert_equal(second_half, all[50:]) + + def test_chunkindex_merge(self): + idx1 = ChunkIndex() + idx1[H(1)] = 1, 100, 100 + idx1[H(2)] = 2, 200, 200 + idx1[H(3)] = 3, 300, 300 + # no H(4) entry + idx2 = ChunkIndex() + idx2[H(1)] = 4, 100, 100 + idx2[H(2)] = 5, 200, 200 + # no H(3) entry + idx2[H(4)] = 6, 400, 400 + idx1.merge(idx2) + assert idx1[H(1)] == (5, 100, 100) + assert idx1[H(2)] == (7, 200, 200) + assert idx1[H(3)] == (3, 300, 300) + assert idx1[H(4)] == (6, 400, 400) From ba753563141d4cf2ecf0beb141cf37d1538bb0e3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 7 Aug 2015 15:17:07 +0200 Subject: [PATCH 040/142] add OS X to travis --- .travis.yml | 55 +++++++++++++++++++++++++++++++------- .travis/install.sh | 43 +++++++++++++++++++++++++++++ .travis/run.sh | 23 ++++++++++++++++ .travis/upload_coverage.sh | 10 +++++++ 4 files changed, 121 insertions(+), 10 deletions(-) create mode 100755 .travis/install.sh create mode 100755 .travis/run.sh create mode 100755 .travis/upload_coverage.sh diff --git 
a/.travis.yml b/.travis.yml index 87d3afb02..89fdbbff1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,47 @@ +sudo: required + language: python -python: - - "3.2" - - "3.3" - - "3.4" -# command to install dependencies + +cache: + directories: + - $HOME/.cache/pip + +matrix: + include: + - python: 3.2 + os: linux + env: TOXENV=py32 + - python: 3.3 + os: linux + env: TOXENV=py33 + - python: 3.4 + os: linux + env: TOXENV=py34 + - language: generic + os: osx + osx_image: beta-xcode6.3 + env: TOXENV=py32 + - language: generic + os: osx + osx_image: beta-xcode6.3 + env: TOXENV=py33 + - language: generic + os: osx + osx_image: beta-xcode6.3 + env: TOXENV=py34 + install: - - "sudo apt-get install -y libacl1-dev" - - "pip install --use-mirrors Cython" - - "pip install -e ." -# command to run tests -script: fakeroot -u py.test + - ./.travis/install.sh + +script: + - ./.travis/run.sh + +after_success: + - ./.travis/upload_coverage.sh + +notifications: + irc: + channels: + - "irc.freenode.org#borgbackup" + use_notice: true + skip_join: true diff --git a/.travis/install.sh b/.travis/install.sh new file mode 100755 index 000000000..21ff76000 --- /dev/null +++ b/.travis/install.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +set -e +set -x + +if [[ "$(uname -s)" == 'Darwin' ]]; then + brew update || brew update + + if [[ "${OPENSSL}" != "0.9.8" ]]; then + brew outdated openssl || brew upgrade openssl + fi + + if which pyenv > /dev/null; then + eval "$(pyenv init -)" + fi + + brew outdated pyenv || brew upgrade pyenv + + case "${TOXENV}" in + py32) + pyenv install 3.2.6 + pyenv global 3.2.6 + ;; + py33) + pyenv install 3.3.6 + pyenv global 3.3.6 + ;; + py34) + pyenv install 3.4.3 + pyenv global 3.4.3 + ;; + esac + pyenv rehash + python -m pip install --user virtualenv +else + pip install virtualenv + sudo apt-get install -y libacl1-dev +fi + +python -m virtualenv ~/.venv +source ~/.venv/bin/activate +pip install tox pytest codecov Cython +pip install -e . 
diff --git a/.travis/run.sh b/.travis/run.sh new file mode 100755 index 000000000..cf504ac51 --- /dev/null +++ b/.travis/run.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -e +set -x + +if [[ "$(uname -s)" == "Darwin" ]]; then + eval "$(pyenv init -)" + if [[ "${OPENSSL}" != "0.9.8" ]]; then + # set our flags to use homebrew openssl + export ARCHFLAGS="-arch x86_64" + export LDFLAGS="-L/usr/local/opt/openssl/lib" + export CFLAGS="-I/usr/local/opt/openssl/include" + fi +fi + +source ~/.venv/bin/activate + +if [[ "$(uname -s)" == "Darwin" ]]; then + # no fakeroot on OS X + sudo tox -e $TOXENV +else + fakeroot -u tox +fi diff --git a/.travis/upload_coverage.sh b/.travis/upload_coverage.sh new file mode 100755 index 000000000..73584acfb --- /dev/null +++ b/.travis/upload_coverage.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e +set -x + +NO_COVERAGE_TOXENVS=(pep8) +if ! [[ "${NO_COVERAGE_TOXENVS[*]}" =~ "${TOXENV}" ]]; then + source ~/.venv/bin/activate + bash <(curl -s https://codecov.io/bash) -e TRAVIS_OS_NAME,TOXENV +fi From 5864bd76ebc2985f82fd49d69aa58895cb0698a8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 01:55:46 +0200 Subject: [PATCH 041/142] fix test coverage / codecov.io, use xcode6.4 --- .coveragerc | 13 +++++++++++++ .gitignore | 1 + .travis.yml | 6 +++--- .travis/install.sh | 2 +- .travis/upload_coverage.sh | 3 ++- requirements.d/development.txt | 1 + tox.ini | 2 +- 7 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 000000000..9056361b9 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,13 @@ +[run] +branch = True +source = borg + +[report] +exclude_lines = + pragma: no cover + def __repr__ + raise AssertionError + raise NotImplementedError + if 0: + if __name__ == .__main__.: +ignore_errors = True diff --git a/.gitignore b/.gitignore index f3564a429..97df7c610 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ docs/usage/*.inc borg.build/ 
borg.dist/ borg.exe +.coverage diff --git a/.travis.yml b/.travis.yml index 89fdbbff1..497bc7c04 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,15 +19,15 @@ matrix: env: TOXENV=py34 - language: generic os: osx - osx_image: beta-xcode6.3 + osx_image: xcode6.4 env: TOXENV=py32 - language: generic os: osx - osx_image: beta-xcode6.3 + osx_image: xcode6.4 env: TOXENV=py33 - language: generic os: osx - osx_image: beta-xcode6.3 + osx_image: xcode6.4 env: TOXENV=py34 install: diff --git a/.travis/install.sh b/.travis/install.sh index 21ff76000..80b39226f 100755 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -39,5 +39,5 @@ fi python -m virtualenv ~/.venv source ~/.venv/bin/activate -pip install tox pytest codecov Cython +pip install tox pytest pytest-cov codecov Cython pip install -e . diff --git a/.travis/upload_coverage.sh b/.travis/upload_coverage.sh index 73584acfb..c2aa91bd2 100755 --- a/.travis/upload_coverage.sh +++ b/.travis/upload_coverage.sh @@ -6,5 +6,6 @@ set -x NO_COVERAGE_TOXENVS=(pep8) if ! [[ "${NO_COVERAGE_TOXENVS[*]}" =~ "${TOXENV}" ]]; then source ~/.venv/bin/activate - bash <(curl -s https://codecov.io/bash) -e TRAVIS_OS_NAME,TOXENV + ln .tox/.coverage .coverage + codecov -e TRAVIS_OS_NAME,TOXENV fi diff --git a/requirements.d/development.txt b/requirements.d/development.txt index 6d2928a92..37677a00f 100644 --- a/requirements.d/development.txt +++ b/requirements.d/development.txt @@ -1,4 +1,5 @@ tox mock pytest +pytest-cov<2.0.0 Cython diff --git a/tox.ini b/tox.ini index c1a9e019f..a120a237a 100644 --- a/tox.ini +++ b/tox.ini @@ -9,6 +9,6 @@ envlist = py32, py33, py34 # not really matter, should be just different from the toplevel dir. 
changedir = {toxworkdir} deps = -rrequirements.d/development.txt -commands = py.test --pyargs {posargs:borg.testsuite} +commands = py.test --cov=borg --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * From 6164640ecce359d5303d78e7b984619afea53666 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 02:34:42 +0200 Subject: [PATCH 042/142] add codecov.io badge --- README.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 0d4ee1dac..22320d3fe 100644 --- a/README.rst +++ b/README.rst @@ -118,8 +118,12 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. For more information, please also see the `LICENSE `_. -|build| +|build| |coverage| .. |build| image:: https://travis-ci.org/borgbackup/borg.svg :alt: Build Status :target: https://travis-ci.org/borgbackup/borg + +.. |coverage| image:: http://codecov.io/github/borgbackup/borg/coverage.svg?branch=master + :alt: Test Coverage + :target: http://codecov.io/github/borgbackup/borg?branch=master From 40801d74a6a05f6a47ace486022f0f17f2b0629c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 19:03:37 +0200 Subject: [PATCH 043/142] remove old unittest discover / runner code, we use py.test now --- borg/testsuite/__init__.py | 27 +-------------------------- borg/testsuite/run.py | 11 ----------- 2 files changed, 1 insertion(+), 37 deletions(-) delete mode 100644 borg/testsuite/run.py diff --git a/borg/testsuite/__init__.py b/borg/testsuite/__init__.py index e1eb37eaa..9872edeb6 100644 --- a/borg/testsuite/__init__.py +++ b/borg/testsuite/__init__.py @@ -73,7 +73,7 @@ class BaseTestCase(unittest.TestCase): d1 = [filename] + [getattr(s1, a) for a in attrs] d2 = [filename] + [getattr(s2, a) for a in attrs] if not os.path.islink(path1) or utime_supports_fd: - # Older versions of llfuse does not support ns precision properly + # Older versions of llfuse do not support ns precision properly if 
fuse and not have_fuse_mtime_ns: d1.append(round(st_mtime_ns(s1), -4)) d2.append(round(st_mtime_ns(s2), -4)) @@ -94,28 +94,3 @@ class BaseTestCase(unittest.TestCase): return time.sleep(.1) raise Exception('wait_for_mount(%s) timeout' % path) - - -def get_tests(suite): - """Generates a sequence of tests from a test suite - """ - for item in suite: - try: - # TODO: This could be "yield from..." with Python 3.3+ - for i in get_tests(item): - yield i - except TypeError: - yield item - - -class TestLoader(unittest.TestLoader): - """A customized test loader that properly detects and filters our test cases - """ - - def loadTestsFromName(self, pattern, module=None): - suite = self.discover('borg.testsuite', '*.py') - tests = unittest.TestSuite() - for test in get_tests(suite): - if pattern.lower() in test.id().lower(): - tests.addTest(test) - return tests diff --git a/borg/testsuite/run.py b/borg/testsuite/run.py deleted file mode 100644 index 19d87699b..000000000 --- a/borg/testsuite/run.py +++ /dev/null @@ -1,11 +0,0 @@ -import unittest - -from . 
import TestLoader - - -def main(): - unittest.main(testLoader=TestLoader(), defaultTest='') - - -if __name__ == '__main__': - main() From a9027a033d21d69b67553c896e66c12b8ddcb5e7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 19:12:14 +0200 Subject: [PATCH 044/142] coverage: omit some infrastructure / generated files --- .coveragerc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.coveragerc b/.coveragerc index 9056361b9..620f29fef 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,6 +1,10 @@ [run] branch = True source = borg +omit = + borg/__init__.py + borg/__main__.py + borg/_version.py [report] exclude_lines = From 60e34968b023a3411941fa9f17f3bd2ac332fb92 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 19:22:25 +0200 Subject: [PATCH 045/142] codecov: fixes for env vars and osx --- .travis/upload_coverage.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis/upload_coverage.sh b/.travis/upload_coverage.sh index c2aa91bd2..4cb8273cf 100755 --- a/.travis/upload_coverage.sh +++ b/.travis/upload_coverage.sh @@ -7,5 +7,7 @@ NO_COVERAGE_TOXENVS=(pep8) if ! 
[[ "${NO_COVERAGE_TOXENVS[*]}" =~ "${TOXENV}" ]]; then source ~/.venv/bin/activate ln .tox/.coverage .coverage - codecov -e TRAVIS_OS_NAME,TOXENV + # on osx, tests run as root, need access to .coverage + sudo chmod 666 .coverage + codecov -e TRAVIS_OS_NAME TOXENV fi From 616d16a9b028bdeff8a9c5f6d0d2e63566095059 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 20:50:21 +0200 Subject: [PATCH 046/142] add help string for --no-files-cache, fixes #140 --- borg/archiver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index 8230677da..393609df2 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -509,7 +509,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', default=False, help='verbose output') - common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false') + common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false', + help='do not load/update the file metadata cache used to detect unchanged files') common_parser.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=0o077, metavar='M', help='set umask to M (local and remote, default: 0o077)') common_parser.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH', From cce0d20dad2ef3ca3fe6473786027877c8d483b3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 20:52:05 +0200 Subject: [PATCH 047/142] test whether borg extract can process unusual filenames --- borg/testsuite/archiver.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index b466d6ad6..7a2b75780 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -243,6 +243,19 @@ class ArchiverTestCase(ArchiverTestCaseBase): if sparse_support and hasattr(st, 'st_blocks'): 
self.assert_true(st.st_blocks * 512 < total_len / 10) # is output sparse? + def test_unusual_filenames(self): + filenames = ['normal', 'with some blanks', '(with_parens)', ] + for filename in filenames: + filename = os.path.join(self.input_path, filename) + with open(filename, 'wb') as fd: + pass + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::test', 'input') + for filename in filenames: + with changedir('output'): + self.cmd('extract', self.repository_location + '::test', os.path.join('input', filename)) + assert os.path.exists(os.path.join('output', 'input', filename)) + def test_repository_swap_detection(self): self.create_test_files() os.environ['BORG_PASSPHRASE'] = 'passphrase' From 35b0f38f5ce94cb2b6ef7bfcce94fe224b6b0566 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 21:14:13 +0200 Subject: [PATCH 048/142] cache sync: show progress indication sync can take quite long, so show what we are doing. --- borg/cache.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/borg/cache.py b/borg/cache.py index f50b456eb..a480d708f 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -306,10 +306,15 @@ class Cache: chunk_idx.clear() for tarinfo in tf_in: archive_id_hex = tarinfo.name + archive_name = tarinfo.pax_headers['archive_name'] + print("- processing archive: %s -> extract, " % archive_name, end='') ; sys.stdout.flush() tf_in.extract(archive_id_hex, tmp_dir) chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') + print("read, ", end='') ; sys.stdout.flush() archive_chunk_idx = ChunkIndex.read(chunk_idx_path) + print("merge, ", end='') ; sys.stdout.flush() chunk_idx.merge(archive_chunk_idx) + print("done.") os.unlink(chunk_idx_path) self.begin_txn() From 03f39c2663745e54a720191e9ca6d280e4c02720 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 8 Aug 2015 22:11:40 +0200 Subject: [PATCH 049/142] borg check: give a named single archive to it, fixes #139 --- borg/archive.py | 
23 +++++++++++++++-------- borg/archiver.py | 13 ++++++++----- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index a133af7bb..8798c4fdb 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -609,7 +609,7 @@ class ArchiveChecker: self.error_found = False self.possibly_superseded = set() - def check(self, repository, repair=False, last=None): + def check(self, repository, repair=False, archive=None, last=None): self.report_progress('Starting archive consistency check...') self.repair = repair self.repository = repository @@ -619,8 +619,8 @@ class ArchiveChecker: self.manifest = self.rebuild_manifest() else: self.manifest, _ = Manifest.load(repository, key=self.key) - self.rebuild_refcounts(last=last) - if last is None: + self.rebuild_refcounts(archive=archive, last=last) + if last is None and archive is None: self.verify_chunks() else: self.report_progress('Orphaned objects check skipped (needs all archives checked)') @@ -680,7 +680,7 @@ class ArchiveChecker: self.report_progress('Manifest rebuild complete', error=True) return manifest - def rebuild_refcounts(self, last=None): + def rebuild_refcounts(self, archive=None, last=None): """Rebuild object reference counts by walking the metadata Missing and/or incorrect data is repaired when detected @@ -762,10 +762,17 @@ class ArchiveChecker: yield item repository = cache_if_remote(self.repository) - num_archives = len(self.manifest.archives) - archive_items = sorted(self.manifest.archives.items(), reverse=True, - key=lambda name_info: name_info[1][b'time']) - end = None if last is None else min(num_archives, last) + if archive is None: + # we need last N or all archives + archive_items = sorted(self.manifest.archives.items(), reverse=True, + key=lambda name_info: name_info[1][b'time']) + num_archives = len(self.manifest.archives) + end = None if last is None else min(num_archives, last) + else: + # we only want one specific archive + archive_items = [item for item in 
self.manifest.archives.items() if item[0] == archive] + num_archives = 1 + end = 1 for i, (name, info) in enumerate(archive_items[:end]): self.report_progress('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives)) archive_id = info[b'id'] diff --git a/borg/archiver.py b/borg/archiver.py index 393609df2..9e2917ea3 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -85,8 +85,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") print('Repository check complete, no problems found.') else: return 1 - if not args.repo_only and not ArchiveChecker().check(repository, repair=args.repair, last=args.last): - return 1 + if not args.repo_only and not ArchiveChecker().check( + repository, repair=args.repair, archive=args.repository.archive, last=args.last): + return 1 return 0 def do_change_passphrase(self, args): @@ -554,6 +555,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") and other types of damage. After that the consistency and correctness of the archive metadata is verified. + By giving an archive name, you can specifically check that archive. + The archive metadata checks can be time consuming and requires access to the key file and/or passphrase if encryption is enabled. These checks can be skipped using the --repository-only option. 
@@ -563,9 +566,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=check_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_check) - subparser.add_argument('repository', metavar='REPOSITORY', - type=location_validator(archive=False), - help='repository to check consistency of') + subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', + type=location_validator(), + help='repository or archive to check consistency of') subparser.add_argument('--repository-only', dest='repo_only', action='store_true', default=False, help='only perform repository checks') From 4f6c43baecb3dabc68070f22123861d3de415e19 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 00:36:17 +0200 Subject: [PATCH 050/142] document what borg check does, fixes #138 --- borg/archive.py | 2 +- borg/archiver.py | 39 +++++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 8798c4fdb..82fd57cb8 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -631,7 +631,7 @@ class ArchiveChecker: def init_chunks(self): """Fetch a list of all object keys from repository """ - # Explicity set the initial hash table capacity to avoid performance issues + # Explicitly set the initial hash table capacity to avoid performance issues # due to hash table "resonance" capacity = int(len(self.repository) * 1.2) self.chunks = ChunkIndex(capacity) diff --git a/borg/archiver.py b/borg/archiver.py index 9e2917ea3..0b9fa0432 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -550,16 +550,39 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='select encryption method') check_epilog = textwrap.dedent(""" - The check command verifies the consistency of a repository and the corresponding - archives. The underlying repository data files are first checked to detect bit rot - and other types of damage. 
After that the consistency and correctness of the archive - metadata is verified. + The check command verifies the consistency of a repository and the corresponding archives. - By giving an archive name, you can specifically check that archive. + First, the underlying repository data files are checked: + - For all segments the segment magic (header) is checked + - For all objects stored in the segments, all metadata (e.g. crc and size) and + all data is read. The read data is checked by size and CRC. Bit rot and other + types of accidental damage can be detected this way. + - If we are in repair mode and a integrity error is detected for a segment, + we try to recover as many objects from the segment as possible. + - In repair mode, it makes sure that the index is consistent with the data + stored in the segments. + - If you use a remote repo server via ssh:, the repo check is executed on the + repo server without causing significant network traffic. + - The repository check can be skipped using the --archives-only option. - The archive metadata checks can be time consuming and requires access to the key - file and/or passphrase if encryption is enabled. These checks can be skipped using - the --repository-only option. + Second, the consistency and correctness of the archive metadata is verified: + - Is the repo manifest present? If not, it is rebuilt from archive metadata + chunks. + - Check if archive metadata chunk is present. if not, remove archive from + manifest. + - For all files (items) in the archive, for all chunks referenced by these + files, check if chunk is present (if not and we are in repair mode, replace + it with a chunk of zeros). + - Rebuild the chunks cache (refcounts) within the given archives in memory. 
+ - If we are in repair mode and we checked all the archives: delete orphaned + chunks from the repo, write the repo manifest + - if you use a remote repo server via ssh:, the archive check is executed on + the client machine (because if encryption is enabled, the checks will require + decryption and this is always done client-side, because key access will be + required). Archive and file (item) metadata will get fetched over the network, + but not content data. + - The archive checks can be time consuming, they can be skipped using the + --repository-only option. """) subparser = subparsers.add_parser('check', parents=[common_parser], description=self.do_check.__doc__, From 80ee8b98af6be14131d56673165e0cefb266d6ff Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 12:43:57 +0200 Subject: [PATCH 051/142] fix the repair mode if one used --last (or since shortly: gave an archive name), verify_chunks (old method name) was not called because it requires all archives having been checked. the problem was that also the final manifest.write() and repository.commit() was done in that method, so all other repair work did not get committed in that case. I moved these calls that to a separate finish() method. 
--- borg/archive.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 82fd57cb8..e214c7857 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -611,6 +611,7 @@ class ArchiveChecker: def check(self, repository, repair=False, archive=None, last=None): self.report_progress('Starting archive consistency check...') + self.check_all = archive is None and last is None self.repair = repair self.repository = repository self.init_chunks() @@ -620,10 +621,8 @@ class ArchiveChecker: else: self.manifest, _ = Manifest.load(repository, key=self.key) self.rebuild_refcounts(archive=archive, last=last) - if last is None and archive is None: - self.verify_chunks() - else: - self.report_progress('Orphaned objects check skipped (needs all archives checked)') + self.orphan_chunks_check() + self.finish() if not self.error_found: self.report_progress('Archive consistency check complete, no problems found.') return self.repair or not self.error_found @@ -803,16 +802,22 @@ class ArchiveChecker: add_reference(new_archive_id, len(data), len(cdata), cdata) info[b'id'] = new_archive_id - def verify_chunks(self): - unused = set() - for id_, (count, size, csize) in self.chunks.iteritems(): - if count == 0: - unused.add(id_) - orphaned = unused - self.possibly_superseded - if orphaned: - self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True) + def orphan_chunks_check(self): + if self.check_all: + unused = set() + for id_, (count, size, csize) in self.chunks.iteritems(): + if count == 0: + unused.add(id_) + orphaned = unused - self.possibly_superseded + if orphaned: + self.report_progress('{} orphaned objects found'.format(len(orphaned)), error=True) + if self.repair: + for id_ in unused: + self.repository.delete(id_) + else: + self.report_progress('Orphaned objects check skipped (needs all archives checked)') + + def finish(self): if self.repair: - for id_ in unused: - 
self.repository.delete(id_) self.manifest.write() self.repository.commit() From e74c87d5b54556875786232bf9d96866db02e4b8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 12:52:39 +0200 Subject: [PATCH 052/142] update borg check help --- borg/archiver.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 0b9fa0432..af1e1446f 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -567,20 +567,19 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") Second, the consistency and correctness of the archive metadata is verified: - Is the repo manifest present? If not, it is rebuilt from archive metadata - chunks. + chunks (this requires reading and decrypting of all metadata and data). - Check if archive metadata chunk is present. if not, remove archive from manifest. - For all files (items) in the archive, for all chunks referenced by these files, check if chunk is present (if not and we are in repair mode, replace - it with a chunk of zeros). - - Rebuild the chunks cache (refcounts) within the given archives in memory. + it with a same-size chunk of zeros). This requires reading of archive and + file metadata, but not data. - If we are in repair mode and we checked all the archives: delete orphaned - chunks from the repo, write the repo manifest + chunks from the repo. - if you use a remote repo server via ssh:, the archive check is executed on the client machine (because if encryption is enabled, the checks will require decryption and this is always done client-side, because key access will be - required). Archive and file (item) metadata will get fetched over the network, - but not content data. + required). - The archive checks can be time consuming, they can be skipped using the --repository-only option. 
""") From 74e586050861c42192a1e3e1e097051ca6768335 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 13:47:36 +0200 Subject: [PATCH 053/142] document that passphrase(-only) mode is deprecated --- CHANGES.rst | 7 ++++++- borg/archiver.py | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index f19b7f8aa..8235de3aa 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -16,6 +16,11 @@ Incompatible changes (compared to 0.23): you accidentially give access permissions for group and/or others to files created by borg (e.g. the repository). +Deprecations: + +- "--encryption passphrase" mode is deprecated, see #85 and #97. + See the new "--encryption repokey" mode for a replacement. + New features: - borg create --chunker-params ... to configure the chunker, fixes #16 @@ -28,7 +33,7 @@ New features: - borg create --compression 0..9 to select zlib compression level, fixes #66 (attic #295). - borg init --encryption repokey (to store the encryption key into the repo), - deprecate --encryption passphrase, fixes #85 + fixes #85 - improve at-end error logging, always log exceptions and set exit_code=1 - LoggedIO: better error checks / exceptions / exception handling - implement --remote-path to allow non-default-path borg locations, #125 diff --git a/borg/archiver.py b/borg/archiver.py index af1e1446f..38d270647 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -537,6 +537,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") This command initializes an empty repository. A repository is a filesystem directory containing the deduplicated data from zero or more archives. Encryption can be enabled at repository init time. + Please note that the 'passphrase' encryption mode is DEPRECATED (instead of it, + consider using 'repokey'). 
""") subparser = subparsers.add_parser('init', parents=[common_parser], description=self.do_init.__doc__, epilog=init_epilog, @@ -546,8 +548,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") type=location_validator(archive=False), help='repository to create') subparser.add_argument('-e', '--encryption', dest='encryption', - choices=('none', 'passphrase', 'keyfile', 'repokey'), default='none', - help='select encryption method') + choices=('none', 'keyfile', 'repokey', 'passphrase'), default='none', + help='select encryption key mode') check_epilog = textwrap.dedent(""" The check command verifies the consistency of a repository and the corresponding archives. From 7ffdfe1716f38233fba834c656d721d2d06191a0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 14:10:53 +0200 Subject: [PATCH 054/142] update CHANGES --- CHANGES.rst | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 8235de3aa..199bea31b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -13,7 +13,7 @@ Incompatible changes (compared to 0.23): server and client(s) to 0.24. - the default umask is 077 now (if you do not specify via --umask) which might be a different one as you used previously. The default umask avoids that - you accidentially give access permissions for group and/or others to files + you accidentally give access permissions for group and/or others to files created by borg (e.g. the repository). 
Deprecations: @@ -38,11 +38,15 @@ New features: - LoggedIO: better error checks / exceptions / exception handling - implement --remote-path to allow non-default-path borg locations, #125 - implement --umask M and use 077 as default umask for better security, #117 +- borg check: give a named single archive to it, fixes #139 +- cache sync: show progress indication +- cache sync: reimplement the chunk index merging in C Bug fixes: - fix segfault that happened for unreadable files (chunker: n needs to be a signed size_t), #116 +- fix the repair mode, #144 - repo delete: add destroy to allowed rpc methods, fixes issue #114 - more compatible repository locking code (based on mkdir), maybe fixes #92 (attic #317, attic #201). @@ -51,6 +55,7 @@ Bug fixes: fixes attic #326. - fix Traceback when running check --repair, attic #232 - clarify help text, fixes #73. +- add help string for --no-files-cache, fixes #140 Other changes: @@ -66,11 +71,13 @@ Other changes: - add FAQ entries about redundancy / integrity - clarify that borg extract uses the cwd as extraction target - update internals doc about chunker params, memory usage and compression - - add some words about resource usage - - document how to backup raw disk + - add some words about resource usage in general + - document how to backup a raw disk - add note about how to run borg from virtual env - add solutions for (ll)fuse installation problems - - tested and updated cygwin docs + - document what borg check does, fixes #138 + - reorganize borgbackup.github.io sidebar, prev/next at top + - deduplicate and refactor the docs / README.rst - use borg-tmp as prefix for temporary files / directories - short prune options without "keep-" are deprecated, do not suggest them @@ -79,7 +86,9 @@ Other changes: - use entrypoints instead of scripts, for better use of the wheel format and modern installs - add requirements.d/development.txt and modify tox.ini - +- use travis-ci for linux and OS X testing +- use coverage.py, pytest-cov 
and codecov.io for test coverage support + I forgot to list some stuff already implemented in 0.23.0, here they are: New features: From 4c668a85b63955ccd41b8f7b1c151373da1a4924 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 14:16:56 +0200 Subject: [PATCH 055/142] update docs copyright (to be same as project copyright) --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 9c0e84cb8..5962d1cab 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. project = 'Borg - Deduplicating Archiver' -copyright = '2010-2014, Jonas Borgström' +copyright = '2010-2014, Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the From 955ac9c44c36cfe5163f9f9b78578190843a0666 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 14:26:54 +0200 Subject: [PATCH 056/142] get rid of testsuite.mock, directly import from mock this was left over from times when we either used mock from stdlib or pypi mock. but as we only use pypi mock now, the indirection is not needed any more. --- borg/testsuite/archive.py | 2 +- borg/testsuite/archiver.py | 3 ++- borg/testsuite/mock.py | 14 -------------- borg/testsuite/repository.py | 3 ++- 4 files changed, 5 insertions(+), 17 deletions(-) delete mode 100644 borg/testsuite/mock.py diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index 9a20e9f6e..a963573ec 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -1,12 +1,12 @@ from datetime import datetime, timezone import msgpack +from mock import Mock from ..archive import Archive, CacheChunkBuffer, RobustUnpacker from ..key import PlaintextKey from ..helpers import Manifest from . 
import BaseTestCase -from .mock import Mock class MockCache: diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 7a2b75780..20e76a7e0 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -11,6 +11,8 @@ import time import unittest from hashlib import sha256 +from mock import patch + from .. import xattr from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP from ..archiver import Archiver @@ -20,7 +22,6 @@ from ..helpers import Manifest from ..remote import RemoteRepository, PathNotAllowed from ..repository import Repository from . import BaseTestCase -from .mock import patch try: import llfuse diff --git a/borg/testsuite/mock.py b/borg/testsuite/mock.py deleted file mode 100644 index bdd030b10..000000000 --- a/borg/testsuite/mock.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -Mocking - -Note: unittest.mock is broken on at least python 3.3.6 and 3.4.0. - it silently ignores mistyped method names starting with assert_..., - does nothing and just succeeds. - The issue was fixed in the separately distributed "mock" lib, you - get an AttributeError there. So, always use that one! - -Details: - -http://engineeringblog.yelp.com/2015/02/assert_called_once-threat-or-menace.html -""" -from mock import * diff --git a/borg/testsuite/repository.py b/borg/testsuite/repository.py index 1c9fd072d..74996b717 100644 --- a/borg/testsuite/repository.py +++ b/borg/testsuite/repository.py @@ -2,13 +2,14 @@ import os import shutil import tempfile +from mock import patch + from ..hashindex import NSIndex from ..helpers import Location, IntegrityError from ..locking import UpgradableLock from ..remote import RemoteRepository, InvalidRPCMethod from ..repository import Repository from . 
import BaseTestCase -from .mock import patch class RepositoryTestCaseBase(BaseTestCase): From 197ca9c0d30f77bf77e48b51754384455255ddfe Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 16:19:53 +0200 Subject: [PATCH 057/142] C merge code: cast to correct pointer type, silences warning --- borg/_hashindex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index 128ab5b2f..33d12ca03 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -396,7 +396,7 @@ hashindex_merge(HashIndex *index, HashIndex *other) while((key = hashindex_next_key(other, key))) { other_values = key + key_size; - my_values = hashindex_get(index, key); + my_values = (int32_t *)hashindex_get(index, key); if(my_values == NULL) { hashindex_set(index, key, other_values); } else { From 69456e07c46b09d50cbde363d6c5eb1625df3fdb Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 19:02:35 +0200 Subject: [PATCH 058/142] cache sync: change progress output to separate lines printing without \n plus sys.stdout.flush() didn't work as expected. --- borg/cache.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index a480d708f..2391be275 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -307,14 +307,13 @@ class Cache: for tarinfo in tf_in: archive_id_hex = tarinfo.name archive_name = tarinfo.pax_headers['archive_name'] - print("- processing archive: %s -> extract, " % archive_name, end='') ; sys.stdout.flush() + print("- extracting archive %s ..." 
% archive_name) tf_in.extract(archive_id_hex, tmp_dir) chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') - print("read, ", end='') ; sys.stdout.flush() + print("- reading archive ...") archive_chunk_idx = ChunkIndex.read(chunk_idx_path) - print("merge, ", end='') ; sys.stdout.flush() + print("- merging archive ...") chunk_idx.merge(archive_chunk_idx) - print("done.") os.unlink(chunk_idx_path) self.begin_txn() From 1e35f5ce4a7f38917afdaddeebf61a94bc7478aa Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 9 Aug 2015 21:22:55 +0200 Subject: [PATCH 059/142] minor fixes to CHANGES --- CHANGES.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 199bea31b..b333ba48a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -71,6 +71,7 @@ Other changes: - add FAQ entries about redundancy / integrity - clarify that borg extract uses the cwd as extraction target - update internals doc about chunker params, memory usage and compression + - added docs about development - add some words about resource usage in general - document how to backup a raw disk - add note about how to run borg from virtual env @@ -81,12 +82,12 @@ Other changes: - use borg-tmp as prefix for temporary files / directories - short prune options without "keep-" are deprecated, do not suggest them -- improved tox configuration, documented there how to invoke it +- improved tox configuration - remove usage of unittest.mock, always use mock from pypi - use entrypoints instead of scripts, for better use of the wheel format and modern installs - add requirements.d/development.txt and modify tox.ini -- use travis-ci for linux and OS X testing +- use travis-ci for testing based on Linux and (new) OS X - use coverage.py, pytest-cov and codecov.io for test coverage support I forgot to list some stuff already implemented in 0.23.0, here they are: From 822379048f0a0fdec0bc8b541b1a113e2b9bba01 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: 
Sun, 9 Aug 2015 22:32:14 +0200 Subject: [PATCH 060/142] added some sidebar links --- docs/_themes/local/sidebarusefullinks.html | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/_themes/local/sidebarusefullinks.html b/docs/_themes/local/sidebarusefullinks.html index 2f71b275d..368dee25f 100644 --- a/docs/_themes/local/sidebarusefullinks.html +++ b/docs/_themes/local/sidebarusefullinks.html @@ -5,6 +5,8 @@
    • Main Web Site
    • PyPI packages
    • +
    • Binary Packages
    • +
    • Current ChangeLog
    • GitHub
    • Issue Tracker
    • Bounties & Fundraisers
    • From abe29583f2f6f0d93313be66747251cd9b983175 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 10 Aug 2015 00:03:19 +0200 Subject: [PATCH 061/142] install lz4 from brew --- .travis/install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis/install.sh b/.travis/install.sh index e25ab1288..27eb668db 100755 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -14,6 +14,7 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then eval "$(pyenv init -)" fi + brew install lz4 brew outdated pyenv || brew upgrade pyenv case "${TOXENV}" in From 8b1d46caa403488dd7e6f7ef166349bb2d6c4b8d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 10 Aug 2015 20:36:21 +0200 Subject: [PATCH 062/142] docs: more about compression --- docs/internals.rst | 25 ++++++++++++++++++++----- docs/quickstart.rst | 23 +++++++++++++++++++++++ docs/usage.rst | 16 ++++++++++++++++ 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 6dfc8ba9b..0ea68098b 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -382,10 +382,25 @@ representation of the repository id. Compression ----------- -|project_name| currently always pipes all data through a zlib compressor which -supports compression levels 0 (no compression, fast) to 9 (high compression, slow). +|project_name| supports the following compression methods: + +- none (no compression, pass through data 1:1) +- lz4 (low compression, but super fast) +- zlib (level 1-9, level 1 is low, level 9 is high compression) +- lzma (level 0-9, level 0 is low, level 9 is high compression) + +Speed: none > lz4 > zlib > lzma +Compression: lzma > zlib > lz4 > none + +The overall speed of course also depends on the speed of your target storage. +If that is slow, using a higher compression level might yield better overall +performance. You need to experiment a bit. Maybe just watch your CPU load, if +that is relatively low, increase compression until 1 core is 70-100% loaded. 
+ +Be careful, higher zlib and especially lzma compression levels might take a +lot of resources (CPU and memory). + +Compression is applied after deduplication, thus using different compression +methods in one repo does not influence deduplication. See ``borg create --help`` about how to specify the compression level and its default. - -Note: zlib level 0 creates a little bit more output data than it gets as input, -due to zlib protocol overhead. diff --git a/docs/quickstart.rst b/docs/quickstart.rst index fcb223503..9abe4fb6a 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -89,6 +89,29 @@ certain number of old archives:: # and 6 monthly archives. borg prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6 +.. _backup_compression: + +Backup compression +------------------ + +Default is no compression, but we support different methods with high speed +or high compression: + +If you have a quick repo storage and you want a little compression: + + $ borg create --compression lz4 /mnt/backup::repo ~ + +If you have a medium fast repo storage and you want a bit more compression (N=0..9): + + $ borg create --compression zlib,N /mnt/backup::repo ~ + +If you have a very slow repo storage and you want high compression (N=0..9): + + $ borg create --compression lzma,N /mnt/backup::repo ~ + +You'll need to experiment a bit to find the best compression for your use case. +Keep an eye on CPU load and throughput. + .. _encrypted_repos: Repository encryption diff --git a/docs/usage.rst b/docs/usage.rst index fcbee5fef..a68d67c3f 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -76,8 +76,12 @@ Resource Usage |project_name| might use a lot of resources depending on the size of the data set it is dealing with. CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded. + Especially higher zlib and lzma compression uses significant amounts of CPU + cycles. 
Memory (RAM): the chunks index and the files index are read into memory for performance reasons. + compression, esp. lzma compression with high levels might need substantial amounts + of memory. Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the deduplicated chunks used to represent them in the repository. @@ -175,6 +179,18 @@ Examples # Backup a raw device (must not be active/in use/mounted at that time) $ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda - + # No compression (default) + $ borg create /mnt/backup::repo ~ + + # Super fast, low compression + $ borg create --compression lz4 /mnt/backup::repo ~ + + # Less fast, higher compression (N = 0..9) + $ borg create --compression zlib,N /mnt/backup::repo ~ + + # Even slower, even higher compression (N = 0..9) + $ borg create --compression lzma,N /mnt/backup::repo ~ + .. include:: usage/extract.rst.inc From 1724241d0ca676f3de6bad38dda33c0be1296818 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 10 Aug 2015 20:45:15 +0200 Subject: [PATCH 063/142] README: mention lzma and lz4 compression --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 22320d3fe..3d27de85c 100644 --- a/README.rst +++ b/README.rst @@ -51,7 +51,7 @@ Main features authenticity is verified using HMAC-SHA256. **Compression** - All data can be compressed by zlib, level 0-9. + All data can be compressed by lz4, zlib or lzma. **Off-site backups** Borg can store data on any remote host accessible over SSH. If Borg is From 57845c07ed4e772abf4ddf353db8d08a2cca6fa0 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Mon, 10 Aug 2015 16:48:19 +0100 Subject: [PATCH 064/142] Clean up fds of segments we delete (during compaction) When we delete a segment, let's close its fd as well. Note that as well as wasting the fd, this was forcing the filesystem to preserve the deleted file until we exited. 
I noticed roughly 20 open fds of deleted files when attic saved 10G of data. --- attic/repository.py | 3 +++ 1 file changed, 3 insertions(+) mode change 100644 => 100755 attic/repository.py diff --git a/attic/repository.py b/attic/repository.py old mode 100644 new mode 100755 index eed85dc43..1d2bb0f0f --- a/attic/repository.py +++ b/attic/repository.py @@ -478,6 +478,9 @@ class LoggedIO(object): return fd def delete_segment(self, segment): + fd = self.fds.pop(segment) + if fd != None: + fd.close() try: os.unlink(self.segment_filename(segment)) except OSError: From 3321a887d34d607fc59e9d2d19f07b5862295908 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Mon, 10 Aug 2015 22:37:32 +0100 Subject: [PATCH 065/142] io.write_commit() already implies io.close_segment() --- attic/repository.py | 1 - 1 file changed, 1 deletion(-) diff --git a/attic/repository.py b/attic/repository.py index 1d2bb0f0f..bd10e3212 100755 --- a/attic/repository.py +++ b/attic/repository.py @@ -300,7 +300,6 @@ class Repository(object): report_error('Adding commit tag to segment {}'.format(transaction_id)) self.io.segment = transaction_id + 1 self.io.write_commit() - self.io.close_segment() if current_index and not repair: if len(current_index) != len(self.index): report_error('Index object count mismatch. 
{} != {}'.format(len(current_index), len(self.index))) From e06b0b36129d42f979a09456ed66a6e9b2d9a8ad Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 01:04:03 +0200 Subject: [PATCH 066/142] use C99's uintmax_t and %ju format whatever size_t and off_t is, should even fit in there --- borg/_hashindex.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index 33d12ca03..aa1881f18 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -145,10 +145,12 @@ hashindex_read(const char *path) bytes_read = fread(&header, 1, sizeof(HashHeader), fd); if(bytes_read != sizeof(HashHeader)) { if(ferror(fd)) { - EPRINTF_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read); + EPRINTF_PATH(path, "fread header failed (expected %ju, got %ju)", + (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read); } else { - EPRINTF_MSG_PATH(path, "fread header failed (expected %ld, got %ld)", sizeof(HashHeader), bytes_read); + EPRINTF_MSG_PATH(path, "fread header failed (expected %ju, got %ju)", + (uintmax_t) sizeof(HashHeader), (uintmax_t) bytes_read); } goto fail; } @@ -170,7 +172,8 @@ hashindex_read(const char *path) } buckets_length = (off_t)_le32toh(header.num_buckets) * (header.key_size + header.value_size); if(length != sizeof(HashHeader) + buckets_length) { - EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ld, got %ld)", sizeof(HashHeader) + buckets_length, length); + EPRINTF_MSG_PATH(path, "Incorrect file length (expected %ju, got %ju)", + (uintmax_t) sizeof(HashHeader) + buckets_length, (uintmax_t) length); goto fail; } if(!(index = malloc(sizeof(HashIndex)))) { @@ -186,10 +189,12 @@ hashindex_read(const char *path) bytes_read = fread(index->buckets, 1, buckets_length, fd); if(bytes_read != buckets_length) { if(ferror(fd)) { - EPRINTF_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read); + EPRINTF_PATH(path, "fread 
buckets failed (expected %ju, got %ju)", + (uintmax_t) buckets_length, (uintmax_t) bytes_read); } else { - EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ld, got %ld)", buckets_length, bytes_read); + EPRINTF_MSG_PATH(path, "fread buckets failed (expected %ju, got %ju)", + (uintmax_t) buckets_length, (uintmax_t) bytes_read); } free(index->buckets); free(index); From b16dc03e365d3eb0e47a608449255cd6a812928c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 02:27:41 +0200 Subject: [PATCH 067/142] tests for CompressionSpec --- borg/testsuite/helpers.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 26b422b0c..bb2400a94 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -2,11 +2,12 @@ import hashlib from time import mktime, strptime from datetime import datetime, timezone, timedelta +import pytest import msgpack from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \ prune_within, prune_split, \ - StableDict, int_to_bigint, bigint_to_int, parse_timestamp + StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec from . 
import BaseTestCase @@ -104,6 +105,35 @@ class PatternTestCase(BaseTestCase): ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) +def test_compression_specs(): + with pytest.raises(ValueError): + CompressionSpec('') + assert CompressionSpec('0') == dict(name='null') + assert CompressionSpec('1') == dict(name='zlib', level=1) + assert CompressionSpec('9') == dict(name='zlib', level=9) + assert CompressionSpec('10') == dict(name='lz4') + with pytest.raises(ValueError): + CompressionSpec('11') + assert CompressionSpec('20') == dict(name='lzma', level=0) + assert CompressionSpec('29') == dict(name='lzma', level=9) + with pytest.raises(ValueError): + CompressionSpec('30') + assert CompressionSpec('null') == dict(name='null') + assert CompressionSpec('lz4') == dict(name='lz4') + assert CompressionSpec('zlib') == dict(name='zlib', level=6) + assert CompressionSpec('zlib,0') == dict(name='zlib', level=0) + assert CompressionSpec('zlib,9') == dict(name='zlib', level=9) + with pytest.raises(ValueError): + CompressionSpec('zlib,9,invalid') + assert CompressionSpec('lzma') == dict(name='lzma', level=6) + assert CompressionSpec('lzma,0') == dict(name='lzma', level=0) + assert CompressionSpec('lzma,9') == dict(name='lzma', level=9) + with pytest.raises(ValueError): + CompressionSpec('lzma,9,invalid') + with pytest.raises(ValueError): + CompressionSpec('invalid') + + class MakePathSafeTestCase(BaseTestCase): def test(self): From feff0f0c9421c7487e618eb7f771bbf1a2568603 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 03:15:44 +0200 Subject: [PATCH 068/142] install docs: replace hack for llfuse with proper solution found out why it could not install llfuse into virtual env: it always complained about not being able to find fuse.pc - which is part of libfuse-dev / fuse-devel and was missing. once one adds the fuse dev stuff, llfuse installs to virtual env without problems. 
--- docs/installation.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 90bd33f84..3cd4e13b6 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -62,13 +62,11 @@ Some of the steps detailled below might be useful also for non-git installs. # if you do not have gcc / make / etc. yet apt-get install build-essential - # optional: lowlevel FUSE py binding - to mount backup archives + # optional: FUSE support - to mount backup archives # in case you get complaints about permission denied on /etc/fuse.conf: # on ubuntu this means your user is not in the "fuse" group. just add # yourself there, log out and log in again. - # if it complains about not being able to find llfuse: make a symlink - # borg-env/lib/python3.4/site-packages/llfuse -> /usr/lib/python3/dist-packages/llfuse - apt-get install python3-llfuse fuse + apt-get install libfuse-dev fuse # optional: for unit testing apt-get install fakeroot @@ -84,6 +82,7 @@ Some of the steps detailled below might be useful also for non-git installs. pip install cython # compile .pyx -> .c pip install tox pytest # optional, for running unit tests pip install sphinx # optional, to build the docs + pip install llfuse # optional, for FUSE support cd borg pip install -e . # in-place editable mode @@ -108,8 +107,8 @@ Some of the steps detailled below might be useful also for non-git installs. # ACL support Headers + Library sudo dnf install libacl-devel libacl - # optional: lowlevel FUSE py binding - to mount backup archives - sudo dnf install python3-llfuse fuse + # optional: FUSE support - to mount backup archives + sudo dnf install fuse-devel fuse # optional: for unit testing sudo dnf install fakeroot @@ -125,6 +124,7 @@ Some of the steps detailled below might be useful also for non-git installs. 
pip install cython # compile .pyx -> .c pip install tox pytest # optional, for running unit tests pip install sphinx # optional, to build the docs + pip install llfuse # optional, for FUSE support cd borg pip install -e . # in-place editable mode From 4d8949e66a6f0183e50b07d7f68827b86f22641b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 04:09:36 +0200 Subject: [PATCH 069/142] archiver: more tests --- borg/archiver.py | 6 +++--- borg/testsuite/archiver.py | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 38d270647..deed03786 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -859,7 +859,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") return args.func(args) -def sig_info_handler(signum, stack): +def sig_info_handler(signum, stack): # pragma: no cover """search the stack for infos about the currently processed file and print them""" for frame in inspect.getouterframes(stack): func, loc = frame[3], frame[0].f_locals @@ -882,7 +882,7 @@ def sig_info_handler(signum, stack): break -def setup_signal_handlers(): +def setup_signal_handlers(): # pragma: no cover sigs = [] if hasattr(signal, 'SIGUSR1'): sigs.append(signal.SIGUSR1) # kill -USR1 pid @@ -892,7 +892,7 @@ def setup_signal_handlers(): signal.signal(sig, sig_info_handler) -def main(): +def main(): # pragma: no cover # Make sure stdout and stderr have errors='replace') to avoid unicode # issues when print()-ing unicode file names sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 20e76a7e0..489f3f69f 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -183,7 +183,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.create_test_files() self.cmd('init', self.repository_location) self.cmd('create', self.repository_location 
+ '::test', 'input') - self.cmd('create', self.repository_location + '::test.2', 'input') + self.cmd('create', '--stats', self.repository_location + '::test.2', 'input') with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) @@ -403,7 +403,7 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.cmd('extract', '--dry-run', self.repository_location + '::test.2') self.cmd('delete', self.repository_location + '::test') self.cmd('extract', '--dry-run', self.repository_location + '::test.2') - self.cmd('delete', self.repository_location + '::test.2') + self.cmd('delete', '--stats', self.repository_location + '::test.2') # Make sure all data except the manifest has been deleted repository = Repository(self.repository_path) self.assert_equal(len(repository), 1) @@ -470,10 +470,38 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.assert_not_in('test1', output) self.assert_in('test2', output) + def test_prune_repository_prefix(self): + self.cmd('init', self.repository_location) + self.cmd('create', self.repository_location + '::foo-2015-08-12-10:00', src_dir) + self.cmd('create', self.repository_location + '::foo-2015-08-12-20:00', src_dir) + self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir) + self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir) + output = self.cmd('prune', '-v', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-') + self.assert_in('Keeping archive: foo-2015-08-12-20:00', output) + self.assert_in('Would prune: foo-2015-08-12-10:00', output) + output = self.cmd('list', self.repository_location) + self.assert_in('foo-2015-08-12-10:00', output) + self.assert_in('foo-2015-08-12-20:00', output) + self.assert_in('bar-2015-08-12-10:00', output) + self.assert_in('bar-2015-08-12-20:00', output) + self.cmd('prune', self.repository_location, '--keep-daily=2', '--prefix=foo-') + 
output = self.cmd('list', self.repository_location) + self.assert_not_in('foo-2015-08-12-10:00', output) + self.assert_in('foo-2015-08-12-20:00', output) + self.assert_in('bar-2015-08-12-10:00', output) + self.assert_in('bar-2015-08-12-20:00', output) + def test_usage(self): self.assert_raises(SystemExit, lambda: self.cmd()) self.assert_raises(SystemExit, lambda: self.cmd('-h')) + def test_help(self): + assert 'Borg' in self.cmd('help') + assert 'patterns' in self.cmd('help', 'patterns') + assert 'Initialize' in self.cmd('help', 'init') + assert 'positional arguments' not in self.cmd('help', 'init', '--epilog-only') + assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only') + @unittest.skipUnless(has_llfuse, 'llfuse not installed') def test_fuse_mount_repository(self): mountpoint = os.path.join(self.tmpdir, 'mountpoint') From 8300efb1dbfe17d9964c68fe790480acbc453e51 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 04:28:31 +0200 Subject: [PATCH 070/142] remote: pragma: no cover for the stuff we can't test --- borg/remote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/remote.py b/borg/remote.py index 1d7ae84e2..3a274b214 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -28,7 +28,7 @@ class InvalidRPCMethod(Error): """RPC method is not valid""" -class RepositoryServer: +class RepositoryServer: # pragma: no cover rpc_methods = ( '__len__', 'check', @@ -129,7 +129,7 @@ class RemoteRepository: umask = ['--umask', '%03o' % self.umask] if location.host == '__testsuite__': args = [sys.executable, '-m', 'borg.archiver', 'serve'] + umask + self.extra_test_args - else: + else: # pragma: no cover args = ['ssh'] if location.port: args += ['-p', str(location.port)] From d83b919d52d40af54b5d353e9f408c550f714358 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 12 Aug 2015 11:20:02 +0100 Subject: [PATCH 071/142] Style fix in added code PEP8 says to prefer "is not None" --- attic/repository.py | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attic/repository.py b/attic/repository.py index bd10e3212..a926d7a78 100755 --- a/attic/repository.py +++ b/attic/repository.py @@ -478,7 +478,7 @@ class LoggedIO(object): def delete_segment(self, segment): fd = self.fds.pop(segment) - if fd != None: + if fd is not None: fd.close() try: os.unlink(self.segment_filename(segment)) From 04887439a0261388d0e3088f851299f075a0e4a5 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 12 Aug 2015 11:32:12 +0100 Subject: [PATCH 072/142] recover_segment(): don't assume we have an fd for segment Suggested by @ThomasWaldmann. Avoiding a complex assumption should make the code easier to understand and maintain. (Technically we do have an fd for the segment, because the only caller opens the segment and checks it before calling for repair.) --- attic/repository.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/attic/repository.py b/attic/repository.py index a926d7a78..ad7031654 100755 --- a/attic/repository.py +++ b/attic/repository.py @@ -515,7 +515,9 @@ class LoggedIO(object): header = fd.read(self.header_fmt.size) def recover_segment(self, segment, filename): - self.fds.pop(segment).close() + fd = self.fds.pop(segment) + if fd is not None: + fd.close() # FIXME: save a copy of the original file with open(filename, 'rb') as fd: data = memoryview(fd.read()) From 2194d9837e4021370402ead33d5724ceb78b0735 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 16:04:41 +0200 Subject: [PATCH 073/142] update CHANGES --- CHANGES.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index b333ba48a..13dfdb4ce 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,30 @@ Borg Changelog ============== +Version 0.25.0 (not released yet) +--------------------------------- + +Incompatible changes (compared to 0.24): + +- none yet + +Deprecations: + +- none yet + +New features: + +- honor the 
nodump flag (UF_NODUMP) and do not backup such items + +Bug fixes: + +- close fds of segments we delete (during compaction) + +Other changes: + +- none yet + + Version 0.24.0 -------------- From 04814241289c4febdfb9c497783d1cf3d7a51538 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 16:41:30 +0200 Subject: [PATCH 074/142] fix archiver test to not expect backup of the UF_NODUMP file --- borg/testsuite/archiver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 489f3f69f..eb707ade1 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -187,7 +187,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) - self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), 11) + file_count = 10 if has_lchflags else 11 # one file is UF_NODUMP + self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), file_count) self.assert_dirs_equal('input', 'output/input') info_output = self.cmd('info', self.repository_location + '::test') self.assert_in('Number of files: 4', info_output) From 3100fac3617851d4d67096df31f74a96f9fd2e86 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 12 Aug 2015 17:03:30 +0200 Subject: [PATCH 075/142] fix archiver test to not expect backup of the UF_NODUMP file, try 2 --- borg/testsuite/archiver.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index eb707ade1..2ed2f7821 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -187,11 +187,15 @@ class ArchiverTestCase(ArchiverTestCaseBase): with changedir('output'): self.cmd('extract', self.repository_location + '::test') 
self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) - file_count = 10 if has_lchflags else 11 # one file is UF_NODUMP - self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), file_count) + item_count = 10 if has_lchflags else 11 # one file is UF_NODUMP + self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), item_count) + if has_lchflags: + # remove the file we did not backup, so input and output become equal + os.remove(os.path.join('input', 'flagfile')) self.assert_dirs_equal('input', 'output/input') info_output = self.cmd('info', self.repository_location + '::test') - self.assert_in('Number of files: 4', info_output) + item_count = 3 if has_lchflags else 4 # one file is UF_NODUMP + self.assert_in('Number of files: %d' % item_count, info_output) shutil.rmtree(self.cache_path) with environment_variable(BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK='1'): info_output2 = self.cmd('info', self.repository_location + '::test') From e3f671c4fb8a83118b5f4508b709a8e694f34639 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 12 Aug 2015 23:13:34 +0100 Subject: [PATCH 076/142] We forgot to close files which fell out the lrucache The initializer now takes a dispose function. lrucache claims ownership of the items it contains and will dispose deleted items. Ownership can naturally be reclaimed by calling pop() for the item. 
--- attic/lrucache.py | 22 ++++++++++++++++------ attic/repository.py | 16 +++++++--------- attic/testsuite/lrucache.py | 12 +++--------- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/attic/lrucache.py b/attic/lrucache.py index 3bb49fbc4..d4ea8a490 100644 --- a/attic/lrucache.py +++ b/attic/lrucache.py @@ -1,15 +1,16 @@ class LRUCache(dict): - def __init__(self, capacity): + def __init__(self, capacity, dispose): super(LRUCache, self).__init__() self._lru = [] self._capacity = capacity + self._dispose = dispose def __setitem__(self, key, value): - try: - self._lru.remove(key) - except ValueError: - pass + assert key not in self, ( + "Unexpected attempt to replace a cached item." + " If this is intended, please delete or pop the old item first." + " The dispose function will be called on delete (but not pop).") self._lru.append(key) while len(self._lru) > self._capacity: del self[self._lru[0]] @@ -28,7 +29,11 @@ class LRUCache(dict): self._lru.remove(key) except ValueError: pass - return super(LRUCache, self).__delitem__(key) + error = KeyError(key) + removed = super(LRUCache, self).pop(key, error) + if removed == error: + raise error + self._dispose(removed) def pop(self, key, default=None): try: @@ -37,6 +42,11 @@ class LRUCache(dict): pass return super(LRUCache, self).pop(key, default) + def clear(self): + for value in self.values(): + self._dispose(value) + super(LRUCache, self).clear() + def _not_implemented(self, *args, **kw): raise NotImplementedError popitem = setdefault = update = _not_implemented diff --git a/attic/repository.py b/attic/repository.py index ad7031654..7b6066e0b 100755 --- a/attic/repository.py +++ b/attic/repository.py @@ -393,7 +393,8 @@ class LoggedIO(object): def __init__(self, path, limit, segments_per_dir, capacity=90): self.path = path - self.fds = LRUCache(capacity) + self.fds = LRUCache(capacity, + dispose=lambda fd: fd.close()) self.segment = 0 self.limit = limit self.segments_per_dir = segments_per_dir @@ -401,9 
+402,8 @@ class LoggedIO(object): self._write_fd = None def close(self): - for segment in list(self.fds.keys()): - self.fds.pop(segment).close() self.close_segment() + self.fds.clear() self.fds = None # Just to make sure we're disabled def segment_iterator(self, reverse=False): @@ -477,9 +477,8 @@ class LoggedIO(object): return fd def delete_segment(self, segment): - fd = self.fds.pop(segment) - if fd is not None: - fd.close() + if segment in self.fds: + del self.fds[segment] try: os.unlink(self.segment_filename(segment)) except OSError: @@ -515,9 +514,8 @@ class LoggedIO(object): header = fd.read(self.header_fmt.size) def recover_segment(self, segment, filename): - fd = self.fds.pop(segment) - if fd is not None: - fd.close() + if segment in self.fds: + del self.fds[segment] # FIXME: save a copy of the original file with open(filename, 'rb') as fd: data = memoryview(fd.read()) diff --git a/attic/testsuite/lrucache.py b/attic/testsuite/lrucache.py index 9b51a7aab..60ceb41c3 100644 --- a/attic/testsuite/lrucache.py +++ b/attic/testsuite/lrucache.py @@ -5,7 +5,7 @@ from attic.testsuite import AtticTestCase class LRUCacheTestCase(AtticTestCase): def test(self): - c = LRUCache(2) + c = LRUCache(2, dispose=lambda _: None) self.assert_equal(len(c), 0) for i, x in enumerate('abc'): c[x] = i @@ -21,19 +21,13 @@ class LRUCacheTestCase(AtticTestCase): self.assert_equal(len(c), 2) self.assert_equal(c['c'], 2) self.assert_equal(c['d'], 3) - c['c'] = 22 - c['e'] = 4 - self.assert_equal(len(c), 2) - self.assert_raises(KeyError, lambda: c['d']) - self.assert_equal(c['c'], 22) - self.assert_equal(c['e'], 4) del c['c'] self.assert_equal(len(c), 1) self.assert_raises(KeyError, lambda: c['c']) - self.assert_equal(c['e'], 4) + self.assert_equal(c['d'], 3) def test_pop(self): - c = LRUCache(2) + c = LRUCache(2, dispose=lambda _: None) c[1] = 1 c[2] = 2 c.pop(1) From 9ba7daa9c706a76cc3eaf80661540457f0d80c56 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 12 Aug 2015 23:30:26 +0100 
Subject: [PATCH 077/142] lrucache - simpler _not_ to inherit from dict We need to make sure dispose() is always called when necessary. Using inheritance it's just too easy to forget a method, that we needed to override. I also find it confusing when an override method calls another method, and you have yet to see whether the latter method is overridden or not. It didn't help that most of these methods are actually operator overloads. This turns out to require _less_ code :-). (Admittedly the code could have been reduced a bit anyway because python3's super() can be called without any arguments). --- attic/lrucache.py | 38 ++++++++++++++++++------------------- attic/testsuite/lrucache.py | 10 +--------- 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/attic/lrucache.py b/attic/lrucache.py index d4ea8a490..e02e1a6d0 100644 --- a/attic/lrucache.py +++ b/attic/lrucache.py @@ -1,20 +1,19 @@ -class LRUCache(dict): - +class LRUCache: def __init__(self, capacity, dispose): - super(LRUCache, self).__init__() + self._cache = {} self._lru = [] self._capacity = capacity self._dispose = dispose def __setitem__(self, key, value): - assert key not in self, ( + assert key not in self._cache, ( "Unexpected attempt to replace a cached item." - " If this is intended, please delete or pop the old item first." - " The dispose function will be called on delete (but not pop).") + " If this is intended, please delete the old item first." 
+ " The dispose function will be called on delete.") self._lru.append(key) while len(self._lru) > self._capacity: del self[self._lru[0]] - return super(LRUCache, self).__setitem__(key, value) + self._cache[key] = value def __getitem__(self, key): try: @@ -22,7 +21,7 @@ class LRUCache(dict): self._lru.append(key) except ValueError: pass - return super(LRUCache, self).__getitem__(key) + return self._cache[key] def __delitem__(self, key): try: @@ -30,23 +29,22 @@ class LRUCache(dict): except ValueError: pass error = KeyError(key) - removed = super(LRUCache, self).pop(key, error) + removed = self._cache.pop(key, error) if removed == error: raise error self._dispose(removed) - def pop(self, key, default=None): - try: - self._lru.remove(key) - except ValueError: - pass - return super(LRUCache, self).pop(key, default) + def __contains__(self, key): + return key in self._cache def clear(self): - for value in self.values(): + for value in self._cache.values(): self._dispose(value) - super(LRUCache, self).clear() + self._cache.clear() - def _not_implemented(self, *args, **kw): - raise NotImplementedError - popitem = setdefault = update = _not_implemented + # useful for testing + def items(self): + return self._cache.items() + + def __len__(self): + return len(self._cache) diff --git a/attic/testsuite/lrucache.py b/attic/testsuite/lrucache.py index 60ceb41c3..b0dd2856f 100644 --- a/attic/testsuite/lrucache.py +++ b/attic/testsuite/lrucache.py @@ -10,8 +10,7 @@ class LRUCacheTestCase(AtticTestCase): for i, x in enumerate('abc'): c[x] = i self.assert_equal(len(c), 2) - self.assert_equal(set(c), set(['b', 'c'])) - self.assert_equal(set(c.items()), set([('b', 1), ('c', 2)])) + self.assert_equal(c.items(), set([('b', 1), ('c', 2)])) self.assert_equal(False, 'a' in c) self.assert_equal(True, 'b' in c) self.assert_raises(KeyError, lambda: c['a']) @@ -25,10 +24,3 @@ class LRUCacheTestCase(AtticTestCase): self.assert_equal(len(c), 1) self.assert_raises(KeyError, lambda: c['c']) 
self.assert_equal(c['d'], 3) - - def test_pop(self): - c = LRUCache(2, dispose=lambda _: None) - c[1] = 1 - c[2] = 2 - c.pop(1) - c[3] = 3 From db298268e41bb61b801dc009a24a8687a561ed6c Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 13 Aug 2015 11:02:00 +0100 Subject: [PATCH 078/142] Cleanup error-raising in added code At least one programmer is confused by my abuse of KeyError() as a sentinel value. Let's call the sentinel value _NotFound instead, and let's avoid re-creating it on each call. I have a new favourite line of code, "if item is _NotFound" :). Thanks to @ThomasWaldmann for all these review suggestions. --- attic/lrucache.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/attic/lrucache.py b/attic/lrucache.py index e02e1a6d0..0e1a2eea5 100644 --- a/attic/lrucache.py +++ b/attic/lrucache.py @@ -1,3 +1,6 @@ +class _NotFound: + pass + class LRUCache: def __init__(self, capacity, dispose): self._cache = {} @@ -7,9 +10,8 @@ class LRUCache: def __setitem__(self, key, value): assert key not in self._cache, ( - "Unexpected attempt to replace a cached item." - " If this is intended, please delete the old item first." 
- " The dispose function will be called on delete.") + "Unexpected attempt to replace a cached item," + " without first deleting the old item.") self._lru.append(key) while len(self._lru) > self._capacity: del self[self._lru[0]] @@ -28,11 +30,10 @@ class LRUCache: self._lru.remove(key) except ValueError: pass - error = KeyError(key) - removed = self._cache.pop(key, error) - if removed == error: - raise error - self._dispose(removed) + item = self._cache.pop(key, _NotFound) + if item is _NotFound: + raise KeyError(key) + self._dispose(item) def __contains__(self, key): return key in self._cache From 76f6737e9daefc102d9d6a3b6eb0b56be5785935 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Fri, 14 Aug 2015 10:14:17 +0100 Subject: [PATCH 079/142] lrucache: cleanup (-10 lines) dict.pop() will raise KeyError for us if necessary. I was confused because we used to have lrucache.pop() with a bug, that returned None instead. Great catch by @ThomasWaldmann. --- attic/lrucache.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/attic/lrucache.py b/attic/lrucache.py index 0e1a2eea5..4d3ba73b7 100644 --- a/attic/lrucache.py +++ b/attic/lrucache.py @@ -1,6 +1,3 @@ -class _NotFound: - pass - class LRUCache: def __init__(self, capacity, dispose): self._cache = {} @@ -18,22 +15,15 @@ class LRUCache: self._cache[key] = value def __getitem__(self, key): - try: - self._lru.remove(key) - self._lru.append(key) - except ValueError: - pass - return self._cache[key] + value = self._cache[key] # raise KeyError if not found + self._lru.remove(key) + self._lru.append(key) + return value def __delitem__(self, key): - try: - self._lru.remove(key) - except ValueError: - pass - item = self._cache.pop(key, _NotFound) - if item is _NotFound: - raise KeyError(key) - self._dispose(item) + value = self._cache.pop(key) # raise KeyError if not found + self._dispose(value) + self._lru.remove(key) def __contains__(self, key): return key in self._cache From 
0ee78240ee5d3036382cb89df0e7850db988e1a3 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Fri, 14 Aug 2015 11:59:26 +0100 Subject: [PATCH 080/142] lrucache: test added code Tests saved my butt, so I'd better contribute :). These tests have been tested - substituting a null dispose function causes an immediate failure. --- borg/testsuite/lrucache.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/borg/testsuite/lrucache.py b/borg/testsuite/lrucache.py index ee8de226b..2ad16c532 100644 --- a/borg/testsuite/lrucache.py +++ b/borg/testsuite/lrucache.py @@ -1,5 +1,6 @@ from ..lrucache import LRUCache from . import BaseTestCase +from tempfile import TemporaryFile class LRUCacheTestCase(BaseTestCase): @@ -24,3 +25,25 @@ class LRUCacheTestCase(BaseTestCase): self.assert_equal(len(c), 1) self.assert_raises(KeyError, lambda: c['c']) self.assert_equal(c['d'], 3) + c.clear() + self.assert_equal(c.items(), set()) + + def test_dispose(self): + c = LRUCache(2, dispose=lambda f: f.close()) + f1 = TemporaryFile() + f2 = TemporaryFile() + f3 = TemporaryFile() + c[1] = f1 + c[2] = f2 + self.assert_equal(False, f2.closed) + c[3] = f3 + self.assert_equal(False, 1 in c) + self.assert_equal(True, f1.closed) + self.assert_equal(True, 2 in c) + self.assert_equal(False, f2.closed) + del c[2] + self.assert_equal(False, 2 in c) + self.assert_equal(True, f2.closed) + c.clear() + self.assert_equal(c.items(), set()) + self.assert_equal(True, f3.closed) From 02b3fbb401f1818e977698b7b29732a6d77d007e Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Fri, 14 Aug 2015 14:44:32 +0100 Subject: [PATCH 081/142] lrucache: change test case to py.test I re-wrote lrucache (and it seems like no-one had looked at it much before :). I was told my test function would have been simpler in native py.test, so let's have a go converting it all. We can avoid any reference to unittest, because lrucache doesn't write files so it doesn't need any of our custom assertion helpers. 
--- borg/testsuite/lrucache.py | 57 ++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/borg/testsuite/lrucache.py b/borg/testsuite/lrucache.py index 2ad16c532..153250d8a 100644 --- a/borg/testsuite/lrucache.py +++ b/borg/testsuite/lrucache.py @@ -1,32 +1,35 @@ from ..lrucache import LRUCache -from . import BaseTestCase +import pytest from tempfile import TemporaryFile -class LRUCacheTestCase(BaseTestCase): +class TestLRUCache: - def test(self): + def test_lrucache(self): c = LRUCache(2, dispose=lambda _: None) - self.assert_equal(len(c), 0) + assert len(c) == 0 + assert c.items() == set() for i, x in enumerate('abc'): c[x] = i - self.assert_equal(len(c), 2) - self.assert_equal(c.items(), set([('b', 1), ('c', 2)])) - self.assert_equal(False, 'a' in c) - self.assert_equal(True, 'b' in c) - self.assert_raises(KeyError, lambda: c['a']) - self.assert_equal(c['b'], 1) - self.assert_equal(c['c'], 2) + assert len(c) == 2 + assert c.items() == set([('b', 1), ('c', 2)]) + assert 'a' not in c + assert 'b' in c + with pytest.raises(KeyError): + c['a'] + assert c['b'] == 1 + assert c['c'] == 2 c['d'] = 3 - self.assert_equal(len(c), 2) - self.assert_equal(c['c'], 2) - self.assert_equal(c['d'], 3) + assert len(c) == 2 + assert c['c'] == 2 + assert c['d'] == 3 del c['c'] - self.assert_equal(len(c), 1) - self.assert_raises(KeyError, lambda: c['c']) - self.assert_equal(c['d'], 3) + assert len(c) == 1 + with pytest.raises(KeyError): + c['c'] + assert c['d'] == 3 c.clear() - self.assert_equal(c.items(), set()) + assert c.items() == set() def test_dispose(self): c = LRUCache(2, dispose=lambda f: f.close()) @@ -35,15 +38,15 @@ class LRUCacheTestCase(BaseTestCase): f3 = TemporaryFile() c[1] = f1 c[2] = f2 - self.assert_equal(False, f2.closed) + assert not f2.closed c[3] = f3 - self.assert_equal(False, 1 in c) - self.assert_equal(True, f1.closed) - self.assert_equal(True, 2 in c) - self.assert_equal(False, f2.closed) + assert 1 not in c + 
assert f1.closed + assert 2 in c + assert not f2.closed del c[2] - self.assert_equal(False, 2 in c) - self.assert_equal(True, f2.closed) + assert 2 not in c + assert f2.closed c.clear() - self.assert_equal(c.items(), set()) - self.assert_equal(True, f3.closed) + assert not c.items() + assert f3.closed From a6b6712d6a312f8e839212ea246a896391c90abc Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 14 Aug 2015 23:00:04 +0200 Subject: [PATCH 082/142] deprecate the numeric --compression argument, rename null compression to none, update CHANGES --- CHANGES.rst | 32 ++++++++++++++++++++++++++++++++ borg/archiver.py | 14 +++++--------- borg/compress.pyx | 10 +++++----- borg/helpers.py | 12 +++--------- borg/key.py | 2 +- borg/testsuite/compress.py | 10 +++++----- borg/testsuite/helpers.py | 11 +++-------- 7 files changed, 54 insertions(+), 37 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b333ba48a..7245371d4 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,38 @@ Borg Changelog ============== +Compression branch +------------------ + +Compatibility notes: + +- the new compression code is very compatible: as long as you stay with zlib + compression, older borg releases will still be able to read data from a + repo/archive made with the new code (note: this is not the case for the + default "none" compression, use "zlib,0" if you want a "no compression" mode + that can be read by older borg). Also the new code is able to read repos and + archives made with older borg versions (for all zlib levels 0..9). + +Deprecations: + +- --compression N (with N being a number, as in 0.24) is deprecated. + We keep the --compression 0..9 for now to not break scripts, but it is + deprecated and will be removed later, so better fix your scripts now: + --compression 0 (as in 0.24) is the same as --compression zlib,0 (now). + BUT: if you do not want compression, you rather want --compression none + (which is the default). 
+ --compression 1 (in 0.24) is the same as --compression zlib,1 (now) + --compression 9 (in 0.24) is the same as --compression zlib,9 (now) + +New features: + +- create --compression none (default, means: do not compress, just pass through + data "as is". this is more efficient than zlib level 0 as used in borg 0.24) +- create --compression lz4 (super-fast, but not very high compression) + Please note that borgbackup needs lz4 library as additional requirement. +- create --compression zlib,N (slower, higher compression, default for N is 6) +- create --compression lzma,N (slowest, highest compression, default N is 6) + Version 0.24.0 -------------- diff --git a/borg/archiver.py b/borg/archiver.py index 768dc5361..1f0dc1d39 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -668,17 +668,13 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE', help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS) subparser.add_argument('-C', '--compression', dest='compression', - type=CompressionSpec, default=dict(name='null'), metavar='COMPRESSION', - help='select compression algorithm and level, by giving a number: ' - '0 == no compression [default], ' - '1..9 == zlib level 1..9, ' - '10 == lz4, ' - '20-29 == lzma level 0..9.' - 'Alternatively, you can also give a name and optionally additional args: ' - 'null == no compression, ' + type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION', + help='select compression algorithm (and level): ' + 'none == no compression (default), ' + 'lz4 == lz4, ' 'zlib == zlib (default level 6), ' 'zlib,0 .. zlib,9 == zlib (with level 0..9), ' - 'lz4 == lz4, ' + 'lzma == lzma (default level 6), ' 'lzma,0 .. 
lzma,9 == lzma (with level 0..9).') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), diff --git a/borg/compress.pyx b/borg/compress.pyx index c1bdeff82..2285b55d8 100644 --- a/borg/compress.pyx +++ b/borg/compress.pyx @@ -35,12 +35,12 @@ cdef class CompressorBase: return data[2:] -class CNULL(CompressorBase): +class CNONE(CompressorBase): """ - null compression, just pass through data + none - no compression, just pass through data """ ID = b'\x00\x00' - name = 'null' + name = 'none' def compress(self, data): return super().compress(data) @@ -161,12 +161,12 @@ class ZLIB(CompressorBase): COMPRESSOR_TABLE = { - CNULL.name: CNULL, + CNONE.name: CNONE, LZ4.name: LZ4, ZLIB.name: ZLIB, LZMA.name: LZMA, } -COMPRESSOR_LIST = [LZ4, CNULL, ZLIB, LZMA, ] # check fast stuff first +COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ] # check fast stuff first def get_compressor(name, **kwargs): cls = COMPRESSOR_TABLE[name] diff --git a/borg/helpers.py b/borg/helpers.py index 020c263e7..8643166f6 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -295,20 +295,14 @@ def CompressionSpec(s): compression = int(compression) if count > 1: raise ValueError - # it is just --compression N - if compression == 0: - return dict(name='null') - if 1 <= compression <= 9: + # DEPRECATED: it is just --compression N + if 0 <= compression <= 9: return dict(name='zlib', level=compression) - if compression == 10: - return dict(name='lz4') - if 20 <= compression <= 29: - return dict(name='lzma', level=compression-20) raise ValueError except ValueError: # --compression algo[,...] 
name = compression - if name in ('null', 'lz4', ): + if name in ('none', 'lz4', ): return dict(name=name) if name in ('zlib', 'lzma', ): if count < 2: diff --git a/borg/key.py b/borg/key.py index fcf083586..7067a4454 100644 --- a/borg/key.py +++ b/borg/key.py @@ -68,7 +68,7 @@ class KeyBase: self.TYPE_STR = bytes([self.TYPE]) self.repository = repository self.target = None # key location file path / repo obj - self.compressor = Compressor('null', buffer=COMPR_BUFFER) + self.compressor = Compressor('none', buffer=COMPR_BUFFER) def id_hash(self, data): """Return HMAC hash using the "id" HMAC key diff --git a/borg/testsuite/compress.py b/borg/testsuite/compress.py index 6d7319c1b..8019925b2 100644 --- a/borg/testsuite/compress.py +++ b/borg/testsuite/compress.py @@ -6,7 +6,7 @@ except ImportError: import pytest -from ..compress import get_compressor, Compressor, CNULL, ZLIB, LZ4 +from ..compress import get_compressor, Compressor, CNONE, ZLIB, LZ4 buffer = bytes(2**16) @@ -15,8 +15,8 @@ params = dict(name='zlib', level=6, buffer=buffer) def test_get_compressor(): - c = get_compressor(name='null') - assert isinstance(c, CNULL) + c = get_compressor(name='none') + assert isinstance(c, CNONE) c = get_compressor(name='lz4', buffer=buffer) assert isinstance(c, LZ4) c = get_compressor(name='zlib') @@ -26,7 +26,7 @@ def test_get_compressor(): def test_cnull(): - c = get_compressor(name='null') + c = get_compressor(name='none') cdata = c.compress(data) assert len(cdata) > len(data) assert data in cdata # it's not compressed and just in there 1:1 @@ -83,7 +83,7 @@ def test_zlib_compat(): def test_compressor(): params_list = [ - dict(name='null', buffer=buffer), + dict(name='none', buffer=buffer), dict(name='lz4', buffer=buffer), dict(name='zlib', level=0, buffer=buffer), dict(name='zlib', level=6, buffer=buffer), diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index bb2400a94..76bafb5b7 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ 
-108,17 +108,12 @@ class PatternTestCase(BaseTestCase): def test_compression_specs(): with pytest.raises(ValueError): CompressionSpec('') - assert CompressionSpec('0') == dict(name='null') + assert CompressionSpec('0') == dict(name='zlib', level=0) assert CompressionSpec('1') == dict(name='zlib', level=1) assert CompressionSpec('9') == dict(name='zlib', level=9) - assert CompressionSpec('10') == dict(name='lz4') with pytest.raises(ValueError): - CompressionSpec('11') - assert CompressionSpec('20') == dict(name='lzma', level=0) - assert CompressionSpec('29') == dict(name='lzma', level=9) - with pytest.raises(ValueError): - CompressionSpec('30') - assert CompressionSpec('null') == dict(name='null') + CompressionSpec('10') + assert CompressionSpec('none') == dict(name='none') assert CompressionSpec('lz4') == dict(name='lz4') assert CompressionSpec('zlib') == dict(name='zlib', level=6) assert CompressionSpec('zlib,0') == dict(name='zlib', level=0) From 1d16e7a37c74aa965772b0867f0277d2aca08388 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 15 Aug 2015 15:45:15 +0200 Subject: [PATCH 083/142] compression: update / refine docs --- README.rst | 3 ++- docs/internals.rst | 18 ++++++++++++++---- docs/quickstart.rst | 6 ++++-- docs/support.rst | 3 +++ docs/usage.rst | 3 +-- 5 files changed, 24 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 3d27de85c..8180fd2ab 100644 --- a/README.rst +++ b/README.rst @@ -51,7 +51,8 @@ Main features authenticity is verified using HMAC-SHA256. **Compression** - All data can be compressed by lz4, zlib or lzma. + All data can be compressed by lz4 (super fast, low compression), zlib + (medium speed and compression) or lzma (low speed, high compression). **Off-site backups** Borg can store data on any remote host accessible over SSH. 
If Borg is diff --git a/docs/internals.rst b/docs/internals.rst index 0ea68098b..845dff131 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -386,19 +386,29 @@ Compression - none (no compression, pass through data 1:1) - lz4 (low compression, but super fast) -- zlib (level 1-9, level 1 is low, level 9 is high compression) -- lzma (level 0-9, level 0 is low, level 9 is high compression. +- zlib (level 0-9, level 0 is no compression [but still adding zlib overhead], + level 1 is low, level 9 is high compression) +- lzma (level 0-9, level 0 is low, level 9 is high compression). Speed: none > lz4 > zlib > lzma Compression: lzma > zlib > lz4 > none +Be careful, higher zlib and especially lzma compression levels might take a +lot of resources (CPU and memory). + The overall speed of course also depends on the speed of your target storage. If that is slow, using a higher compression level might yield better overall performance. You need to experiment a bit. Maybe just watch your CPU load, if that is relatively low, increase compression until 1 core is 70-100% loaded. -Be careful, higher zlib and especially lzma compression levels might take a -lot of resources (CPU and memory). +Even if your target storage is rather fast, you might see interesting effects: +while doing no compression at all (none) is a operation that takes no time, it +likely will need to store more data to the storage compared to using lz4. +The time needed to transfer and store the additional data might be much more +than if you had used lz4 (which is super fast, but still might compress your +data about 2:1). This is assuming your data is compressible (if you backup +already compressed data, trying to compress them at backup time is usually +pointless). Compression is applied after deduplication, thus using different compression methods in one repo does not influence deduplication. 
diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 9abe4fb6a..4b78fefbb 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -101,11 +101,13 @@ If you have a quick repo storage and you want a little compression: $ borg create --compression lz4 /mnt/backup::repo ~ -If you have a medium fast repo storage and you want a bit more compression (N=0..9): +If you have a medium fast repo storage and you want a bit more compression (N=0..9, +0 means no compression, 9 means high compression): $ borg create --compression zlib,N /mnt/backup::repo ~ -If you have a very slow repo storage and you want high compression (N=0..9): +If you have a very slow repo storage and you want high compression (N=0..9, 0 means +low compression, 9 means high compression): $ borg create --compression lzma,N /mnt/backup::repo ~ diff --git a/docs/support.rst b/docs/support.rst index 5e953f202..f53c01285 100644 --- a/docs/support.rst +++ b/docs/support.rst @@ -4,6 +4,9 @@ Support ======= +Please first read the docs and the FAQ section in the docs, a lot of stuff is +documented / explained there. + Issue Tracker ------------- diff --git a/docs/usage.rst b/docs/usage.rst index a68d67c3f..c4e2fa80f 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -76,8 +76,7 @@ Resource Usage |project_name| might use a lot of resources depending on the size of the data set it is dealing with. CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded. - Especially higher zlib and lzma compression uses significant amounts of CPU - cycles. + Especially higher zlib and lzma compression levels use significant amounts of CPU cycles. Memory (RAM): the chunks index and the files index are read into memory for performance reasons. compression, esp. 
lzma compression with high levels might need substantial amounts From e1de3dce7b7981ec15d58a40eaf17532fa346125 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 15 Aug 2015 15:49:11 +0200 Subject: [PATCH 084/142] integrate compression branch changes into change history for 0.25 --- CHANGES.rst | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d33f28ddb..439ee4c37 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,8 +1,9 @@ Borg Changelog ============== -Compression branch ------------------- + +Version 0.25.0 (not released yet) +--------------------------------- Compatibility notes: @@ -24,6 +25,7 @@ Deprecations: --compression 1 (in 0.24) is the same as --compression zlib,1 (now) --compression 9 (in 0.24) is the same as --compression zlib,9 (now) + New features: - create --compression none (default, means: do not compress, just pass through @@ -32,21 +34,6 @@ New features: Please note that borgbackup needs lz4 library as additional requirement. 
- create --compression zlib,N (slower, higher compression, default for N is 6) - create --compression lzma,N (slowest, highest compression, default N is 6) - - -Version 0.25.0 (not released yet) ---------------------------------- - -Incompatible changes (compared to 0.24): - -- none yet - -Deprecations: - -- none yet - -New features: - - honor the nodump flag (UF_NODUMP) and do not backup such items Bug fixes: From e5b647fbd1b336c21392e2bcab74a92e618243f8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 15 Aug 2015 16:15:10 +0200 Subject: [PATCH 085/142] minor lrucache test fix --- borg/testsuite/lrucache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/lrucache.py b/borg/testsuite/lrucache.py index 153250d8a..2ed2ef9b3 100644 --- a/borg/testsuite/lrucache.py +++ b/borg/testsuite/lrucache.py @@ -48,5 +48,5 @@ class TestLRUCache: assert 2 not in c assert f2.closed c.clear() - assert not c.items() + assert c.items() == set() assert f3.closed From 738ed5d91b9c2ae35e8ad4f4f1e6738fb3b6d31d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 15 Aug 2015 17:07:09 +0200 Subject: [PATCH 086/142] 2 small archiver testsuite fixes environment context manager: if a env var was not present before, it should not be present afterwards teardown: cd out of the tmpdir before deleting it --- borg/testsuite/archiver.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 2ed2f7821..1f1be2bf0 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -58,7 +58,9 @@ class environment_variable: def __exit__(self, *args, **kw): for k, v in self.old_values.items(): - if v is not None: + if v is None: + del os.environ[k] + else: os.environ[k] = v @@ -89,8 +91,8 @@ class ArchiverTestCaseBase(BaseTestCase): os.chdir(self.tmpdir) def tearDown(self): - shutil.rmtree(self.tmpdir) os.chdir(self._old_wd) + shutil.rmtree(self.tmpdir) def cmd(self, *args, 
**kw): exit_code = kw.get('exit_code', 0) From 608c0935e09c64ecd477d0839986d88d36bc0f35 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 15 Aug 2015 20:52:14 +0200 Subject: [PATCH 087/142] borg list --short, remove requirement for fakeroot, xfail a test borg list --short just spills out the list of files / dirs - better for some tests and also useful on the commandline for interactive use. the tests previously needed fakeroot because in the test setup it always made calls to mknod and chown, which require (fake)root. now, the tests adapt to whether it detects (fake)root or not - to run the the tests completely, you still need fakeroot, but it won't fail all the archiver tests just due to failing test setup. also, a test not working correctly due to fakeroot was found: it should detect whether a read-only repo is usable, but it failed to do that because with (fake)root, there is no "read only" (at least not via taking away the w permission bits). --- borg/archiver.py | 57 +++++++++++++++++++++----------------- borg/testsuite/archiver.py | 52 +++++++++++++++++++++++++++------- 2 files changed, 74 insertions(+), 35 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 3f4876943..166050dcd 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -337,34 +337,38 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") repository = self.open_repository(args.src) manifest, key = Manifest.load(repository) if args.src.archive: - tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'} archive = Archive(repository, key, manifest, args.src.archive) - for item in archive.iter_items(): - type = tmap.get(item[b'mode'] // 4096, '?') - mode = format_file_mode(item[b'mode']) - size = 0 - if type == '-': + if args.short: + for item in archive.iter_items(): + print(remove_surrogates(item[b'path'])) + else: + tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'} + for item in archive.iter_items(): + type = 
tmap.get(item[b'mode'] // 4096, '?') + mode = format_file_mode(item[b'mode']) + size = 0 + if type == '-': + try: + size = sum(size for _, size, _ in item[b'chunks']) + except KeyError: + pass try: - size = sum(size for _, size, _ in item[b'chunks']) - except KeyError: - pass - try: - mtime = datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9) - except ValueError: - # likely a broken mtime and datetime did not want to go beyond year 9999 - mtime = datetime(9999, 12, 31, 23, 59, 59) - if b'source' in item: - if type == 'l': - extra = ' -> %s' % item[b'source'] + mtime = datetime.fromtimestamp(bigint_to_int(item[b'mtime']) / 1e9) + except ValueError: + # likely a broken mtime and datetime did not want to go beyond year 9999 + mtime = datetime(9999, 12, 31, 23, 59, 59) + if b'source' in item: + if type == 'l': + extra = ' -> %s' % item[b'source'] + else: + type = 'h' + extra = ' link to %s' % item[b'source'] else: - type = 'h' - extra = ' link to %s' % item[b'source'] - else: - extra = '' - print('%s%s %-6s %-6s %8d %s %s%s' % ( - type, mode, item[b'user'] or item[b'uid'], - item[b'group'] or item[b'gid'], size, format_time(mtime), - remove_surrogates(item[b'path']), extra)) + extra = '' + print('%s%s %-6s %-6s %8d %s %s%s' % ( + type, mode, item[b'user'] or item[b'uid'], + item[b'group'] or item[b'gid'], size, format_time(mtime), + remove_surrogates(item[b'path']), extra)) else: for archive_info in manifest.list_archive_infos(sort_by='ts'): print(format_archive(archive_info)) @@ -766,6 +770,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=list_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_list) + subparser.add_argument('--short', dest='short', + action='store_true', default=False, + help='only print file/directory names, nothing else') subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(), help='repository/archive to list contents of') 
mount_epilog = textwrap.dedent(""" diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 1f1be2bf0..e635d1b0c 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -12,6 +12,7 @@ import unittest from hashlib import sha256 from mock import patch +import pytest from .. import xattr from ..archive import Archive, ChunkBuffer, CHUNK_MAX_EXP @@ -33,6 +34,12 @@ has_lchflags = hasattr(os, 'lchflags') src_dir = os.path.join(os.getcwd(), os.path.dirname(__file__), '..') +# Python <= 3.2 raises OSError instead of PermissionError (See #164) +try: + PermissionError = PermissionError +except NameError: + PermissionError = OSError + class changedir: def __init__(self, dir): @@ -154,15 +161,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): self.create_regular_file('flagfile', size=1024) # Directory self.create_regular_file('dir2/file2', size=1024 * 80) - # File owner - os.chown('input/file1', 100, 200) # File mode os.chmod('input/file1', 0o7755) - os.chmod('input/dir2', 0o555) - # Block device - os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20)) - # Char device - os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40)) # Hard link os.link(os.path.join(self.input_path, 'file1'), os.path.join(self.input_path, 'hardlink')) @@ -180,20 +180,50 @@ class ArchiverTestCase(ArchiverTestCaseBase): os.mkfifo(os.path.join(self.input_path, 'fifo1')) if has_lchflags: os.lchflags(os.path.join(self.input_path, 'flagfile'), stat.UF_NODUMP) + try: + # Block device + os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20)) + # Char device + os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40)) + # File mode + os.chmod('input/dir2', 0o555) # if we take away write perms, we need root to remove contents + # File owner + os.chown('input/file1', 100, 200) + have_root = True # we have (fake)root + except PermissionError: + have_root = False + return have_root def test_basic_functionality(self): - self.create_test_files() 
+ have_root = self.create_test_files() self.cmd('init', self.repository_location) self.cmd('create', self.repository_location + '::test', 'input') self.cmd('create', '--stats', self.repository_location + '::test.2', 'input') with changedir('output'): self.cmd('extract', self.repository_location + '::test') self.assert_equal(len(self.cmd('list', self.repository_location).splitlines()), 2) - item_count = 10 if has_lchflags else 11 # one file is UF_NODUMP - self.assert_equal(len(self.cmd('list', self.repository_location + '::test').splitlines()), item_count) + expected = [ + 'input', + 'input/bdev', + 'input/cdev', + 'input/dir2', + 'input/dir2/file2', + 'input/empty', + 'input/fifo1', + 'input/file1', + 'input/flagfile', + 'input/hardlink', + 'input/link1', + ] + if not have_root: + # we could not create these device files without (fake)root + expected.remove('input/bdev') + expected.remove('input/cdev') if has_lchflags: # remove the file we did not backup, so input and output become equal + expected.remove('input/flagfile') # this file is UF_NODUMP os.remove(os.path.join('input', 'flagfile')) + self.assert_equal(self.cmd('list', '--short', self.repository_location + '::test').splitlines(), expected) self.assert_dirs_equal('input', 'output/input') info_output = self.cmd('info', self.repository_location + '::test') item_count = 3 if has_lchflags else 4 # one file is UF_NODUMP @@ -436,6 +466,8 @@ class ArchiverTestCase(ArchiverTestCaseBase): fd.write(b'XXXX') self.cmd('check', self.repository_location, exit_code=1) + # we currently need to be able to create a lock directory inside the repo: + @pytest.mark.xfail(reason="we need to be able to create the lock directory inside the repo") def test_readonly_repository(self): self.cmd('init', self.repository_location) self.create_src_archive('test') From b180158876a67a60b74c60e9d3441d3094edc9f5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 16 Aug 2015 14:51:15 +0200 Subject: [PATCH 088/142] generalize hashindex 
code for any key length currently, we only use sha256 hashes as key, so key length is always 32. but instead of hardcoding 32 everywhere, using key_length is just better readable and also more flexible for the future. --- borg/hashindex.pyx | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx index 83416bcdf..6652e057f 100644 --- a/borg/hashindex.pyx +++ b/borg/hashindex.pyx @@ -32,9 +32,10 @@ cimport cython @cython.internal cdef class IndexBase: cdef HashIndex *index - key_size = 32 + cdef int key_size - def __cinit__(self, capacity=0, path=None): + def __cinit__(self, capacity=0, path=None, key_size=32): + self.key_size = key_size if path: self.index = hashindex_read(os.fsencode(path)) if not self.index: @@ -67,7 +68,7 @@ cdef class IndexBase: self[key] = value def __delitem__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size if not hashindex_delete(self.index, key): raise Exception('hashindex_delete failed') @@ -96,14 +97,14 @@ cdef class NSIndex(IndexBase): value_size = 8 def __getitem__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) if not data: raise KeyError return _le32toh(data[0]), _le32toh(data[1]) def __setitem__(self, key, value): - assert len(key) == 32 + assert len(key) == self.key_size cdef int[2] data data[0] = _htole32(value[0]) data[1] = _htole32(value[1]) @@ -111,20 +112,20 @@ cdef class NSIndex(IndexBase): raise Exception('hashindex_set failed') def __contains__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) return data != NULL def iteritems(self, marker=None): cdef const void *key - iter = NSKeyIterator() + iter = NSKeyIterator(self.key_size) iter.idx = self iter.index = self.index if marker: key = hashindex_get(self.index, marker) if marker is None: raise IndexError - iter.key = key - 32 + iter.key = key 
- self.key_size return iter @@ -132,9 +133,11 @@ cdef class NSKeyIterator: cdef NSIndex idx cdef HashIndex *index cdef const void *key + cdef int key_size - def __cinit__(self): + def __cinit__(self, key_size): self.key = NULL + self.key_size = key_size def __iter__(self): return self @@ -143,8 +146,8 @@ cdef class NSKeyIterator: self.key = hashindex_next_key(self.index, self.key) if not self.key: raise StopIteration - cdef int *value = (self.key + 32) - return (self.key)[:32], (_le32toh(value[0]), _le32toh(value[1])) + cdef int *value = (self.key + self.key_size) + return (self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1])) cdef class ChunkIndex(IndexBase): @@ -152,14 +155,14 @@ cdef class ChunkIndex(IndexBase): value_size = 12 def __getitem__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) if not data: raise KeyError return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2]) def __setitem__(self, key, value): - assert len(key) == 32 + assert len(key) == self.key_size cdef int[3] data data[0] = _htole32(value[0]) data[1] = _htole32(value[1]) @@ -168,20 +171,20 @@ cdef class ChunkIndex(IndexBase): raise Exception('hashindex_set failed') def __contains__(self, key): - assert len(key) == 32 + assert len(key) == self.key_size data = hashindex_get(self.index, key) return data != NULL def iteritems(self, marker=None): cdef const void *key - iter = ChunkKeyIterator() + iter = ChunkKeyIterator(self.key_size) iter.idx = self iter.index = self.index if marker: key = hashindex_get(self.index, marker) if marker is None: raise IndexError - iter.key = key - 32 + iter.key = key - self.key_size return iter def summarize(self): @@ -199,9 +202,11 @@ cdef class ChunkKeyIterator: cdef ChunkIndex idx cdef HashIndex *index cdef const void *key + cdef int key_size - def __cinit__(self): + def __cinit__(self, key_size): self.key = NULL + self.key_size = key_size def __iter__(self): return self @@ -210,5 
+215,5 @@ cdef class ChunkKeyIterator: self.key = hashindex_next_key(self.index, self.key) if not self.key: raise StopIteration - cdef int *value = (self.key + 32) - return (self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2])) + cdef int *value = (self.key + self.key_size) + return (self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2])) From 1c7b5b23f4b6ee22255554d9e925ea934e99f38a Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 17 Aug 2015 11:31:42 +0200 Subject: [PATCH 089/142] minor change in copyright notice --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 5962d1cab..d327e2c5a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. project = 'Borg - Deduplicating Archiver' -copyright = '2010-2014, Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)' +copyright = '2010-2014 Jonas Borgström, 2015 The Borg Collective (see AUTHORS file)' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the From 93a89d97faed2f5091ad7d229f15049c63c3ee46 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 17 Aug 2015 11:50:47 +0200 Subject: [PATCH 090/142] ChunkerParams: fix parameter order the parser for the --chunker-params argument had a wrong parameter order. fixed the order so it conforms to the help text and the docs. also added some tests for it and a text for the ValueError exception. 
--- borg/helpers.py | 6 +++--- borg/testsuite/helpers.py | 9 ++++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index 8643166f6..6d2b81736 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -277,12 +277,12 @@ def timestamp(s): def ChunkerParams(s): - window_size, chunk_mask, chunk_min, chunk_max = s.split(',') + chunk_min, chunk_max, chunk_mask, window_size = s.split(',') if int(chunk_max) > 23: # do not go beyond 2**23 (8MB) chunk size now, # COMPR_BUFFER can only cope with up to this size - raise ValueError - return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max) + raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)') + return int(chunk_min), int(chunk_max), int(chunk_mask), int(window_size) def CompressionSpec(s): diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 76bafb5b7..b61a8268f 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -7,7 +7,7 @@ import msgpack from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \ prune_within, prune_split, \ - StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec + StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams from . 
import BaseTestCase @@ -129,6 +129,13 @@ def test_compression_specs(): CompressionSpec('invalid') +def test_chunkerparams(): + assert ChunkerParams('19,23,21,4095') == (19, 23, 21, 4095) + assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095) + with pytest.raises(ValueError): + ChunkerParams('19,24,21,4095') + + class MakePathSafeTestCase(BaseTestCase): def test(self): From 8cf0ead693600caa0d336545fe3261f689053714 Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 17 Aug 2015 12:53:15 +0200 Subject: [PATCH 091/142] docs: added `favicon.ico` --- docs/_themes/local/static/favicon.ico | Bin 0 -> 38926 bytes docs/conf.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 docs/_themes/local/static/favicon.ico diff --git a/docs/_themes/local/static/favicon.ico b/docs/_themes/local/static/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..5db148ff4032bd193869b586db71df7ef3fa93b8 GIT binary patch literal 38926 zcmeI5O^g&p6vu0JKo$f=7C{&J5CjoLQ4n2SSlGqgb$3A)#m^?e5WW&T5j>Ibb>u>V z7fyr(AtaDMLc(XrgaamAxIqFT*`s>$=*ji}ukN>7+jMo$%+9dG@GALJHPx?Qzxws7 z?w(#!V@%HE&8AHT^*l567h_f$gU=24oHGLxjOj#QxV_Q*Ip3H?`;1vk8|XB|{VGs4 z!pCpWAJE^>V4aF%;}|(sj=2(LGMpTN{(|b2Th_QZb}}G~a>~!x>3QZC=Svo3Qc7V? z$NlK%kO|ooQ+|h$GD~MbMrlR!FrA;9U%tHL+;{D?oy{YDz2v<}?>wr-n;o-GHILk70;#K-#I+3 zT&U;wHcyM89;f(utE`lj=78pa=78pa=78pa=78pa=78pa=78pa=78pa=78pa=78ov z={Yb1e^V(yk2BP(e|z!%{iz9TZe{Ru`SvNxM3${3FchBamE{vGr-I39kJ`|G6B2%I z-Hv{}h8D*l2Uh1j9QS2#Y%Iu!SH zAZ91WK9gV@-#6)VQcN7{ENHAaa!fv!N%0DUB#zv61LA$a9e*V`ro}{-&Cponqi~6{ zOpBNM_&6r9tQ5zrn0V}tDsfy0zh@)gJWtd;Ncg$+e)Km$%%yD}o7?E$3(bbMK*u20 zhqO88r$F>oDUQCFuHYEILcIRXhpLf}jqvNvF}9CDGG4dloE#t7^V;QZllF;dyZBt; zv7`Om$hJ}(eKB!75)X4$IS%s>E^e(HlXII~Tuxc-vE(=nvGV!feQrI8&oWQl`Z)UY z5Xa+-xtjC39mfq4v^*oGr!lXyo*#uH*-nh&9Se@UuAPG<9=8@wzH=+v8F#g=kHvCa z=`d7|BhTj@uql?~eb_E_So+R=wdQrL=VK|)Jy*eTT*PuKYIkl)`vLkdKue&bknjB! 
zpXnw;%XXFuj+DIDGB zN9M;4k6*^s0$uZLXFu<65^oyE4v!BRJM&+jX?-z~$}iQDDEpMrZkQ{9v?}5(>Rv<{W|G5_1>4pvDo|dA)L2-3FodJnU0pD zG7PUDqv5Cwldr+`INx+kdagAGGzT;XGzT;XGzT;XGzT;XGzT;XGzT;XYCQ*Z&6LsS zsH~}5>*u=G*d0>uBn_ zuI1$;bJ1Pbl{u;F442L_zRoZ?vgo?k#k2ISdmWNAM%Q7_W|_4PyRg)}^s~&$Lt!X2 z=e|A<4f7-m-}O+K`D)&acwc{>d2;Mz5Wg>@tE?hxJJ z1^b`G?5DPZ`A_zoTQL7hdqTndBkg>_3`jdSvR#mVELd=GsJ+J99x~B}5~u?L2ZzSs z&-ucT&B(bkApMwA?p$-{pgTAD<Ul)!UDU~s75`Y=?q9imVU z%Hi3}fsyz3#`G&1Z~qNY9vTOYhq%i`XcEM4T2r8@&@`wCnhwo?W9?NA5Q33Wp~P%qR6t%v%d0NMg=g|BABQ~yorTUp=b;PGMd%WA z8M*>pg|0!@p&QUm=oVCfZbNsVyU;!8KJ);32t9%xLt2!85>Nt4KnW-TC7=Y9fD%vw zbxz>ruY;((U8#{y*(5H5U~z?<&KY(Y1n*SF>5KDhd9&a8=J(@@m0Xwg$?w%+S!_RV zemxq-=lQTZx11fbH{hTCm)O-}4c3KQqkpZ@=4n|zgV1T{GV~d2zUb=e`p%B;(+2atZG7`_(|fCJYr~|3I76epPgcI_@%aJ~ zwOR)o$D4Jwu?AhUY%7?a;4?JBn+_k(XT|5_IY{5NcA#zf^s&!>m!&x}N*TN4XArEN zU{ZbNI_;5LOY!*z&P>`lD#z#duT{#YbKcjFGfVC8e(0XLD!iKYO_W?}{<7ysyYYj_ zsC7wC&ri08@7cl<&3DHq@-w&0TFE@xfLD0YJX#QL2-b*E>x!u7E%9hfqED0i&|2DS zNbnV0iqFP&yP=ma46Tv)r0^0~-%ME^uS zN6vTGpp16pb2vXgi>{z$_>x`XJ}cl-&XL4f36~PC)=EGLC;=s)1eAahPy$Lo2`GWu GBk(`O$unC3 literal 0 HcmV?d00001 diff --git a/docs/conf.py b/docs/conf.py index d327e2c5a..772d88498 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -116,7 +116,7 @@ html_theme_path = ['_themes'] # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +html_favicon = 'favicon.ico' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, From 0926c2821f1d255737c32340279e4687739defbf Mon Sep 17 00:00:00 2001 From: Per Guth Date: Mon, 17 Aug 2015 17:10:37 +0200 Subject: [PATCH 092/142] docs: installation: korora/fedora -> `lz4-devel` --- docs/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.rst b/docs/installation.rst index d08863b7f..da79d88f3 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -112,7 +112,7 @@ Some of the steps detailled below might be useful also for non-git installs. sudo dnf install libacl-devel libacl # lz4 super fast compression support Headers + Library - sudo dnf install lz4 + sudo dnf install lz4-devel # optional: FUSE support - to mount backup archives sudo dnf install fuse-devel fuse From d3d78f7ae344e06a0182ded20be65522b9607465 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 20 Aug 2015 05:33:51 +0200 Subject: [PATCH 093/142] call fadvise DONTNEED for the byterange we actually have read, fixes #158 avoid throwing away potential readahead data the OS might have read into the cache. 
--- borg/_chunker.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 4db21b75b..5e599ed89 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -128,6 +128,7 @@ static int chunker_fill(Chunker *c) { ssize_t n; + size_t offset, length; PyObject *data; memmove(c->data, c->data + c->last, c->position + c->remaining - c->last); c->position -= c->last; @@ -137,6 +138,7 @@ chunker_fill(Chunker *c) return 1; } if(c->fh >= 0) { + offset = c->bytes_read; // if we have a os-level file descriptor, use os-level API n = read(c->fh, c->data + c->position + c->remaining, n); if(n > 0) { @@ -151,13 +153,16 @@ chunker_fill(Chunker *c) // some error happened return 0; } + length = c->bytes_read - offset; #if ( _XOPEN_SOURCE >= 600 || _POSIX_C_SOURCE >= 200112L ) - // We tell the OS that we do not need the data of this file any more - // that it maybe has in the cache. This avoids that we spoil the + // We tell the OS that we do not need the data that we just have read any + // more (that it maybe has in the cache). This avoids that we spoil the // complete cache with data that we only read once and (due to cache // size limit) kick out data from the cache that might be still useful // for the OS or other processes. 
- posix_fadvise(c->fh, (off_t) 0, (off_t) 0, POSIX_FADV_DONTNEED); + if (length > 0) { + posix_fadvise(c->fh, (off_t) offset, (off_t) length, POSIX_FADV_DONTNEED); + } #endif } else { From 7c6f3ece662709ab596469d173ec5f4826e601ce Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 20 Aug 2015 15:55:12 +0100 Subject: [PATCH 094/142] Initialize chunker fd to -1, so it's not equal to STDIN_FILENO (0) --- borg/_chunker.c | 1 + 1 file changed, 1 insertion(+) diff --git a/borg/_chunker.c b/borg/_chunker.c index 5e599ed89..9dbed1fa5 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -96,6 +96,7 @@ chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32 c->table = buzhash_init_table(seed); c->buf_size = max_size; c->data = malloc(c->buf_size); + c->fh = -1; return c; } From ce3e67cb96f5a189a2f93d5d4847d7dd4b5aea78 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 20 Aug 2015 17:19:48 +0100 Subject: [PATCH 095/142] chunker - fix 4GB files on 32-bit systems From code inspection - effect not actually tested. 
--- borg/_chunker.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 9dbed1fa5..8242a2243 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -83,7 +83,8 @@ typedef struct { PyObject *fd; int fh; int done, eof; - size_t remaining, bytes_read, bytes_yielded, position, last; + size_t remaining, position, last; + off_t bytes_read, bytes_yielded; } Chunker; static Chunker * From 0a2bd8dad557bd76d2d684db8f30f9e9d48bafb6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 20 Aug 2015 18:40:24 +0200 Subject: [PATCH 096/142] lock roster: catch file not found in remove() method and ignore it --- borg/locking.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/borg/locking.py b/borg/locking.py index 9bed13e19..8e4f1a41f 100644 --- a/borg/locking.py +++ b/borg/locking.py @@ -176,7 +176,11 @@ class LockRoster: json.dump(data, f) def remove(self): - os.unlink(self.path) + try: + os.unlink(self.path) + except OSError as e: + if e.errno != errno.ENOENT: + raise def get(self, key): roster = self.load() From 59a44296e4e6aace3c0fe0154fc1a27a7a75bee6 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Thu, 20 Aug 2015 17:48:59 +0100 Subject: [PATCH 097/142] chunker - cast from size_t to off_t can now be removed Sorry, this should really have been part of the previous commit - it's why I noticed a problem. --- borg/_chunker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 8242a2243..23abf1e6c 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -163,7 +163,7 @@ chunker_fill(Chunker *c) // size limit) kick out data from the cache that might be still useful // for the OS or other processes. 
if (length > 0) { - posix_fadvise(c->fh, (off_t) offset, (off_t) length, POSIX_FADV_DONTNEED); + posix_fadvise(c->fh, offset, length, POSIX_FADV_DONTNEED); } #endif } From 1e11e24fc43a369141b163a7fb744e6e91f951f4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 20 Aug 2015 22:37:38 +0200 Subject: [PATCH 098/142] document locking system --- docs/internals.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/internals.rst b/docs/internals.rst index 845dff131..4792e5045 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -41,6 +41,32 @@ lock.roster and lock.exclusive/* used by the locking system to manage shared and exclusive locks +Lock files +---------- + +|project_name| uses locks to get (exclusive or shared) access to the cache and +the repository. + +The locking system is based on creating a directory `lock.exclusive` (for +exclusive locks). Inside the lock directory, there is a file indicating +hostname, process id and thread id of the lock holder. + +There is also a json file `lock.roster` that keeps a directory of all shared +and exclusive lockers. + +If the process can create the `lock.exclusive` directory for a resource, it has +the lock for it. If creation fails (because the directory has already been +created by some other process), lock acquisition fails. + +The cache lock is usually in `~/.cache/borg/REPOID/lock.*`. +The repository lock is in `repository/lock.*`. + +In case you run into troubles with the locks, you can just delete the `lock.*` +directory and file IF you first make sure that no |project_name| process is +running on any machine that accesses this resource. Be very careful, the cache +or repository might get damaged if multiple processes use it at the same time. 
+ + Config file ----------- From 3d974d28b322c8a7e45a3ee4a428c14f9dc8d923 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 20 Aug 2015 23:27:24 +0200 Subject: [PATCH 099/142] update CHANGES --- CHANGES.rst | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 439ee4c37..c44b44324 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -25,24 +25,41 @@ Deprecations: --compression 1 (in 0.24) is the same as --compression zlib,1 (now) --compression 9 (in 0.24) is the same as --compression zlib,9 (now) - New features: - create --compression none (default, means: do not compress, just pass through data "as is". this is more efficient than zlib level 0 as used in borg 0.24) - create --compression lz4 (super-fast, but not very high compression) - Please note that borgbackup needs lz4 library as additional requirement. + Please note that borgbackup needs lz4 library as additional requirement (#156). - create --compression zlib,N (slower, higher compression, default for N is 6) - create --compression lzma,N (slowest, highest compression, default N is 6) - honor the nodump flag (UF_NODUMP) and do not backup such items +- list --short just outputs a simple list of the files/directories in an archive Bug fixes: +- fixed --chunker-params parameter order confusion / malfunction, fixes #154 - close fds of segments we delete (during compaction) +- close files which fell out the lrucache +- fadvise DONTNEED now is only called for the byte range actually read, not for + the whole file, fixes #158. Other changes: -- none yet +- remove fakeroot requirement for tests, tests run faster without fakeroot + (test setup does not fail any more without fakeroot, so you can run with or + without fakeroot), fixes #151 and #91. 
+- more tests for archiver +- recover_segment(): don't assume we have an fd for segment +- lrucache refactoring / cleanup, add dispose function, py.test tests +- generalize hashindex code for any key length (less hardcoding) +- lock roster: catch file not found in remove() method and ignore it +- improved docs: + + - replace hack for llfuse with proper solution (install libfuse-dev) + - update docs about compression + - internals: add some words about lock files / locking system + Version 0.24.0 From 2402a2269e6a151cc0be31a0ca7d71ccf7c21403 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 20 Aug 2015 23:39:40 +0200 Subject: [PATCH 100/142] update docs about fakeroot --- CHANGES.rst | 1 + docs/development.rst | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index c44b44324..07afec379 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -58,6 +58,7 @@ Other changes: - replace hack for llfuse with proper solution (install libfuse-dev) - update docs about compression + - update development docs about fakeroot - internals: add some words about lock files / locking system diff --git a/docs/development.rst b/docs/development.rst index 6c06eeb9e..be8405c18 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -26,7 +26,9 @@ Running the tests The tests are in the borg/testsuite package. -To run them, you need to have fakeroot, tox and pytest installed. +To run all the tests, you need to have fakeroot installed. If you do not have +fakeroot, you still will be able to run most tests, just leave away the +`fakeroot -u` from the given command lines. To run the test suite use the following command:: @@ -47,7 +49,6 @@ Some more advanced examples:: Important notes: -- Without fakeroot -u some tests will fail. - When using -- to give options to py.test, you MUST also give borg.testsuite[.module]. 
Building the docs with Sphinx From fdc18eb0a02da2e4c6c05b1ffed8013436bfc895 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 21 Aug 2015 00:25:24 +0200 Subject: [PATCH 101/142] support docs: write some words about BountySource --- CHANGES.rst | 2 +- docs/support.rst | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 07afec379..883878962 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -60,7 +60,7 @@ Other changes: - update docs about compression - update development docs about fakeroot - internals: add some words about lock files / locking system - + - support: mention BountySource and for what it can be used Version 0.24.0 diff --git a/docs/support.rst b/docs/support.rst index f53c01285..02f3b2be5 100644 --- a/docs/support.rst +++ b/docs/support.rst @@ -35,3 +35,23 @@ to the confirmation mail. To unsubscribe, send an email to borgbackup-unsubscribe@librelist.com and reply to the confirmation mail. + +Bounties and Fundraisers +------------------------ + +We use `BountySource <https://www.bountysource.com/teams/borgbackup>`_ to allow +monetary contributions to the project and the developers, who push it forward. + +There, you can give general funds to the borgbackup members (the developers will +then spend the funds as they deem fit). If you do not have some specific bounty +(see below), you can use this as a general way to say "Thank You!" and support +the software / project you like. + +If you want to encourage developers to fix some specific issue or implement some +specific feature suggestion, you can post a bounty or back an existing one (they +always refer to an issue in our `issue tracker`_). + +As a developer, you can become a Bounty Hunter and win bounties by contributing +to |project_name|, a free and open source software project. + +We might also use BountySource to fund raise for some bigger goals.
From 034e3477bdb09c9f78924ac8afdb7a4c45ef30ce Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 21 Aug 2015 01:10:34 +0200 Subject: [PATCH 102/142] travis: use requirements file --- .travis/install.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis/install.sh b/.travis/install.sh index 27eb668db..360453187 100755 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -43,5 +43,6 @@ fi python -m virtualenv ~/.venv source ~/.venv/bin/activate -pip install tox pytest pytest-cov codecov Cython +pip install -r requirements.d/development.txt +pip install codecov pip install -e . From c7023ed12a7fb562bc339048d8abd5dedc34165d Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Sat, 22 Aug 2015 14:58:06 +0200 Subject: [PATCH 103/142] replace versioneer with setuptools_scm --- setup.py | 24 +- versioneer.py | 1046 ------------------------------------------------- 2 files changed, 9 insertions(+), 1061 deletions(-) delete mode 100644 versioneer.py diff --git a/setup.py b/setup.py index 87de52b71..11982ac39 100644 --- a/setup.py +++ b/setup.py @@ -3,14 +3,6 @@ import os import sys from glob import glob -import versioneer -versioneer.VCS = 'git' -versioneer.style = 'pep440' -versioneer.versionfile_source = 'borg/_version.py' -versioneer.versionfile_build = 'borg/_version.py' -versioneer.tag_prefix = '' -versioneer.parentdir_prefix = 'borgbackup-' # dirname like 'myproject-1.2.0' - min_python = (3, 2) if sys.version_info < min_python: print("Borg requires Python %d.%d or later" % min_python) @@ -18,6 +10,8 @@ if sys.version_info < min_python: from setuptools import setup, Extension +from setuptools.command.sdist import sdist + compress_source = 'borg/compress.pyx' crypto_source = 'borg/crypto.pyx' @@ -31,11 +25,11 @@ try: from Cython.Distutils import build_ext import Cython.Compiler.Main as cython_compiler - class Sdist(versioneer.cmd_sdist): + class Sdist(sdist): def __init__(self, *args, **kwargs): for src in glob('borg/*.pyx'): 
cython_compiler.compile(src, cython_compiler.default_options) - versioneer.cmd_sdist.__init__(self, *args, **kwargs) + super().__init__(*args, **kwargs) def make_distribution(self): self.filelist.extend([ @@ -50,7 +44,7 @@ try: super().make_distribution() except ImportError: - class Sdist(versioneer.cmd_sdist): + class Sdist(sdist): def __init__(self, *args, **kwargs): raise Exception('Cython is required to run sdist') @@ -90,8 +84,7 @@ library_dirs = [os.path.join(ssl_prefix, 'lib')] with open('README.rst', 'r') as fd: long_description = fd.read() -cmdclass = versioneer.get_cmdclass() -cmdclass.update({'build_ext': build_ext, 'sdist': Sdist}) +cmdclass = {'build_ext': build_ext, 'sdist': Sdist} ext_modules = [ Extension('borg.compress', [compress_source], libraries=['lz4']), @@ -108,7 +101,7 @@ elif sys.platform == 'darwin': setup( name='borgbackup', - version=versioneer.get_version(), + use_scm_version=True, author='The Borg Collective (see AUTHORS file)', author_email='borgbackup@librelist.com', url='https://borgbackup.github.io/', @@ -140,7 +133,8 @@ setup( }, cmdclass=cmdclass, ext_modules=ext_modules, + setup_requires=['setuptools_scm>=1.7'], # msgpack pure python data corruption was fixed in 0.4.6. # Also, we might use some rather recent API features. - install_requires=['msgpack-python>=0.4.6'] + install_requires=['msgpack-python>=0.4.6'], ) diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index c00770fe4..000000000 --- a/versioneer.py +++ /dev/null @@ -1,1046 +0,0 @@ - -# Version: 0.14 - -""" -The Versioneer -============== - -* like a rocketeer, but for versions! 
-* https://github.com/warner/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, and pypy -* [![Latest Version] -(https://pypip.in/version/versioneer/badge.svg?style=flat) -](https://pypi.python.org/pypi/versioneer/) -* [![Build Status] -(https://travis-ci.org/warner/python-versioneer.png?branch=master) -](https://travis-ci.org/warner/python-versioneer) - -This is a tool for managing a recorded version number in distutils-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -* `pip install versioneer` to somewhere to your $PATH -* run `versioneer-installer` in your source tree: this installs `versioneer.py` -* follow the instructions below (also in the `versioneer.py` docstring) - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. 
"myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example 'git describe --tags --dirty --always' reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes. - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. However, -when you use "setup.py build" or "setup.py sdist", `_version.py` in the new -copy is replaced by a small static file that contains just the generated -version data. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the "git archive" command. As a result, generated tarballs will -contain enough information to get the proper version. - - -## Installation - -First, decide on values for the following configuration variables: - -* `VCS`: the version control system you use. Currently accepts "git". - -* `versionfile_source`: - - A project-relative pathname into which the generated version strings should - be written. This is usually a `_version.py` next to your project's main - `__init__.py` file, so it can be imported at runtime. 
If your project uses - `src/myproject/__init__.py`, this should be `src/myproject/_version.py`. - This file should be checked in to your VCS as usual: the copy created below - by `setup.py versioneer` will include code that parses expanded VCS - keywords in generated tarballs. The 'build' and 'sdist' commands will - replace it with a copy that has just the calculated version string. - - This must be set even if your project does not have any modules (and will - therefore never import `_version.py`), since "setup.py sdist" -based trees - still need somewhere to record the pre-calculated version strings. Anywhere - in the source tree should do. If there is a `__init__.py` next to your - `_version.py`, the `setup.py versioneer` command (described below) will - append some `__version__`-setting assignments, if they aren't already - present. - -* `versionfile_build`: - - Like `versionfile_source`, but relative to the build directory instead of - the source directory. These will differ when your setup.py uses - 'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`, - then you will probably have `versionfile_build='myproject/_version.py'` and - `versionfile_source='src/myproject/_version.py'`. - - If this is set to None, then `setup.py build` will not attempt to rewrite - any `_version.py` in the built tree. If your project does not have any - libraries (e.g. if it only builds a script), then you should use - `versionfile_build = None` and override `distutils.command.build_scripts` - to explicitly insert a copy of `versioneer.get_version()` into your - generated script. - -* `tag_prefix`: - - a string, like 'PROJECTNAME-', which appears at the start of all VCS tags. - If your tags look like 'myproject-1.2.0', then you should use - tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this - should be an empty string. 
- -* `parentdir_prefix`: - - a string, frequently the same as tag_prefix, which appears at the start of - all unpacked tarball filenames. If your tarball unpacks into - 'myproject-1.2.0', this should be 'myproject-'. - -This tool provides one script, named `versioneer-installer`. That script does -one thing: write a copy of `versioneer.py` into the current directory. - -To versioneer-enable your project: - -* 1: Run `versioneer-installer` to copy `versioneer.py` into the top of your - source tree. - -* 2: add the following lines to the top of your `setup.py`, with the - configuration values you decided earlier: - - ```` - import versioneer - versioneer.VCS = 'git' - versioneer.versionfile_source = 'src/myproject/_version.py' - versioneer.versionfile_build = 'myproject/_version.py' - versioneer.tag_prefix = '' # tags are like 1.2.0 - versioneer.parentdir_prefix = 'myproject-' # dirname like 'myproject-1.2.0' - ```` - -* 3: add the following arguments to the setup() call in your setup.py: - - version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), - -* 4: now run `setup.py versioneer`, which will create `_version.py`, and will - modify your `__init__.py` (if one exists next to `_version.py`) to define - `__version__` (by calling a function from `_version.py`). It will also - modify your `MANIFEST.in` to include both `versioneer.py` and the generated - `_version.py` in sdist tarballs. - -* 5: commit these changes to your VCS. To make sure you won't forget, - `setup.py versioneer` will mark everything it touched for addition. - -## Post-Installation Usage - -Once established, all uses of your tree from a VCS checkout should get the -current version string. All generated tarballs should include an embedded -version string (so users who unpack them will not need a VCS tool installed). 
- -If you distribute your project through PyPI, then the release process should -boil down to two steps: - -* 1: git tag 1.0 -* 2: python setup.py register sdist upload - -If you distribute it through github (i.e. users use github to generate -tarballs with `git archive`), the process is: - -* 1: git tag 1.0 -* 2: git push; git push --tags - -Currently, all version strings must be based upon a tag. Versioneer will -report "unknown" until your tree has at least one tag in its history. This -restriction will be fixed eventually (see issue #12). - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different keys for different flavors -of the version string: - -* `['version']`: A condensed PEP440-compliant string, equal to the - un-prefixed tag name for actual releases, and containing an additional - "local version" section with more detail for in-between builds. For Git, - this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe - --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates - that the tree is like the "1076c97" commit but has uncommitted changes - (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" - tag. For released software (exactly equal to a known tag), the identifier - will only contain the stripped tag, e.g. "0.11". - -* `['full']`: detailed revision identifier. For Git, this is the full SHA1 - commit id, followed by ".dirty" if the tree contains uncommitted changes, - e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac.dirty". - -Some variants are more useful than others. 
Including `full` in a bug report -should allow developers to reconstruct the exact code being tested (or -indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. - -The `setup.py versioneer` command adds the following text to your -`__init__.py` to place a basic version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* re-run `versioneer-installer` in your source tree to replace your copy of - `versioneer.py` -* edit `setup.py`, if necessary, to include any new configuration settings - indicated by the release notes -* re-run `setup.py versioneer` to replace `SRC/_version.py` -* commit any changed files - -### Upgrading from 0.10 to 0.11 - -You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running -`setup.py versioneer`. This will enable the use of additional version-control -systems (SVN, etc) in the future. - -### Upgrading from 0.11 to 0.12 - -Nothing special. - -## Upgrading to 0.14 - -0.14 changes the format of the version string. 0.13 and earlier used -hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a -plus-separated "local version" section strings, with dot-separated -components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old -format, but should be ok with the new one. - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . 
The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - - -## License - -To make Versioneer easier to embed, all its code is hereby released into the -public domain. The `_version.py` that it creates is also in the public -domain. - -""" - -import errno -import os -import re -import subprocess -import sys -from distutils.command.build import build as _build -from distutils.command.sdist import sdist as _sdist -from distutils.core import Command - -# these configuration settings will be overridden by setup.py after it -# imports us -versionfile_source = None -versionfile_build = None -tag_prefix = None -parentdir_prefix = None -VCS = None - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY = {} - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): - assert isinstance(commands, list) - p = None - for c in commands: - try: - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % args[0]) - print(e) - return None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % args[0]) - 
return None - return stdout -LONG_VERSION_PY['git'] = ''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.14 (https://github.com/warner/python-versioneer) - -import errno -import os -import re -import subprocess -import sys - -# these strings will be replaced by git during git-archive -git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" -git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - -# these strings are filled in when 'setup.py versioneer' creates _version.py -tag_prefix = "%(TAG_PREFIX)s" -parentdir_prefix = "%(PARENTDIR_PREFIX)s" -versionfile_source = "%(VERSIONFILE_SOURCE)s" - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): - assert isinstance(commands, list) - p = None - for c in commands: - try: - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% args[0]) - print(e) - return None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% args[0]) - return None - return stdout - - -def versions_from_parentdir(parentdir_prefix, root, verbose=False): - # Source tarballs conventionally unpack into a directory that includes - # both the project name and a version string. 
- dirname = os.path.basename(root) - if not dirname.startswith(parentdir_prefix): - if verbose: - print("guessing rootdir is '%%s', but '%%s' doesn't start with " - "prefix '%%s'" %% (root, dirname, parentdir_prefix)) - return None - return {"version": dirname[len(parentdir_prefix):], "full": ""} - - -def git_get_keywords(versionfile_abs): - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -def git_versions_from_keywords(keywords, tag_prefix, verbose=False): - if not keywords: - return {} # keyword-finding function failed to find keywords - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. 
By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs-tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full": keywords["full"].strip()} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full": keywords["full"].strip()} - - -def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): - # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. - - # dirty - dirty = git_describe.endswith("-dirty") - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - dirty_suffix = ".dirty" if dirty else "" - - # now we have TAG-NUM-gHEX or HEX - - if "-" not in git_describe: # just HEX - return "0+untagged.g"+git_describe+dirty_suffix, dirty - - # just TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - return "0+unparseable"+dirty_suffix, dirty - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - return None, dirty - tag = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - distance = int(mo.group(2)) - - # commit: short hex revision ID - commit = mo.group(3) - - # now build up version string, with post-release "local version - # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . 
Note that if you get a - # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you - # can always test version.endswith(".dirty"). - version = tag - if distance or dirty: - version += "+%%d.g%%s" %% (distance, commit) + dirty_suffix - - return version, dirty - - -def git_versions_from_vcs(tag_prefix, root, verbose=False): - # this runs 'git' from the root of the source tree. This only gets called - # if the git-archive 'subst' keywords were *not* expanded, and - # _version.py hasn't already been rewritten with a short version string, - # meaning we're inside a checked out source tree. - - if not os.path.exists(os.path.join(root, ".git")): - if verbose: - print("no .git in %%s" %% root) - return {} # get_versions() will try next method - - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - # if there is a tag, this yields TAG-NUM-gHEX[-dirty] - # if there are no tags, this yields HEX[-dirty] (no NUM) - stdout = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long"], - cwd=root) - # --long was added in git-1.5.5 - if stdout is None: - return {} # try next method - version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) - - # build "full", which is FULLHEX[.dirty] - stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if stdout is None: - return {} - full = stdout.strip() - if dirty: - full += ".dirty" - - return {"version": version, "full": full} - - -def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False): - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. 
- - keywords = {"refnames": git_refnames, "full": git_full} - ver = git_versions_from_keywords(keywords, tag_prefix, verbose) - if ver: - return ver - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return default - - return (git_versions_from_vcs(tag_prefix, root, verbose) - or versions_from_parentdir(parentdir_prefix, root, verbose) - or default) -''' - - -def git_get_keywords(versionfile_abs): - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -def git_versions_from_keywords(keywords, tag_prefix, verbose=False): - if not keywords: - return {} # keyword-finding function failed to find keywords - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
- TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs-tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %s" % r) - return {"version": r, - "full": keywords["full"].strip()} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full": keywords["full"].strip()} - - -def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): - # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. - - # dirty - dirty = git_describe.endswith("-dirty") - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - dirty_suffix = ".dirty" if dirty else "" - - # now we have TAG-NUM-gHEX or HEX - - if "-" not in git_describe: # just HEX - return "0+untagged.g"+git_describe+dirty_suffix, dirty - - # just TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? 
- return "0+unparseable"+dirty_suffix, dirty - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - return None, dirty - tag = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - distance = int(mo.group(2)) - - # commit: short hex revision ID - commit = mo.group(3) - - # now build up version string, with post-release "local version - # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a - # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you - # can always test version.endswith(".dirty"). - version = tag - if distance or dirty: - version += "+%d.g%s" % (distance, commit) + dirty_suffix - - return version, dirty - - -def git_versions_from_vcs(tag_prefix, root, verbose=False): - # this runs 'git' from the root of the source tree. This only gets called - # if the git-archive 'subst' keywords were *not* expanded, and - # _version.py hasn't already been rewritten with a short version string, - # meaning we're inside a checked out source tree. 
- - if not os.path.exists(os.path.join(root, ".git")): - if verbose: - print("no .git in %s" % root) - return {} # get_versions() will try next method - - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - # if there is a tag, this yields TAG-NUM-gHEX[-dirty] - # if there are no tags, this yields HEX[-dirty] (no NUM) - stdout = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long"], - cwd=root) - # --long was added in git-1.5.5 - if stdout is None: - return {} # try next method - version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) - - # build "full", which is FULLHEX[.dirty] - stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if stdout is None: - return {} - full = stdout.strip() - if dirty: - full += ".dirty" - - return {"version": version, "full": full} - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - me = __file__ - if me.endswith(".pyc") or me.endswith(".pyo"): - me = os.path.splitext(me)[0] + ".py" - versioneer_file = os.path.relpath(me) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - f = open(".gitattributes", "r") - for line in f.readlines(): - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - f.close() - except EnvironmentError: - pass - if not present: - f = open(".gitattributes", "a+") - f.write("%s export-subst\n" % versionfile_source) - f.close() - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose=False): - # Source tarballs conventionally unpack into a directory that includes - # both the project name and a version string. 
- dirname = os.path.basename(root) - if not dirname.startswith(parentdir_prefix): - if verbose: - print("guessing rootdir is '%s', but '%s' doesn't start with " - "prefix '%s'" % (root, dirname, parentdir_prefix)) - return None - return {"version": dirname[len(parentdir_prefix):], "full": ""} - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.14) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. - -version_version = '%(version)s' -version_full = '%(full)s' -def get_versions(default={}, verbose=False): - return {'version': version_version, 'full': version_full} - -""" - -DEFAULT = {"version": "0+unknown", "full": "unknown"} - - -def versions_from_file(filename): - versions = {} - try: - with open(filename) as f: - for line in f.readlines(): - mo = re.match("version_version = '([^']+)'", line) - if mo: - versions["version"] = mo.group(1) - mo = re.match("version_full = '([^']+)'", line) - if mo: - versions["full"] = mo.group(1) - except EnvironmentError: - return {} - - return versions - - -def write_to_version_file(filename, versions): - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % versions) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def get_root(): - try: - return os.path.dirname(os.path.abspath(__file__)) - except NameError: - return os.path.dirname(os.path.abspath(sys.argv[0])) - - -def vcs_function(vcs, suffix): - return getattr(sys.modules[__name__], '%s_%s' % (vcs, suffix), None) - - -def get_versions(default=DEFAULT, verbose=False): - # returns dict with two keys: 'version' and 'full' - assert versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert tag_prefix is not None, "please set versioneer.tag_prefix" - assert parentdir_prefix is not None, \ - "please set versioneer.parentdir_prefix" - assert VCS is not None, "please set versioneer.VCS" - 
- # I am in versioneer.py, which must live at the top of the source tree, - # which we use to compute the root directory. py2exe/bbfreeze/non-CPython - # don't have __file__, in which case we fall back to sys.argv[0] (which - # ought to be the setup.py script). We prefer __file__ since that's more - # robust in cases where setup.py was invoked in some weird way (e.g. pip) - root = get_root() - versionfile_abs = os.path.join(root, versionfile_source) - - # extract version from first of _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. - - get_keywords_f = vcs_function(VCS, "get_keywords") - versions_from_keywords_f = vcs_function(VCS, "versions_from_keywords") - if get_keywords_f and versions_from_keywords_f: - vcs_keywords = get_keywords_f(versionfile_abs) - ver = versions_from_keywords_f(vcs_keywords, tag_prefix) - if ver: - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - - ver = versions_from_file(versionfile_abs) - if ver: - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - - versions_from_vcs_f = vcs_function(VCS, "versions_from_vcs") - if versions_from_vcs_f: - ver = versions_from_vcs_f(tag_prefix, root, verbose) - if ver: - if verbose: - print("got version from VCS %s" % ver) - return ver - - ver = versions_from_parentdir(parentdir_prefix, root, verbose) - if ver: - if verbose: - print("got version from parentdir %s" % ver) - return ver - - if verbose: - print("got version from default %s" % default) - return default - - -def get_version(verbose=False): - return get_versions(verbose=verbose)["version"] - - -class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options 
= [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - ver = get_version(verbose=True) - print("Version is currently: %s" % ver) - - -class cmd_build(_build): - def run(self): - versions = get_versions(verbose=True) - _build.run(self) - # now locate _version.py in the new build/ directory and replace it - # with an updated value - if versionfile_build: - target_versionfile = os.path.join(self.build_lib, - versionfile_build) - print("UPDATING %s" % target_versionfile) - os.unlink(target_versionfile) - with open(target_versionfile, "w") as f: - f.write(SHORT_VERSION_PY % versions) - -if 'cx_Freeze' in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - - class cmd_build_exe(_build_exe): - def run(self): - versions = get_versions(verbose=True) - target_versionfile = versionfile_source - print("UPDATING %s" % target_versionfile) - os.unlink(target_versionfile) - with open(target_versionfile, "w") as f: - f.write(SHORT_VERSION_PY % versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(versionfile_source, "w") as f: - assert VCS is not None, "please set versioneer.VCS" - LONG = LONG_VERSION_PY[VCS] - f.write(LONG % {"DOLLAR": "$", - "TAG_PREFIX": tag_prefix, - "PARENTDIR_PREFIX": parentdir_prefix, - "VERSIONFILE_SOURCE": versionfile_source, - }) - - -class cmd_sdist(_sdist): - def run(self): - versions = get_versions(verbose=True) - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory (remembering - # that it may be a hardlink) and replace it with an updated value - target_versionfile = os.path.join(base_dir, versionfile_source) - print("UPDATING %s" % 
target_versionfile) - os.unlink(target_versionfile) - with open(target_versionfile, "w") as f: - f.write(SHORT_VERSION_PY % self._versioneer_generated_versions) - -INIT_PY_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - - -class cmd_update_files(Command): - description = ("install/upgrade Versioneer files: " - "__init__.py SRC/_version.py") - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - print(" creating %s" % versionfile_source) - with open(versionfile_source, "w") as f: - assert VCS is not None, "please set versioneer.VCS" - LONG = LONG_VERSION_PY[VCS] - f.write(LONG % {"DOLLAR": "$", - "TAG_PREFIX": tag_prefix, - "PARENTDIR_PREFIX": parentdir_prefix, - "VERSIONFILE_SOURCE": versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(versionfile_source), "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except EnvironmentError: - old = "" - if INIT_PY_SNIPPET not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(INIT_PY_SNIPPET) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(get_root(), "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except EnvironmentError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. 
Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-time keyword - # substitution. - do_vcs_install(manifest_in, versionfile_source, ipy) - - -def get_cmdclass(): - cmds = {'version': cmd_version, - 'versioneer': cmd_update_files, - 'build': cmd_build, - 'sdist': cmd_sdist, - } - if 'cx_Freeze' in sys.modules: # cx_freeze enabled? - cmds['build_exe'] = cmd_build_exe - del cmds['build'] - - return cmds From 8b6ca0d9123bb045ec96efafbbd3e719a5153125 Mon Sep 17 00:00:00 2001 From: Ronny Pfannschmidt Date: Sat, 22 Aug 2015 15:54:40 +0200 Subject: [PATCH 104/142] propperly handle borg._version using setuptools_scm --- .gitignore | 1 + borg/__init__.py | 4 +- borg/_version.py | 239 ----------------------------------------------- setup.py | 4 +- 4 files changed, 5 insertions(+), 243 deletions(-) delete mode 100644 borg/_version.py diff --git a/.gitignore b/.gitignore index ab98dc85a..5debd74ed 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ platform_linux.c docs/usage/*.inc .idea/ .cache/ +borg/_version.py borg.build/ borg.dist/ borg.exe diff --git a/borg/__init__.py b/borg/__init__.py index b0578c472..e292841a6 100644 --- a/borg/__init__.py +++ b/borg/__init__.py @@ -1,5 +1,3 @@ # This is a python package -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions +from ._version import version as 
__version__ diff --git a/borg/_version.py b/borg/_version.py deleted file mode 100644 index 7a94d800f..000000000 --- a/borg/_version.py +++ /dev/null @@ -1,239 +0,0 @@ - -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.14 (https://github.com/warner/python-versioneer) - -import errno -import os -import re -import subprocess -import sys - -# these strings will be replaced by git during git-archive -git_refnames = "$Format:%d$" -git_full = "$Format:%H$" - -# these strings are filled in when 'setup.py versioneer' creates _version.py -tag_prefix = "" -parentdir_prefix = "borgbackup-" -versionfile_source = "borg/_version.py" - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): - assert isinstance(commands, list) - p = None - for c in commands: - try: - # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except EnvironmentError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % args[0]) - print(e) - return None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None - stdout = p.communicate()[0].strip() - if sys.version_info[0] >= 3: - stdout = stdout.decode() - if p.returncode != 0: - if verbose: - print("unable to run %s (error)" % args[0]) - return None - return stdout - - -def versions_from_parentdir(parentdir_prefix, root, verbose=False): - # Source tarballs conventionally unpack into a directory that includes - # both the 
project name and a version string. - dirname = os.path.basename(root) - if not dirname.startswith(parentdir_prefix): - if verbose: - print("guessing rootdir is '%s', but '%s' doesn't start with " - "prefix '%s'" % (root, dirname, parentdir_prefix)) - return None - return {"version": dirname[len(parentdir_prefix):], "full": ""} - - -def git_get_keywords(versionfile_abs): - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - f = open(versionfile_abs, "r") - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - f.close() - except EnvironmentError: - pass - return keywords - - -def git_versions_from_keywords(keywords, tag_prefix, verbose=False): - if not keywords: - return {} # keyword-finding function failed to find keywords - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - return {} # unexpanded, so not in an unpacked git-archive tarball - refs = set([r.strip() for r in refnames.strip("()").split(",")]) - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. 
By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) - if verbose: - print("discarding '%s', no digits" % ",".join(refs-tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - if verbose: - print("picking %s" % r) - return {"version": r, - "full": keywords["full"].strip()} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full": keywords["full"].strip()} - - -def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False): - # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens. - - # dirty - dirty = git_describe.endswith("-dirty") - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - dirty_suffix = ".dirty" if dirty else "" - - # now we have TAG-NUM-gHEX or HEX - - if "-" not in git_describe: # just HEX - return "0+untagged.g"+git_describe+dirty_suffix, dirty - - # just TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparseable. Maybe git-describe is misbehaving? - return "0+unparseable"+dirty_suffix, dirty - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - return None, dirty - tag = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - distance = int(mo.group(2)) - - # commit: short hex revision ID - commit = mo.group(3) - - # now build up version string, with post-release "local version - # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . 
Note that if you get a - # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you - # can always test version.endswith(".dirty"). - version = tag - if distance or dirty: - version += "+%d.g%s" % (distance, commit) + dirty_suffix - - return version, dirty - - -def git_versions_from_vcs(tag_prefix, root, verbose=False): - # this runs 'git' from the root of the source tree. This only gets called - # if the git-archive 'subst' keywords were *not* expanded, and - # _version.py hasn't already been rewritten with a short version string, - # meaning we're inside a checked out source tree. - - if not os.path.exists(os.path.join(root, ".git")): - if verbose: - print("no .git in %s" % root) - return {} # get_versions() will try next method - - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - # if there is a tag, this yields TAG-NUM-gHEX[-dirty] - # if there are no tags, this yields HEX[-dirty] (no NUM) - stdout = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long"], - cwd=root) - # --long was added in git-1.5.5 - if stdout is None: - return {} # try next method - version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) - - # build "full", which is FULLHEX[.dirty] - stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) - if stdout is None: - return {} - full = stdout.strip() - if dirty: - full += ".dirty" - - return {"version": version, "full": full} - - -def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False): - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. 
- - keywords = {"refnames": git_refnames, "full": git_full} - ver = git_versions_from_keywords(keywords, tag_prefix, verbose) - if ver: - return ver - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for i in versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return default - - return (git_versions_from_vcs(tag_prefix, root, verbose) - or versions_from_parentdir(parentdir_prefix, root, verbose) - or default) diff --git a/setup.py b/setup.py index 11982ac39..667ba4ee2 100644 --- a/setup.py +++ b/setup.py @@ -101,7 +101,9 @@ elif sys.platform == 'darwin': setup( name='borgbackup', - use_scm_version=True, + use_scm_version={ + 'write_to': 'borg/_version.py', + }, author='The Borg Collective (see AUTHORS file)', author_email='borgbackup@librelist.com', url='https://borgbackup.github.io/', From ea8f3bd7e7ae4d46bd6830bc1301bcfd8854b121 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 28 Aug 2015 23:22:26 +0200 Subject: [PATCH 105/142] restore_xattrs: minor cleanup / simplification if we use {} as default for item.get(), we do not need the "if" as iteration over an empty dict won't do anything. also fixes too deep indentation the original code had. 
--- borg/archive.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index e214c7857..b2f14b668 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -323,14 +323,13 @@ class Archive: raise Exception('Unknown archive item type %r' % item[b'mode']) def restore_attrs(self, path, item, symlink=False, fd=None): - xattrs = item.get(b'xattrs') - if xattrs: - for k, v in xattrs.items(): - try: - xattr.setxattr(fd or path, k, v, follow_symlinks=False) - except OSError as e: - if e.errno != errno.ENOTSUP: - raise + xattrs = item.get(b'xattrs', {}) + for k, v in xattrs.items(): + try: + xattr.setxattr(fd or path, k, v, follow_symlinks=False) + except OSError as e: + if e.errno != errno.ENOTSUP: + raise uid = gid = None if not self.numeric_owner: uid = user2uid(item[b'user']) From 9ebc53ad77b1e7e728fe4bc474f617482e249cc2 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 00:11:04 +0200 Subject: [PATCH 106/142] restore_xattrs: ignore if setxattr fails with EACCES, fixes #162 e.g.: - setting any security.* key is expected to fail with EACCES if one is not root. - issue #162 on our issue tracker: user was root, but due to some specific scenario involving docker and selinux, setting security.selinux key fails even when running as root not sure if it is the best solution to silently ignore this, but some lines below this change failure to do a chown is also silently ignored (happens e.g. when restoring a file not owned by the current user as a non-root user). 
--- borg/archive.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/borg/archive.py b/borg/archive.py index b2f14b668..18867dbd9 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -328,7 +328,11 @@ class Archive: try: xattr.setxattr(fd or path, k, v, follow_symlinks=False) except OSError as e: - if e.errno != errno.ENOTSUP: + if e.errno not in (errno.ENOTSUP, errno.EACCES, ): + # only raise if the errno is not on our ignore list: + # ENOTSUP == xattrs not supported here + # EACCES == permission denied to set this specific xattr + # (this may happen related to security.* keys) raise uid = gid = None if not self.numeric_owner: From ee58d4f074cc53b93d02ac826c7db37846f28d5d Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 00:45:54 +0200 Subject: [PATCH 107/142] docs: be more specific about Debian/Ubuntu release --- docs/installation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index da79d88f3..c33cf8bf4 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -42,8 +42,8 @@ Mac OS X: You may need to get a recent enough OpenSSL version from homebrew_. Mac OS X: You need OS X FUSE >= 3.0. -Debian / Ubuntu installation (from git) ---------------------------------------- +Debian Jessie / Ubuntu 14.04 installation (from git) +---------------------------------------------------- Note: this uses latest, unreleased development code from git. While we try not to break master, there are no guarantees on anything. 
From 79d7f49a44a5f959232aa7a6f6a015122570a75e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 01:17:19 +0200 Subject: [PATCH 108/142] docs: split install into system-specific preparations and generic instructions --- docs/installation.rst | 90 +++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 50 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index c33cf8bf4..c2acb034c 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -14,6 +14,7 @@ Installation General notes ------------- + Even though Python 3 is not the default Python version on many systems, it is usually available as an optional install. @@ -42,17 +43,15 @@ Mac OS X: You may need to get a recent enough OpenSSL version from homebrew_. Mac OS X: You need OS X FUSE >= 3.0. -Debian Jessie / Ubuntu 14.04 installation (from git) ----------------------------------------------------- -Note: this uses latest, unreleased development code from git. -While we try not to break master, there are no guarantees on anything. +Debian Jessie / Ubuntu 14.04 installation preparations (git) +------------------------------------------------------------ Some of the steps detailled below might be useful also for non-git installs. .. parsed-literal:: - # Python 3.x (>= 3.2) + Headers, Py Package Installer - apt-get install python3 python3-dev python3-pip + # Python 3.x (>= 3.2) + Headers, Py Package Installer, VirtualEnv + apt-get install python3 python3-dev python3-pip python-virtualenv # we need OpenSSL + Headers for Crypto apt-get install libssl-dev openssl @@ -75,35 +74,15 @@ Some of the steps detailled below might be useful also for non-git installs. # optional: for unit testing apt-get install fakeroot - # get |project_name| from github, install it - git clone |git_url| - apt-get install python-virtualenv - virtualenv --python=python3 borg-env - source borg-env/bin/activate # always before using! 
- - # install borg + dependencies into virtualenv - pip install cython # compile .pyx -> .c - pip install tox pytest # optional, for running unit tests - pip install sphinx # optional, to build the docs - pip install llfuse # optional, for FUSE support - cd borg - pip install -e . # in-place editable mode - - # optional: run all the tests, on all supported Python versions - fakeroot -u tox - - -Korora / Fedora 21 installation (from git) ------------------------------------------- -Note: this uses latest, unreleased development code from git. -While we try not to break master, there are no guarantees on anything. +Korora / Fedora 21 installation preparations (git) +-------------------------------------------------- Some of the steps detailled below might be useful also for non-git installs. .. parsed-literal:: - # Python 3.x (>= 3.2) + Headers, Py Package Installer - sudo dnf install python3 python3-devel python3-pip + # Python 3.x (>= 3.2) + Headers, Py Package Installer, VirtualEnv + sudo dnf install python3 python3-devel python3-pip python3-virtualenv # we need OpenSSL + Headers for Crypto sudo dnf install openssl-devel openssl @@ -120,27 +99,10 @@ Some of the steps detailled below might be useful also for non-git installs. # optional: for unit testing sudo dnf install fakeroot - # get |project_name| from github, install it - git clone |git_url| - dnf install python3-virtualenv - virtualenv --python=python3 borg-env - source borg-env/bin/activate # always before using! +Cygwin installation preparations (git) +-------------------------------------- - # install borg + dependencies into virtualenv - pip install cython # compile .pyx -> .c - pip install tox pytest # optional, for running unit tests - pip install sphinx # optional, to build the docs - pip install llfuse # optional, for FUSE support - cd borg - pip install -e . 
# in-place editable mode - - # optional: run all the tests, on all supported Python versions - fakeroot -u tox - - -Cygwin (from git) ------------------ Please note that running under cygwin is rather experimental, stuff has been tested with CygWin (x86-64) v2.1.0. @@ -167,7 +129,7 @@ You can then install ``pip`` and ``virtualenv``: easy_install-3.4 pip pip install virtualenv -And now continue as for Linux (see above). +And now continue with the generic installation (see below). In case that creation of the virtual env fails, try deleting this file: @@ -175,3 +137,31 @@ In case that creation of the virtual env fails, try deleting this file: /usr/lib/python3.4/__pycache__/platform.cpython-34.pyc + +Generic: Installing borgbackup (git) +------------------------------------ + +After you have done the installation preparations, you can now fetch and build +|project_name|. + +Note: this uses latest, unreleased development code from git. +While we try not to break master, there are no guarantees on anything. + +.. parsed-literal:: + # get |project_name| from github, install it + git clone |git_url| + + virtualenv --python=python3 borg-env + source borg-env/bin/activate # always before using! + + # install borg + dependencies into virtualenv + pip install cython # compile .pyx -> .c + pip install tox pytest # optional, for running unit tests + pip install sphinx # optional, to build the docs + pip install llfuse # optional, for FUSE support + cd borg + pip install -e . 
# in-place editable mode + + # optional: run all the tests, on all supported Python versions + fakeroot -u tox + From cebb61d8ddf20121664509f52dbdf7e010dc1cf2 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 02:55:09 +0200 Subject: [PATCH 109/142] docs: add pypi, wheel, dist package based install docs --- docs/installation.rst | 115 +++++++++++++++++++++++++++++++++--------- 1 file changed, 90 insertions(+), 25 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index c2acb034c..85830dd4d 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -14,6 +14,20 @@ Installation General notes ------------- +You need to do some platform specific preparation steps (to install libraries +and tools) followed by the generic installation of |project_name| itself: + +Below, we describe different ways to install |project_name|. + +- (dist package) - easy and fast, needs a distribution and platform specific + binary package (for your Linux/*BSD/OS X/... distribution). +- (wheel) - easy and fast, needs a platform specific borgbackup binary wheel, + which matches your platform [OS and CPU]). +- (pypi) - installing a source package from pypi needs more installation steps + and will compile stuff - try this if there is no binary wheel that works for + you. +- (git) - for developers and power users who want to have the latest code or + use revision control (each release is tagged). Even though Python 3 is not the default Python version on many systems, it is usually available as an optional install. @@ -22,10 +36,12 @@ Virtualenv_ can be used to build and install |project_name| without affecting the system Python or requiring root access. Important: -if you install into a virtual environment, you need to activate +If you install into a virtual environment, you need to activate the virtual env first (``source borg-env/bin/activate``). 
Alternatively, directly run ``borg-env/bin/borg`` (or symlink that into some directory that is in your PATH so you can just run ``borg``). +Using a virtual environment is optional, but recommended except for the most +simple use cases. The llfuse_ python package is also required if you wish to mount an archive as a FUSE filesystem. Only FUSE >= 2.8.0 can support llfuse. @@ -43,10 +59,51 @@ Mac OS X: You may need to get a recent enough OpenSSL version from homebrew_. Mac OS X: You need OS X FUSE >= 3.0. -Debian Jessie / Ubuntu 14.04 installation preparations (git) ------------------------------------------------------------- +Installation (dist package) +--------------------------- +Some Linux, *BSD and OS X distributions might offer a ready-to-use +|project_name| package (which can be easily installed in the usual way). -Some of the steps detailled below might be useful also for non-git installs. +As |project_name| is still relatively new, such a package might be not +available for your system yet. Please ask package maintainers to build a +package or, if you can package / submit it yourself, please help us with +that! + +If a package is available, it might be interesting for you to check its version +and compare that to our latest release and review the change log (see links on +our web site). + + +Debian Jessie / Ubuntu 14.04 preparations (wheel) +------------------------------------------------- + +.. parsed-literal:: + + # Python stuff we need + apt-get install python3 python3-pip + + # Libraries we need (fuse is optional) + apt-get install openssl libacl1 liblz4-1 fuse + + +Installation (wheel) +-------------------- + +This uses the latest binary wheel release. + +.. 
parsed-literal:: + # Check https://github.com/borgbackup/borg/issues/147 for the correct + # platform-specific binary wheel, download and install it: + + # system-wide installation, needs sudo/root permissions: + sudo pip install borgbackup-*.whl + + # home directory installation, no sudo/root needed: + pip install --user borgbackup-*.whl + + +Debian Jessie / Ubuntu 14.04 preparations (git/pypi) +---------------------------------------------------- .. parsed-literal:: @@ -75,8 +132,8 @@ Some of the steps detailled below might be useful also for non-git installs. apt-get install fakeroot -Korora / Fedora 21 installation preparations (git) --------------------------------------------------- +Korora / Fedora 21 preparations (git/pypi) +------------------------------------------ Some of the steps detailled below might be useful also for non-git installs. @@ -100,8 +157,8 @@ Some of the steps detailled below might be useful also for non-git installs. sudo dnf install fakeroot -Cygwin installation preparations (git) --------------------------------------- +Cygwin preparations (git/pypi) +------------------------------ Please note that running under cygwin is rather experimental, stuff has been tested with CygWin (x86-64) v2.1.0. 
@@ -110,17 +167,12 @@ You'll need at least (use the cygwin installer to fetch/install these): :: - python3 - python3-setuptools - python3-cython - binutils - gcc-core - git - libopenssl + python3 python3-setuptools + python3-cython # not needed for releases + binutils gcc-core + libopenssl openssl-devel liblz4_1 liblz4-devel # from cygwinports.org - make - openssh - openssl-devel + git make openssh You can then install ``pip`` and ``virtualenv``: @@ -138,13 +190,26 @@ In case that creation of the virtual env fails, try deleting this file: /usr/lib/python3.4/__pycache__/platform.cpython-34.pyc -Generic: Installing borgbackup (git) ------------------------------------- +Installation (pypi) +------------------- -After you have done the installation preparations, you can now fetch and build -|project_name|. +This uses the latest (source package) release from PyPi. -Note: this uses latest, unreleased development code from git. +.. parsed-literal:: + virtualenv --python=python3 borg-env + source borg-env/bin/activate # always before using! + + # install borg + dependencies into virtualenv + pip install llfuse # optional, for FUSE support + pip install borgbackup + +Note: we install into a virtual environment here, but this is not a requirement. + + +Installation (git) +------------------ + +This uses latest, unreleased development code from git. While we try not to break master, there are no guarantees on anything. .. parsed-literal:: @@ -155,13 +220,13 @@ While we try not to break master, there are no guarantees on anything. source borg-env/bin/activate # always before using! # install borg + dependencies into virtualenv - pip install cython # compile .pyx -> .c - pip install tox pytest # optional, for running unit tests pip install sphinx # optional, to build the docs pip install llfuse # optional, for FUSE support cd borg + pip install -r requirements.d/development.txt pip install -e . 
# in-place editable mode # optional: run all the tests, on all supported Python versions fakeroot -u tox +Note: as a developer or power user, you always want to use a virtual environment. From 8a483511cbcfe0ef681a41ef6df9d85d7e6bef28 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 03:01:39 +0200 Subject: [PATCH 110/142] docs: theme: use a lighter green --- docs/_themes/local/static/local.css_t | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/_themes/local/static/local.css_t b/docs/_themes/local/static/local.css_t index 4395cc97b..ca5ce577e 100644 --- a/docs/_themes/local/static/local.css_t +++ b/docs/_themes/local/static/local.css_t @@ -31,7 +31,7 @@ div.documentwrapper { float: right; width: 760px; padding: 0 20px 20px 20px; - color: #00aa00; + color: #00cc00; background-color: #000000; margin-bottom: 2em; } @@ -48,7 +48,7 @@ div.sphinxsidebar { h1, h2, h3 { font-weight: normal; - color: #33dd33; + color: #33ff33; } h1 { @@ -99,12 +99,12 @@ div.sphinxsidebar a:link, div.sphinxsidebar a:visited { } div.sphinxsidebar { - color: #00aa00; + color: #00cc00; background: 0000000; } div.sphinxsidebar input { - color: #00cc00; + color: #00ff00; background: 0000000; border: 1px solid #444444; } @@ -171,4 +171,3 @@ div.seealso { border-radius: .4em; box-shadow: 2px 2px #dd6; } - From e4dc482598e525e762854c1bfc7939ce0ae854df Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 03:39:53 +0200 Subject: [PATCH 111/142] docs: quickstart: key modes, cosmetic changes --- docs/installation.rst | 30 ++++++++++++++++-------------- docs/quickstart.rst | 37 +++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 85830dd4d..6bc38a0aa 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -19,24 +19,24 @@ and tools) followed by the generic installation of |project_name| itself: Below, we describe different ways 
to install |project_name|. -- (dist package) - easy and fast, needs a distribution and platform specific +- **dist package** - easy and fast, needs a distribution and platform specific binary package (for your Linux/*BSD/OS X/... distribution). -- (wheel) - easy and fast, needs a platform specific borgbackup binary wheel, +- **wheel** - easy and fast, needs a platform specific borgbackup binary wheel, which matches your platform [OS and CPU]). -- (pypi) - installing a source package from pypi needs more installation steps +- **pypi** - installing a source package from pypi needs more installation steps and will compile stuff - try this if there is no binary wheel that works for you. -- (git) - for developers and power users who want to have the latest code or +- **git** - for developers and power users who want to have the latest code or use revision control (each release is tagged). -Even though Python 3 is not the default Python version on many systems, it is -usually available as an optional install. +**Python 3**: Even though this is not the default Python version on many systems, +it is usually available as an optional install. Virtualenv_ can be used to build and install |project_name| without affecting the system Python or requiring root access. Important: -If you install into a virtual environment, you need to activate +If you install into a virtual environment, you need to **activate** the virtual env first (``source borg-env/bin/activate``). Alternatively, directly run ``borg-env/bin/borg`` (or symlink that into some directory that is in your PATH so you can just run ``borg``). @@ -46,7 +46,7 @@ simple use cases. The llfuse_ python package is also required if you wish to mount an archive as a FUSE filesystem. Only FUSE >= 2.8.0 can support llfuse. -You only need Cython to compile the .pyx files to the respective .c files +You only need **Cython** to compile the .pyx files to the respective .c files when using |project_name| code from git. 
For |project_name| releases, the .c files will be bundled, so you won't need Cython to install a release. @@ -61,8 +61,8 @@ Mac OS X: You need OS X FUSE >= 3.0. Installation (dist package) --------------------------- -Some Linux, *BSD and OS X distributions might offer a ready-to-use -|project_name| package (which can be easily installed in the usual way). +Some Linux, BSD and OS X distributions might offer a ready-to-use +`borgbackup` package (which can be easily installed in the usual way). As |project_name| is still relatively new, such a package might be not available for your system yet. Please ask package maintainers to build a @@ -92,14 +92,15 @@ Installation (wheel) This uses the latest binary wheel release. .. parsed-literal:: + # Check https://github.com/borgbackup/borg/issues/147 for the correct # platform-specific binary wheel, download and install it: # system-wide installation, needs sudo/root permissions: - sudo pip install borgbackup-*.whl + sudo pip install borgbackup.whl # home directory installation, no sudo/root needed: - pip install --user borgbackup-*.whl + pip install --user borgbackup.whl Debian Jessie / Ubuntu 14.04 preparations (git/pypi) @@ -135,9 +136,8 @@ Debian Jessie / Ubuntu 14.04 preparations (git/pypi) Korora / Fedora 21 preparations (git/pypi) ------------------------------------------ -Some of the steps detailled below might be useful also for non-git installs. - .. parsed-literal:: + # Python 3.x (>= 3.2) + Headers, Py Package Installer, VirtualEnv sudo dnf install python3 python3-devel python3-pip python3-virtualenv @@ -196,6 +196,7 @@ Installation (pypi) This uses the latest (source package) release from PyPi. .. parsed-literal:: + virtualenv --python=python3 borg-env source borg-env/bin/activate # always before using! @@ -213,6 +214,7 @@ This uses latest, unreleased development code from git. While we try not to break master, there are no guarantees on anything. .. 
parsed-literal:: + # get |project_name| from github, install it git clone |git_url| diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 4b78fefbb..b6c4c42df 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -121,7 +121,7 @@ Repository encryption Repository encryption is enabled at repository creation time:: - $ borg init --encryption=passphrase|keyfile PATH + $ borg init --encryption=repokey|keyfile PATH When repository encryption is enabled all data is encrypted using 256-bit AES_ encryption and the integrity and authenticity is verified using `HMAC-SHA256`_. @@ -130,28 +130,29 @@ All data is encrypted before being written to the repository. This means that an attacker who manages to compromise the host containing an encrypted archive will not be able to access any of the data. -|project_name| supports two different methods to derive the AES and HMAC keys. +|project_name| supports different methods to store the AES and HMAC keys. -Passphrase based encryption - This method uses a user supplied passphrase to derive the keys using the - PBKDF2_ key derivation function. This method is convenient to use since - there is no key file to keep track of and secure as long as a *strong* - passphrase is used. +``repokey`` mode + The key is stored inside the repository (in its "config" file). + Use this mode if you trust in your good passphrase giving you enough + protection. - .. Note:: - For automated backups the passphrase can be specified using the - `BORG_PASSPHRASE` environment variable. +``keyfile`` mode + The key is stored on your local disk (in ``~/.borg/keys/``). + Use this mode if you want "passphrase and having-the-key" security. -Key file based encryption - This method generates random keys at repository initialization time that - are stored in a password protected file in the ``~/.borg/keys/`` directory. - The key file is a printable text file. This method is secure and suitable - for automated backups. 
+In both modes, the key is stored in encrypted form and can be only decrypted +by providing the correct passphrase. - .. Note:: - The repository data is totally inaccessible without the key file - so it must be kept **safe**. +For automated backups the passphrase can be specified using the +`BORG_PASSPHRASE` environment variable. +**The repository data is totally inaccessible without the key:** + Make a backup copy of the key file (``keyfile`` mode) or repo config + file (``repokey`` mode) and keep it at a safe place, so you still have + the key in case it gets corrupted or lost. + The backup that is encrypted with that key won't help you with that, + of course. .. _remote_repos: From c823554b6b39bc32061676472b3baac4281e5de9 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 04:00:22 +0200 Subject: [PATCH 112/142] docs: usage: improved formatting, cosmetic changes --- borg/archiver.py | 2 ++ docs/usage.rst | 86 +++++++++++++++++++++++++++++------------------- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 166050dcd..8cce07b8b 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -567,6 +567,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") The check command verifies the consistency of a repository and the corresponding archives. First, the underlying repository data files are checked: + - For all segments the segment magic (header) is checked - For all objects stored in the segments, all metadata (e.g. crc and size) and all data is read. The read data is checked by size and CRC. Bit rot and other @@ -580,6 +581,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") - The repository check can be skipped using the --archives-only option. Second, the consistency and correctness of the archive metadata is verified: + - Is the repo manifest present? 
If not, it is rebuilt from archive metadata chunks (this requires reading and decrypting of all metadata and data). - Check if archive metadata chunk is present. if not, remove archive from diff --git a/docs/usage.rst b/docs/usage.rst index c4e2fa80f..8595ca7f8 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -41,26 +41,32 @@ Environment Variables |project_name| uses some environment variables for automation: -:: +Specifying a passphrase: + BORG_PASSPHRASE + When set, use the value to answer the passphrase question for encrypted repositories. - Specifying a passphrase: - BORG_PASSPHRASE : When set, use the value to answer the passphrase question for encrypted repositories. +Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning): + BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK + For "Warning: Attempting to access a previously unknown unencrypted repository" + BORG_RELOCATED_REPO_ACCESS_IS_OK + For "Warning: The repository at location ... was previously located at ..." + BORG_CHECK_I_KNOW_WHAT_I_AM_DOING + For "Warning: 'check --repair' is an experimental feature that might result in data loss." - Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning): - BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK : For "Warning: Attempting to access a previously unknown unencrypted repository" - BORG_RELOCATED_REPO_ACCESS_IS_OK : For "Warning: The repository at location ... was previously located at ..." - BORG_CHECK_I_KNOW_WHAT_I_AM_DOING : For "Warning: 'check --repair' is an experimental feature that might result in data loss." +Directories: + BORG_KEYS_DIR + Default to '~/.borg/keys'. This directory contains keys for encrypted repositories. + BORG_CACHE_DIR + Default to '~/.cache/borg'. This directory contains the local cache and might need a lot + of space for dealing with big repositories). - Directories: - BORG_KEYS_DIR : Default to '~/.borg/keys'. 
This directory contains keys for encrypted repositories. - BORG_CACHE_DIR : Default to '~/.cache/borg'. This directory contains the local cache and might need a lot - of space for dealing with big repositories). +Building: + BORG_OPENSSL_PREFIX + Adds given OpenSSL header file directory to the default locations (setup.py). - Building: - BORG_OPENSSL_PREFIX : Adds given OpenSSL header file directory to the default locations (setup.py). - - General: - TMPDIR : where temporary files are stored (might need a lot of temporary space for some operations) +General: + TMPDIR + where temporary files are stored (might need a lot of temporary space for some operations) Please note: @@ -75,29 +81,43 @@ Resource Usage |project_name| might use a lot of resources depending on the size of the data set it is dealing with. -CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded. - Especially higher zlib and lzma compression levels use significant amounts of CPU cycles. +CPU: + It won't go beyond 100% of 1 core as the code is currently single-threaded. + Especially higher zlib and lzma compression levels use significant amounts + of CPU cycles. -Memory (RAM): the chunks index and the files index are read into memory for performance reasons. - compression, esp. lzma compression with high levels might need substantial amounts - of memory. +Memory (RAM): + The chunks index and the files index are read into memory for performance + reasons. + Compression, esp. lzma compression with high levels might need substantial + amounts of memory. -Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the - deduplicated chunks used to represent them in the repository. +Temporary files: + Reading data and metadata from a FUSE mounted repository will consume about + the same space as the deduplicated chunks used to represent them in the + repository. 
-Cache files: chunks index and files index (plus a compressed collection of single-archive chunk indexes). +Cache files: + Contains the chunks index and files index (plus a compressed collection of + single-archive chunk indexes). -Chunks index: proportional to the amount of data chunks in your repo. lots of small chunks in your repo implies a big - chunks index. you may need to tweak the chunker params (see create options) if you have a lot of data and - you want to keep the chunks index at some reasonable size. +Chunks index: + Proportional to the amount of data chunks in your repo. Lots of small chunks + in your repo imply a big chunks index. You may need to tweak the chunker + params (see create options) if you have a lot of data and you want to keep + the chunks index at some reasonable size. -Files index: proportional to the amount of files in your last backup. can be switched off (see create options), but - next backup will be much slower if you do. +Files index: + Proportional to the amount of files in your last backup. Can be switched + off (see create options), but next backup will be much slower if you do. -Network: if your repository is remote, all deduplicated (and optionally compressed/encrypted) data of course has to go - over the connection (ssh: repo url). if you use a locally mounted network filesystem, additionally some copy - operations used for transaction support also go over the connection. if you backup multiple sources to one - target repository, additional traffic happens for cache resynchronization. +Network: + If your repository is remote, all deduplicated (and optionally compressed/ + encrypted) data of course has to go over the connection (ssh: repo url). + If you use a locally mounted network filesystem, additionally some copy + operations used for transaction support also go over the connection. If + you backup multiple sources to one target repository, additional traffic + happens for cache resynchronization. 
In case you are interested in more details, please read the internals documentation. From ddd0e2700bd77111e401178c3074ec102275fb12 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 04:16:03 +0200 Subject: [PATCH 113/142] docs: improve faq and support section --- docs/faq.rst | 3 ++- docs/support.rst | 16 ++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/faq.rst b/docs/faq.rst index 06ea63bd2..37e7b2dfe 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -97,7 +97,8 @@ Can |project_name| add redundancy to the backup data to deal with hardware malfu of low-level storage layout information and control which we do not have (and also can't get, even if we wanted). - So, if you need that, consider RAID1 or a filesystems that offers redundant storage. + So, if you need that, consider RAID1 or a filesystem that offers redundant storage + or just make 2 backups to different locations / different hardware. Can |project_name| verify data integrity of a backup archive? Yes, if you want to detect accidental data damage (like bit rot), use the ``check`` diff --git a/docs/support.rst b/docs/support.rst index 02f3b2be5..e59862675 100644 --- a/docs/support.rst +++ b/docs/support.rst @@ -4,15 +4,15 @@ Support ======= -Please first read the docs and the FAQ section in the docs, a lot of stuff is -documented / explained there. +Please first read the docs and existing issue tracker issues and mailing +list posts, a lot of stuff is already documented / explained / discussed / +filed there. Issue Tracker ------------- If you've found a bug or have a concrete feature request, please create a new -ticket on the project's `issue tracker`_ (after checking whether someone else -already has reported the same thing). +ticket on the project's `issue tracker`_. For more general questions or discussions, IRC or mailing list are preferred. @@ -48,10 +48,10 @@ then spend the funds as they deem fit). 
If you do not have some specific bounty the software / project you like. If you want to encourage developers to fix some specific issue or implement some -specific feature suggestion, you can post a bounty or back an existing one (they -always refer to an issue in our `issue tracker`_). +specific feature suggestion, you can post a new bounty or back an existing one +(they always refer to an issue in our `issue tracker`_). -As a developer, you can become a Bounty Hunter and win bounties by contributing -to |project_name|, a free and open source software project. +As a developer, you can become a Bounty Hunter and win bounties (earn money) by +contributing to |project_name|, a free and open source software project. We might also use BountySource to fund raise for some bigger goals. From 1a949eab3ed05d4e25f19f84244ea5c4f724f9a7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 04:23:57 +0200 Subject: [PATCH 114/142] docs: internals: improve structure and formatting --- docs/internals.rst | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/internals.rst b/docs/internals.rst index 4792e5045..d989fd9c5 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -8,7 +8,6 @@ This page documents the internal data structures and storage mechanisms of |project_name|. It is partly based on `mailing list discussion about internals`_ and also on static code analysis. -It may not be exactly up to date with the current source code. Repository and Archives ----------------------- @@ -151,6 +150,9 @@ Each archive info contains: It is the last object stored, in the last segment, and is replaced each time. +The Archive +----------- + The archive metadata does not contain the file items directly. Only references to other objects that contain that data. 
An archive is an object that contains: @@ -163,6 +165,10 @@ object that contains: * username * time + +The Item +-------- + Each item represents a file, directory or other fs item and is stored as an ``item`` dictionary that contains: @@ -220,7 +226,7 @@ what files you have based on a specific set of chunk sizes). Indexes / Caches ---------------- -The files cache is stored in ``cache/files`` and is indexed on the +The **files cache** is stored in ``cache/files`` and is indexed on the ``file path hash``. At backup time, it is used to quickly determine whether we need to chunk a given file (or whether it is unchanged and we already have all its pieces). @@ -239,7 +245,7 @@ archives in different setups. The files cache is stored as a python associative array storing python objects, which generates a lot of overhead. -The chunks cache is stored in ``cache/chunks`` and is indexed on the +The **chunks cache** is stored in ``cache/chunks`` and is indexed on the ``chunk id_hash``. It is used to determine whether we already have a specific chunk, to count references to it and also for statistics. It contains: @@ -248,7 +254,7 @@ It contains: * size * encrypted/compressed size -The repository index is stored in ``repo/index.%d`` and is indexed on the +The **repository index** is stored in ``repo/index.%d`` and is indexed on the ``chunk id_hash``. It is used to determine a chunk's location in the repository. It contains: From d779057b7902157a16ffd50fde66704fa766fc4b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 04:46:13 +0200 Subject: [PATCH 115/142] fix issue with negative "all archives" size, fixes #165 This fixes an infrequent problem when (refcount * chunksize) overflowed an int32_t. chunksize is always <= 8MiB and usually rather ~64KiB (with default chunker params). Thus, this happened only for high refcounts and/or unusually big chunks. 
--- borg/_hashindex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/_hashindex.c b/borg/_hashindex.c index aa1881f18..e1ff936f7 100644 --- a/borg/_hashindex.c +++ b/borg/_hashindex.c @@ -380,8 +380,8 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs chunks += values[0]; unique_size += values[1]; unique_csize += values[2]; - size += values[0] * values[1]; - csize += values[0] * values[2]; + size += (int64_t) values[0] * values[1]; + csize += (int64_t) values[0] * values[2]; } *total_size = size; *total_csize = csize; From 273c04329f9e71c2f816742d30cf0deaecb10101 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 05:02:05 +0200 Subject: [PATCH 116/142] update CHANGES --- CHANGES.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 883878962..8b96bbbf3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -13,6 +13,7 @@ Compatibility notes: default "none" compression, use "zlib,0" if you want a "no compression" mode that can be read by older borg). Also the new code is able to read repos and archives made with older borg versions (for all zlib levels 0..9). +- lz4 compression library (liblz4) is a new requirement (#156) Deprecations: @@ -30,7 +31,6 @@ New features: - create --compression none (default, means: do not compress, just pass through data "as is". this is more efficient than zlib level 0 as used in borg 0.24) - create --compression lz4 (super-fast, but not very high compression) - Please note that borgbackup needs lz4 library as additional requirement (#156). 
- create --compression zlib,N (slower, higher compression, default for N is 6) - create --compression lzma,N (slowest, highest compression, default N is 6) - honor the nodump flag (UF_NODUMP) and do not backup such items @@ -43,6 +43,8 @@ Bug fixes: - close files which fell out the lrucache - fadvise DONTNEED now is only called for the byte range actually read, not for the whole file, fixes #158. +- fix issue with negative "all archives" size, fixes #165 +- restore_xattrs: ignore if setxattr fails with EACCES, fixes #162 Other changes: @@ -54,6 +56,7 @@ Other changes: - lrucache refactoring / cleanup, add dispose function, py.test tests - generalize hashindex code for any key length (less hardcoding) - lock roster: catch file not found in remove() method and ignore it +- travis CI: use requirements file - improved docs: - replace hack for llfuse with proper solution (install libfuse-dev) @@ -61,6 +64,9 @@ Other changes: - update development docs about fakeroot - internals: add some words about lock files / locking system - support: mention BountySource and for what it can be used + - theme: use a lighter green + - add pypi, wheel, dist package based install docs + - split install docs into system-specific preparations and generic instructions Version 0.24.0 From d2e5ce90f28f5550ca6d9946b83fdecd6108190f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 05:41:41 +0200 Subject: [PATCH 117/142] add FAQ entry about OS X commandline vs. HFS+ encoding issue, see #143. a real fix for this is in development, but needs some time. --- docs/faq.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/faq.rst b/docs/faq.rst index 37e7b2dfe..d13fe67f1 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -86,6 +86,18 @@ If it crashes with a UnicodeError, what can I do? export LANG=en_US.UTF-8 # or similar, important is correct charset +I can't extract non-ascii filenames by giving them on the commandline on OS X!? 
+ This is due to different ways to represent some characters in unicode. + HFS+ likes the decomposed form while the commandline seems to be the composed + form usually. If you run into that, for now maybe just try: + + - avoiding the non-ascii characters on the commandline by e.g. extracting + the parent directory (or even everything) + - try to enter the composed form on the commandline + - mount the repo using FUSE and use some file manager + + See issue #143 on the issue tracker for more about this. + If I want to run |project_name| on a ARM CPU older than ARM v6? You need to enable the alignment trap handler to fixup misaligned accesses:: From 31e97d568b8ee66868f944008305f387b18ae6cd Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 12:52:18 +0200 Subject: [PATCH 118/142] remove x bits from repository.py --- borg/repository.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 borg/repository.py diff --git a/borg/repository.py b/borg/repository.py old mode 100755 new mode 100644 From 2332ee433ed6cd451cd4c4968f0f6c378f7884e4 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 13:35:58 +0200 Subject: [PATCH 119/142] remove docs/misc/create_compression.txt outdated - it just showed different levels of zlib compression, but now we additionally have "lzma", "lz4" and "none" compression. the "usage" and "internals" docs give some hints about them, too. --- docs/misc/create_compression.txt | 130 ------------------------------- 1 file changed, 130 deletions(-) delete mode 100644 docs/misc/create_compression.txt diff --git a/docs/misc/create_compression.txt b/docs/misc/create_compression.txt deleted file mode 100644 index 89ffdf4d9..000000000 --- a/docs/misc/create_compression.txt +++ /dev/null @@ -1,130 +0,0 @@ -data compression -================ - -borg create --compression N repo::archive data - -Currently, borg only supports zlib compression. 
There are plans to expand this -to other, faster or better compression algorithms in the future. - -N == 0 -> zlib level 0 == very quick, no compression -N == 1 -> zlib level 1 == quick, low compression -... -N == 9 -> zlib level 9 == slow, high compression - -Measurements made on a Haswell Ultrabook, SSD storage, Linux. - - -Example 1: lots of relatively small text files (linux kernel src) ------------------------------------------------------------------ - -N == 1 does a good job here, it saves the additional time needed for -compression because it needs to store less into storage (see N == 0). - -N == 6 is also quite ok, a little slower, a little less repo size. -6 was the old default of borg. - -High compression levels only give a little more compression, but take a lot -of cpu time. - -$ borg create --stats --compression 0 ------------------------------------------------------------------------------- -Duration: 50.40 seconds -Number of files: 72890 - - Original size Compressed size Deduplicated size -This archive: 1.17 GB 1.18 GB 1.01 GB - - Unique chunks Total chunks -Chunk index: 70263 82309 ------------------------------------------------------------------------------- - -$ borg create --stats --compression 1 ------------------------------------------------------------------------------- -Duration: 49.29 seconds -Number of files: 72890 - - Original size Compressed size Deduplicated size -This archive: 1.17 GB 368.62 MB 295.22 MB - - Unique chunks Total chunks -Chunk index: 70280 82326 ------------------------------------------------------------------------------- - -$ borg create --stats --compression 5 ------------------------------------------------------------------------------- -Duration: 59.99 seconds -Number of files: 72890 - - Original size Compressed size Deduplicated size -This archive: 1.17 GB 331.70 MB 262.20 MB - - Unique chunks Total chunks -Chunk index: 70290 82336 
------------------------------------------------------------------------------- - -$ borg create --stats --compression 6 ------------------------------------------------------------------------------- -Duration: 1 minutes 13.64 seconds -Number of files: 72890 - - Original size Compressed size Deduplicated size -This archive: 1.17 GB 328.79 MB 259.56 MB - - Unique chunks Total chunks -Chunk index: 70279 82325 ------------------------------------------------------------------------------- - -$ borg create --stats --compression 9 ------------------------------------------------------------------------------- -Duration: 3 minutes 1.58 seconds -Number of files: 72890 - - Original size Compressed size Deduplicated size -This archive: 1.17 GB 326.57 MB 257.57 MB - - Unique chunks Total chunks -Chunk index: 70292 82338 ------------------------------------------------------------------------------- - - -Example 2: large VM disk file (sparse file) -------------------------------------------- - -The file's directory size is 80GB, but a lot of it is sparse (and reads as -zeros). 
- -$ borg create --stats --compression 0 ------------------------------------------------------------------------------- -Duration: 13 minutes 48.47 seconds -Number of files: 1 - - Original size Compressed size Deduplicated size -This archive: 80.54 GB 80.55 GB 10.87 GB - - Unique chunks Total chunks -Chunk index: 147307 177109 ------------------------------------------------------------------------------- - -$ borg create --stats --compression 1 ------------------------------------------------------------------------------- -Duration: 15 minutes 31.34 seconds -Number of files: 1 - - Original size Compressed size Deduplicated size -This archive: 80.54 GB 6.68 GB 5.67 GB - - Unique chunks Total chunks -Chunk index: 147309 177111 ------------------------------------------------------------------------------- - -$ borg create --stats --compression 6 ------------------------------------------------------------------------------- -Duration: 18 minutes 57.54 seconds -Number of files: 1 - - Original size Compressed size Deduplicated size -This archive: 80.54 GB 6.19 GB 5.44 GB - - Unique chunks Total chunks -Chunk index: 147307 177109 ------------------------------------------------------------------------------- From ab545ae45dbe7f7a696937e9c45fd6d2f9795067 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 14:10:31 +0200 Subject: [PATCH 120/142] CHANGES: release 0.25.0 --- CHANGES.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 8b96bbbf3..d4da70e96 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,18 +2,18 @@ Borg Changelog ============== -Version 0.25.0 (not released yet) ---------------------------------- +Version 0.25.0 +-------------- Compatibility notes: +- lz4 compression library (liblz4) is a new requirement (#156) - the new compression code is very compatible: as long as you stay with zlib compression, older borg releases will still be able to read data from a repo/archive made with 
the new code (note: this is not the case for the default "none" compression, use "zlib,0" if you want a "no compression" mode that can be read by older borg). Also the new code is able to read repos and archives made with older borg versions (for all zlib levels 0..9). -- lz4 compression library (liblz4) is a new requirement (#156) Deprecations: From f7210c749f8ed6db1c46a37e1ca4b7f4bf418bf3 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 29 Aug 2015 23:34:58 +0200 Subject: [PATCH 121/142] remove cpu intensive compression methods for the chunks.archive also remove the comment about how good xz compresses - while that was true for smaller index files, it seems to be less effective with bigger ones. maybe just an issue with compression dict size. --- borg/cache.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 2391be275..207fb58a6 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -213,9 +213,6 @@ class Cache: so it has complete and current information about all backup archives. Finally, it builds the master chunks index by merging all indices from the tar. - - Note: compression (esp. xz) is very effective in keeping the tar - relatively small compared to the files it contains. 
""" in_archive_path = os.path.join(self.path, 'chunks.archive') out_archive_path = os.path.join(self.path, 'chunks.archive.tmp') @@ -234,8 +231,10 @@ class Cache: return tf def open_out_archive(): - for compression in ('xz', 'bz2', 'gz'): - # xz needs py 3.3, bz2 and gz also work on 3.2 + for compression in ('gz', ): + # 'xz' needs py 3.3 and is expensive on the cpu + # 'bz2' also works on 3.2 and is expensive on the cpu + # 'gz' also works on 3.2 and is less expensive on the cpu try: tf = tarfile.open(out_archive_path, 'w:'+compression, format=tarfile.PAX_FORMAT) break From 22dd925986f46615d4bb8b09830c493ae65ec896 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 30 Aug 2015 03:03:48 +0200 Subject: [PATCH 122/142] chunks index archive: remove all tar and compression related stuff and just use separate files in a directory the compression was quite cpu intensive and didn't work that great anyway. now the disk space usage is a bit higher, but it is much faster and less hard on the cpu. disk space needs grow linearly with the amount and size of the archives, this is a problem esp. if one has many and/or big archives (but this problem existed before also because compression was not as effective as I believed). the tar archive always needed a complete rebuild (and thus: decompression and recompression) because deleting outdated archive indexes was not possible in the tar file. now we just have a directory chunks.archive.d and keep archive index files there for all archives we already know. if an archive does not exist any more in the repo, we just delete its index file. if an archive is unknown still, we fetch the infos and build a new index file. when merging, we avoid growing the hash table from zero, but just start with the first archive's index as basis for merging. 
--- borg/cache.py | 185 ++++++++++++++++++++++---------------------------- 1 file changed, 82 insertions(+), 103 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 207fb58a6..65e64af5b 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -96,8 +96,7 @@ class Cache: with open(os.path.join(self.path, 'config'), 'w') as fd: config.write(fd) ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) - with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd: - pass # empty file + os.makedirs(os.path.join(self.path, 'chunks.archive.d')) with open(os.path.join(self.path, 'files'), 'wb') as fd: pass # empty file @@ -153,7 +152,6 @@ class Cache: os.mkdir(txn_dir) shutil.copy(os.path.join(self.path, 'config'), txn_dir) shutil.copy(os.path.join(self.path, 'chunks'), txn_dir) - shutil.copy(os.path.join(self.path, 'chunks.archive'), txn_dir) shutil.copy(os.path.join(self.path, 'files'), txn_dir) os.rename(os.path.join(self.path, 'txn.tmp'), os.path.join(self.path, 'txn.active')) @@ -195,7 +193,6 @@ class Cache: if os.path.exists(txn_dir): shutil.copy(os.path.join(txn_dir, 'config'), self.path) shutil.copy(os.path.join(txn_dir, 'chunks'), self.path) - shutil.copy(os.path.join(txn_dir, 'chunks.archive'), self.path) shutil.copy(os.path.join(txn_dir, 'files'), self.path) os.rename(txn_dir, os.path.join(self.path, 'txn.tmp')) if os.path.exists(os.path.join(self.path, 'txn.tmp')): @@ -206,53 +203,14 @@ class Cache: def sync(self): """Re-synchronize chunks cache with repository. - If present, uses a compressed tar archive of known backup archive - indices, so it only needs to fetch infos from repo and build a chunk - index once per backup archive. - If out of sync, the tar gets rebuilt from known + fetched chunk infos, - so it has complete and current information about all backup archives. - Finally, it builds the master chunks index by merging all indices from - the tar. 
+ Maintains a directory with known backup archive indexes, so it only + needs to fetch infos from repo and build a chunk index once per backup + archive. + If out of sync, missing archive indexes get added, outdated indexes + get removed and a new master chunks index is built by merging all + archive indexes. """ - in_archive_path = os.path.join(self.path, 'chunks.archive') - out_archive_path = os.path.join(self.path, 'chunks.archive.tmp') - - def open_in_archive(): - try: - tf = tarfile.open(in_archive_path, 'r') - except OSError as e: - if e.errno != errno.ENOENT: - raise - # file not found - tf = None - except tarfile.ReadError: - # empty file? - tf = None - return tf - - def open_out_archive(): - for compression in ('gz', ): - # 'xz' needs py 3.3 and is expensive on the cpu - # 'bz2' also works on 3.2 and is expensive on the cpu - # 'gz' also works on 3.2 and is less expensive on the cpu - try: - tf = tarfile.open(out_archive_path, 'w:'+compression, format=tarfile.PAX_FORMAT) - break - except tarfile.CompressionError: - continue - else: # shouldn't happen - tf = None - return tf - - def close_archive(tf): - if tf: - tf.close() - - def delete_in_archive(): - os.unlink(in_archive_path) - - def rename_out_archive(): - os.rename(out_archive_path, in_archive_path) + archive_path = os.path.join(self.path, 'chunks.archive.d') def add(chunk_idx, id, size, csize, incr=1): try: @@ -261,16 +219,21 @@ class Cache: except KeyError: chunk_idx[id] = incr, size, csize - def transfer_known_idx(archive_id, tf_in, tf_out): - archive_id_hex = hexlify(archive_id).decode('ascii') - tarinfo = tf_in.getmember(archive_id_hex) - archive_name = tarinfo.pax_headers['archive_name'] - print('Already known archive:', archive_name) - f_in = tf_in.extractfile(archive_id_hex) - tf_out.addfile(tarinfo, f_in) - return archive_name + def mkpath(id, suffix=''): + path = os.path.join(archive_path, id + suffix) + return path.encode('utf-8') - def fetch_and_build_idx(archive_id, repository, key, 
tmp_dir, tf_out): + def list_archives(): + fns = os.listdir(archive_path) + # only return filenames that are 64 hex digits (256bit) + return [fn for fn in fns if len(fn) == 64] + + def cleanup_outdated(ids): + for id in ids: + id_hex = hexlify(id).decode('ascii') + os.unlink(mkpath(id_hex)) + + def fetch_and_build_idx(archive_id, repository, key): chunk_idx = ChunkIndex() cdata = repository.get(archive_id) data = key.decrypt(archive_id, cdata) @@ -293,55 +256,71 @@ class Cache: for chunk_id, size, csize in item[b'chunks']: add(chunk_idx, chunk_id, size, csize) archive_id_hex = hexlify(archive_id).decode('ascii') - file_tmp = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') - chunk_idx.write(file_tmp) - tarinfo = tf_out.gettarinfo(file_tmp, archive_id_hex) - tarinfo.pax_headers['archive_name'] = archive[b'name'] - with open(file_tmp, 'rb') as f: - tf_out.addfile(tarinfo, f) - os.unlink(file_tmp) + fn = mkpath(archive_id_hex) + fn_tmp = mkpath(archive_id_hex, suffix='.tmp') + try: + chunk_idx.write(fn_tmp) + except Exception: + os.unlink(fn_tmp) + else: + os.rename(fn_tmp, fn) - def create_master_idx(chunk_idx, tf_in, tmp_dir): + def create_master_idx(chunk_idx): + # deallocates old hashindex, creates empty hashindex: chunk_idx.clear() - for tarinfo in tf_in: - archive_id_hex = tarinfo.name - archive_name = tarinfo.pax_headers['archive_name'] - print("- extracting archive %s ..." 
% archive_name) - tf_in.extract(archive_id_hex, tmp_dir) - chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8') - print("- reading archive ...") - archive_chunk_idx = ChunkIndex.read(chunk_idx_path) - print("- merging archive ...") - chunk_idx.merge(archive_chunk_idx) - os.unlink(chunk_idx_path) + archives = list_archives() + if archives: + chunk_idx = None + for fn in archives: + archive_id_hex = fn + archive_id = unhexlify(archive_id_hex) + for name, info in self.manifest.archives.items(): + if info[b'id'] == archive_id: + archive_name = name + break + archive_chunk_idx_path = mkpath(archive_id_hex) + print("- reading archive %s ..." % archive_name) + archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path) + print("- merging archive ...") + if chunk_idx is None: + # we just use the first archive's idx as starting point, + # to avoid growing the hash table from 0 size and also + # to save 1 merge call. + chunk_idx = archive_chunk_idx + else: + chunk_idx.merge(archive_chunk_idx) + return chunk_idx + + def legacy_support(): + try: + # get rid of the compressed tar file, if present + os.unlink(os.path.join(self.path, 'chunks.archive')) + except: + pass + try: + # create the directory for the archive index files we use now + os.mkdir(archive_path) + except: + pass + self.begin_txn() print('Synchronizing chunks cache...') - # XXX we have to do stuff on disk due to lacking ChunkIndex api - with tempfile.TemporaryDirectory(prefix='borg-tmp') as tmp_dir: - repository = cache_if_remote(self.repository) - out_archive = open_out_archive() - in_archive = open_in_archive() - if in_archive: - known_ids = set(unhexlify(hexid) for hexid in in_archive.getnames()) - else: - known_ids = set() - archive_ids = set(info[b'id'] for info in self.manifest.archives.values()) - print('Rebuilding archive collection. 
Known: %d Repo: %d Unknown: %d' % ( - len(known_ids), len(archive_ids), len(archive_ids - known_ids), )) - for archive_id in archive_ids & known_ids: - transfer_known_idx(archive_id, in_archive, out_archive) - close_archive(in_archive) - delete_in_archive() # free disk space - for archive_id in archive_ids - known_ids: - fetch_and_build_idx(archive_id, repository, self.key, tmp_dir, out_archive) - close_archive(out_archive) - rename_out_archive() - print('Merging collection into master chunks cache...') - in_archive = open_in_archive() - create_master_idx(self.chunks, in_archive, tmp_dir) - close_archive(in_archive) - print('Done.') + repository = cache_if_remote(self.repository) + legacy_support() + known_ids = set(unhexlify(hexid) for hexid in list_archives()) + archive_ids = set(info[b'id'] for info in self.manifest.archives.values()) + print('Rebuilding archive collection. Repo: %d Known: %d Outdated: %d Unknown: %d' % ( + len(archive_ids), len(known_ids), + len(known_ids - archive_ids), len(archive_ids - known_ids), )) + cleanup_outdated(known_ids - archive_ids) + for archive_id in archive_ids - known_ids: + fetch_and_build_idx(archive_id, repository, self.key) + known_ids = set(unhexlify(hexid) for hexid in list_archives()) + assert known_ids == archive_ids + print('Merging collection into master chunks cache...') + self.chunks = create_master_idx(self.chunks) + print('Done.') def add_chunk(self, id, data, stats): if not self.txn_active: From 54ccbc5ae26c0b11c804a635955f2fa5953462f5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 30 Aug 2015 15:15:15 +0200 Subject: [PATCH 123/142] chunks index resync: do all in one pass if we do not have a cached archive index: fetch and build and merge it if we have one: merge it --- borg/cache.py | 100 +++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 65e64af5b..13f80f325 100644 --- a/borg/cache.py +++ 
b/borg/cache.py @@ -212,6 +212,23 @@ class Cache: """ archive_path = os.path.join(self.path, 'chunks.archive.d') + def mkpath(id, suffix=''): + id_hex = hexlify(id).decode('ascii') + path = os.path.join(archive_path, id_hex + suffix) + return path.encode('utf-8') + + def cached_archives(): + fns = os.listdir(archive_path) + # filenames with 64 hex digits == 256bit + return set(unhexlify(fn) for fn in fns if len(fn) == 64) + + def repo_archives(): + return set(info[b'id'] for info in self.manifest.archives.values()) + + def cleanup_outdated(ids): + for id in ids: + os.unlink(mkpath(id)) + def add(chunk_idx, id, size, csize, incr=1): try: count, size, csize = chunk_idx[id] @@ -219,20 +236,6 @@ class Cache: except KeyError: chunk_idx[id] = incr, size, csize - def mkpath(id, suffix=''): - path = os.path.join(archive_path, id + suffix) - return path.encode('utf-8') - - def list_archives(): - fns = os.listdir(archive_path) - # only return filenames that are 64 hex digits (256bit) - return [fn for fn in fns if len(fn) == 64] - - def cleanup_outdated(ids): - for id in ids: - id_hex = hexlify(id).decode('ascii') - os.unlink(mkpath(id_hex)) - def fetch_and_build_idx(archive_id, repository, key): chunk_idx = ChunkIndex() cdata = repository.get(archive_id) @@ -242,7 +245,6 @@ class Cache: if archive[b'version'] != 1: raise Exception('Unknown archive metadata version') decode_dict(archive, (b'name',)) - print('Analyzing new archive:', archive[b'name']) unpacker = msgpack.Unpacker() for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])): data = key.decrypt(item_id, chunk) @@ -255,33 +257,43 @@ class Cache: if b'chunks' in item: for chunk_id, size, csize in item[b'chunks']: add(chunk_idx, chunk_id, size, csize) - archive_id_hex = hexlify(archive_id).decode('ascii') - fn = mkpath(archive_id_hex) - fn_tmp = mkpath(archive_id_hex, suffix='.tmp') + fn = mkpath(archive_id) + fn_tmp = mkpath(archive_id, suffix='.tmp') try: chunk_idx.write(fn_tmp) except 
Exception: os.unlink(fn_tmp) else: os.rename(fn_tmp, fn) + return chunk_idx + + def lookup_name(archive_id): + for name, info in self.manifest.archives.items(): + if info[b'id'] == archive_id: + return name def create_master_idx(chunk_idx): + print('Synchronizing chunks cache...') + cached_ids = cached_archives() + archive_ids = repo_archives() + print('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' % ( + len(archive_ids), len(cached_ids), + len(cached_ids - archive_ids), len(archive_ids - cached_ids), )) # deallocates old hashindex, creates empty hashindex: chunk_idx.clear() - archives = list_archives() - if archives: + cleanup_outdated(cached_ids - archive_ids) + if archive_ids: chunk_idx = None - for fn in archives: - archive_id_hex = fn - archive_id = unhexlify(archive_id_hex) - for name, info in self.manifest.archives.items(): - if info[b'id'] == archive_id: - archive_name = name - break - archive_chunk_idx_path = mkpath(archive_id_hex) - print("- reading archive %s ..." % archive_name) - archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path) - print("- merging archive ...") + for archive_id in archive_ids: + archive_name = lookup_name(archive_id) + if archive_id in cached_ids: + archive_chunk_idx_path = mkpath(archive_id) + print("Reading cached archive chunk index for %s ..." % archive_name) + archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path) + else: + print('Fetching and building archive index for %s ...' 
% archive_name) + archive_chunk_idx = fetch_and_build_idx(archive_id, repository, self.key) + print("Merging into master chunks index ...") if chunk_idx is None: # we just use the first archive's idx as starting point, # to avoid growing the hash table from 0 size and also @@ -289,38 +301,28 @@ class Cache: chunk_idx = archive_chunk_idx else: chunk_idx.merge(archive_chunk_idx) + print('Done.') return chunk_idx - def legacy_support(): + def legacy_cleanup(): + """bring old cache dirs into the desired state (cleanup and adapt)""" try: - # get rid of the compressed tar file, if present os.unlink(os.path.join(self.path, 'chunks.archive')) except: pass try: - # create the directory for the archive index files we use now + os.unlink(os.path.join(self.path, 'chunks.archive.tmp')) + except: + pass + try: os.mkdir(archive_path) except: pass - self.begin_txn() - print('Synchronizing chunks cache...') repository = cache_if_remote(self.repository) - legacy_support() - known_ids = set(unhexlify(hexid) for hexid in list_archives()) - archive_ids = set(info[b'id'] for info in self.manifest.archives.values()) - print('Rebuilding archive collection. 
Repo: %d Known: %d Outdated: %d Unknown: %d' % ( - len(archive_ids), len(known_ids), - len(known_ids - archive_ids), len(archive_ids - known_ids), )) - cleanup_outdated(known_ids - archive_ids) - for archive_id in archive_ids - known_ids: - fetch_and_build_idx(archive_id, repository, self.key) - known_ids = set(unhexlify(hexid) for hexid in list_archives()) - assert known_ids == archive_ids - print('Merging collection into master chunks cache...') + legacy_cleanup() self.chunks = create_master_idx(self.chunks) - print('Done.') def add_chunk(self, id, data, stats): if not self.txn_active: From 0b1035746e3382663b24308ea2b53e8a3f426dc0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 00:29:46 +0200 Subject: [PATCH 124/142] read special files as if they were regular files, update docs, closes #79 do not use the files cache for such special files --- borg/archiver.py | 14 ++++++++++---- borg/cache.py | 5 +++-- docs/usage.rst | 10 ++++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 8cce07b8b..e0e7a94f6 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -145,7 +145,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") continue else: restrict_dev = None - self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev) + self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev, + read_special=args.read_special) archive.save(timestamp=args.timestamp) if args.progress: archive.stats.show_progress(final=True) @@ -163,7 +164,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") print('-' * 78) return self.exit_code - def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev): + def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev, + read_special=False): if exclude_path(path, excludes): return try: 
@@ -180,7 +182,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # Ignore if nodump flag is set if has_lchflags and (st.st_flags & stat.UF_NODUMP): return - if stat.S_ISREG(st.st_mode): + if (stat.S_ISREG(st.st_mode) or + read_special and not stat.S_ISDIR(st.st_mode)): try: status = archive.process_file(path, st, cache) except IOError as e: @@ -197,7 +200,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") for filename in sorted(entries): entry_path = os.path.normpath(os.path.join(path, filename)) self._process(archive, cache, excludes, exclude_caches, skip_inodes, - entry_path, restrict_dev) + entry_path, restrict_dev, read_special=read_special) elif stat.S_ISLNK(st.st_mode): status = archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): @@ -687,6 +690,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") 'zlib,0 .. zlib,9 == zlib (with level 0..9), ' 'lzma == lzma (default level 6), ' 'lzma,0 .. lzma,9 == lzma (with level 0..9).') + subparser.add_argument('--read-special', dest='read_special', + action='store_true', default=False, + help='open and read special files as if they were regular files') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/cache.py b/borg/cache.py index 2391be275..639ffc279 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -3,6 +3,7 @@ from .remote import cache_if_remote import errno import msgpack import os +import stat import sys from binascii import hexlify import shutil @@ -381,7 +382,7 @@ class Cache: stats.update(-size, -csize, False) def file_known_and_unchanged(self, path_hash, st): - if not self.do_files: + if not (self.do_files and stat.S_ISREG(st.st_mode)): return None if self.files is None: self._read_files() @@ -398,7 +399,7 @@ class Cache: return None def memorize_file(self, path_hash, st, ids): - if not self.do_files: + if not (self.do_files and 
stat.S_ISREG(st.st_mode)): return # Entry: Age, inode, size, mtime, chunk ids mtime_ns = st_mtime_ns(st) diff --git a/docs/usage.rst b/docs/usage.rst index 8595ca7f8..27c258504 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -210,6 +210,11 @@ Examples # Even slower, even higher compression (N = 0..9) $ borg create --compression lzma,N /mnt/backup::repo ~ + # Backup some LV snapshots (you have to create the snapshots before this + # and remove them afterwards). We also backup the output of lvdisplay so + # we can see the LV sizes at restore time. See also "borg extract" examples. + $ lvdisplay > lvdisplay.txt + $ borg create --read-special /mnt/backup::repo lvdisplay.txt /dev/vg0/*-snapshot .. include:: usage/extract.rst.inc @@ -229,6 +234,11 @@ Examples # Extract the "src" directory but exclude object files $ borg extract /mnt/backup::my-files home/USERNAME/src --exclude '*.o' + # Restore LV snapshots (the target LVs /dev/vg0/* of correct size have + # to be already available and will be overwritten by this command!) + $ borg extract --stdout /mnt/backup::repo dev/vg0/root-snapshot > /dev/vg0/root + $ borg extract --stdout /mnt/backup::repo dev/vg0/home-snapshot > /dev/vg0/home + Note: currently, extract always writes into the current working directory ("."), so make sure you ``cd`` to the right place before calling ``borg extract``. From a912c027573ea031094de91827434e64bb0a3675 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 01:10:43 +0200 Subject: [PATCH 125/142] detect inconsistency / corruption / hash collision, closes #170 added a check that compares the size of the new chunk with the stored size of the already existing chunk in storage that has the same id_hash value. raise an exception if there is a size mismatch. 
this could happen if: - the stored size is somehow incorrect (corruption or software bug) - we found a hash collision for the id_hash (for sha256, this is very unlikely) --- borg/cache.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/borg/cache.py b/borg/cache.py index 2391be275..def181629 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -347,9 +347,9 @@ class Cache: def add_chunk(self, id, data, stats): if not self.txn_active: self.begin_txn() - if self.seen_chunk(id): - return self.chunk_incref(id, stats) size = len(data) + if self.seen_chunk(id, size): + return self.chunk_incref(id, stats) data = self.key.encrypt(data) csize = len(data) self.repository.put(id, data, wait=False) @@ -357,8 +357,14 @@ class Cache: stats.update(size, csize, True) return id, size, csize - def seen_chunk(self, id): - return self.chunks.get(id, (0, 0, 0))[0] + def seen_chunk(self, id, size=None): + refcount, stored_size, _ = self.chunks.get(id, (0, None, None)) + if size is not None and stored_size is not None and size != stored_size: + # we already have a chunk with that id, but different size. + # this is either a hash collision (unlikely) or corruption or a bug. + raise Exception("chunk has same id [%r], but different size (stored: %d new: %d)!" % ( + id, stored_size, size)) + return refcount def chunk_incref(self, id, stats): if not self.txn_active: From b3f5231bac387741aad790b6840697a24695d61c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 18:18:24 +0200 Subject: [PATCH 126/142] BORG_REPO env var support sets the default repository to use, e.g. 
like: export BORG_REPO=/mnt/backup/repo borg init borg create ::archive borg list borg mount :: /mnt fusermount -u /mnt borg delete ::archive --- borg/archiver.py | 15 +++-- borg/helpers.py | 25 ++++++- borg/testsuite/helpers.py | 133 +++++++++++++++++++++++++++++--------- docs/usage.rst | 12 ++-- 4 files changed, 141 insertions(+), 44 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 8cce07b8b..6a1e40b7e 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -18,7 +18,7 @@ from .compress import Compressor, COMPR_BUFFER from .repository import Repository from .cache import Cache from .key import key_creator -from .helpers import Error, location_validator, format_time, format_file_size, \ +from .helpers import Error, location_validator, Location, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ @@ -556,7 +556,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") description=self.do_init.__doc__, epilog=init_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_init) - subparser.add_argument('repository', metavar='REPOSITORY', + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), type=location_validator(archive=False), help='repository to create') subparser.add_argument('-e', '--encryption', dest='encryption', @@ -604,7 +604,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=check_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_check) - subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', + subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default=Location(), 
type=location_validator(), help='repository or archive to check consistency of') subparser.add_argument('--repository-only', dest='repo_only', action='store_true', @@ -629,7 +629,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=change_passphrase_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_change_passphrase) - subparser.add_argument('repository', metavar='REPOSITORY', + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), type=location_validator(archive=False)) create_epilog = textwrap.dedent(""" @@ -760,7 +760,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('-s', '--stats', dest='stats', action='store_true', default=False, help='print statistics for the deleted archive') - subparser.add_argument('target', metavar='TARGET', + subparser.add_argument('target', metavar='TARGET', nargs='?', default=Location(), type=location_validator(), help='archive or repository to delete') @@ -775,7 +775,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('--short', dest='short', action='store_true', default=False, help='only print file/directory names, nothing else') - subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', type=location_validator(), + subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default=Location(), + type=location_validator(), help='repository/archive to list contents of') mount_epilog = textwrap.dedent(""" This command mounts an archive as a FUSE filesystem. 
This can be useful for @@ -858,7 +859,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='number of yearly archives to keep') subparser.add_argument('-p', '--prefix', dest='prefix', type=str, help='only consider archive names starting with this prefix') - subparser.add_argument('repository', metavar='REPOSITORY', + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), type=location_validator(archive=False), help='repository to prune') diff --git a/borg/helpers.py b/borg/helpers.py index 6d2b81736..aa5bead0b 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -466,13 +466,34 @@ class Location: r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$') scp_re = re.compile(r'((?:(?P<user>[^@]+)@)?(?P<host>[^:/]+):)?' r'(?P<path>[^:]+)(?:::(?P<archive>.+))?$') + # get the repo from BORG_REPO env and the optional archive from param. + # if the syntax requires giving REPOSITORY (see "borg mount"), + # use "::" to let it use the env var. + # if REPOSITORY argument is optional, it'll automatically use the env.
+ env_re = re.compile(r'(?:::(?P<archive>.+)?)?$') - def __init__(self, text): + def __init__(self, text=''): self.orig = text - if not self.parse(text): + if not self.parse(self.orig): raise ValueError def parse(self, text): + valid = self._parse(text) + if valid: + return True + m = self.env_re.match(text) + if not m: + return False + repo = os.environ.get('BORG_REPO') + if repo is None: + return False + valid = self._parse(repo) + if not valid: + return False + self.archive = m.group('archive') + return True + + def _parse(self, text): m = self.ssh_re.match(text) if m: self.proto = m.group('proto') diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index b61a8268f..95531df83 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -23,42 +23,115 @@ class BigIntTestCase(BaseTestCase): self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70) -class LocationTestCase(BaseTestCase): +class TestLocationWithoutEnv: + def test_ssh(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('ssh://user@host:1234/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')" + assert repr(Location('ssh://user@host:1234/some/path')) == \ + "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)" - def test(self): - self.assert_equal( - repr(Location('ssh://user@host:1234/some/path::archive')), - "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')" - ) - self.assert_equal( - repr(Location('file:///some/path::archive')), - "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')" - ) - self.assert_equal( - repr(Location('user@host:/some/path::archive')), - "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')" - ) - self.assert_equal( - repr(Location('path::archive')), -
"Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')" - ) - self.assert_equal( - repr(Location('/some/absolute/path::archive')), - "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')" - ) - self.assert_equal( - repr(Location('some/relative/path::archive')), - "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')" - ) - self.assert_raises(ValueError, lambda: Location('ssh://localhost:22/path:archive')) + def test_file(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('file:///some/path::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')" + assert repr(Location('file:///some/path')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)" - def test_canonical_path(self): + def test_scp(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('user@host:/some/path::archive')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')" + assert repr(Location('user@host:/some/path')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)" + + def test_folder(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('path::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')" + assert repr(Location('path')) == \ + "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)" + + def test_abspath(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('/some/absolute/path::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')" + assert 
repr(Location('/some/absolute/path')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)" + + def test_relpath(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + assert repr(Location('some/relative/path::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')" + assert repr(Location('some/relative/path')) == \ + "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)" + + def test_underspecified(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + with pytest.raises(ValueError): + Location('::archive') + with pytest.raises(ValueError): + Location('::') + with pytest.raises(ValueError): + Location() + + def test_no_double_colon(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) + with pytest.raises(ValueError): + Location('ssh://localhost:22/path:archive') + + def test_canonical_path(self, monkeypatch): + monkeypatch.delenv('BORG_REPO', raising=False) locations = ['some/path::archive', 'file://some/path::archive', 'host:some/path::archive', 'host:~user/some/path::archive', 'ssh://host/some/path::archive', 'ssh://user@host:1234/some/path::archive'] for location in locations: - self.assert_equal(Location(location).canonical_path(), - Location(Location(location).canonical_path()).canonical_path()) + assert Location(location).canonical_path() == \ + Location(Location(location).canonical_path()).canonical_path() + + +class TestLocationWithEnv: + def test_ssh(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', 'ssh://user@host:1234/some/path') + assert repr(Location('::archive')) == \ + "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='ssh', user='user', host='host', port=1234, path='/some/path', archive=None)" + + def test_file(self, monkeypatch): + 
monkeypatch.setenv('BORG_REPO', 'file:///some/path') + assert repr(Location('::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/path', archive=None)" + + def test_scp(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', 'user@host:/some/path') + assert repr(Location('::archive')) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='ssh', user='user', host='host', port=None, path='/some/path', archive=None)" + + def test_folder(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', 'path') + assert repr(Location('::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='path', archive=None)" + + def test_abspath(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', '/some/absolute/path') + assert repr(Location('::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='/some/absolute/path', archive=None)" + + def test_relpath(self, monkeypatch): + monkeypatch.setenv('BORG_REPO', 'some/relative/path') + assert repr(Location('::archive')) == \ + "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive='archive')" + assert repr(Location()) == \ + "Location(proto='file', user=None, host=None, port=None, path='some/relative/path', archive=None)" class FormatTimedeltaTestCase(BaseTestCase): diff --git a/docs/usage.rst b/docs/usage.rst index 8595ca7f8..882ba469b 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -41,9 +41,15 @@ Environment Variables |project_name| uses 
some environment variables for automation: -Specifying a passphrase: +General: + BORG_REPO + When set, use the value to give the default repository location. If a command needs an archive + parameter, you can abbreviate as `::archive`. If a command needs a repository parameter, you + can either leave it away or abbreviate as `::`, if a positional parameter is required. BORG_PASSPHRASE When set, use the value to answer the passphrase question for encrypted repositories. + TMPDIR + where temporary files are stored (might need a lot of temporary space for some operations) Some "yes" sayers (if set, they automatically confirm that you really want to do X even if there is that warning): BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK @@ -64,10 +70,6 @@ Building: BORG_OPENSSL_PREFIX Adds given OpenSSL header file directory to the default locations (setup.py). -General: - TMPDIR - where temporary files are stored (might need a lot of temporary space for some operations) - Please note: From 817ce18bc6c1f9507e4ce70169d73bf7ae7769e2 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 20:19:28 +0200 Subject: [PATCH 127/142] fix repository arg default --- borg/archiver.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 6a1e40b7e..54389d5e7 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -18,7 +18,7 @@ from .compress import Compressor, COMPR_BUFFER from .repository import Repository from .cache import Cache from .key import key_creator -from .helpers import Error, location_validator, Location, format_time, format_file_size, \ +from .helpers import Error, location_validator, format_time, format_file_size, \ format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \ get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ @@ -556,7 
+556,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") description=self.do_init.__doc__, epilog=init_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_init) - subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), help='repository to create') subparser.add_argument('-e', '--encryption', dest='encryption', @@ -604,7 +604,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=check_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_check) - subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default=Location(), + subparser.add_argument('repository', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', type=location_validator(), help='repository or archive to check consistency of') subparser.add_argument('--repository-only', dest='repo_only', action='store_true', @@ -629,7 +629,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") epilog=change_passphrase_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) subparser.set_defaults(func=self.do_change_passphrase) - subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False)) create_epilog = textwrap.dedent(""" @@ -760,7 +760,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('-s', '--stats', dest='stats', action='store_true', default=False, help='print statistics for the deleted archive') - subparser.add_argument('target', metavar='TARGET', nargs='?', default=Location(), + subparser.add_argument('target', metavar='TARGET', nargs='?', default='', 
type=location_validator(), help='archive or repository to delete') @@ -775,7 +775,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('--short', dest='short', action='store_true', default=False, help='only print file/directory names, nothing else') - subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default=Location(), + subparser.add_argument('src', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='', type=location_validator(), help='repository/archive to list contents of') mount_epilog = textwrap.dedent(""" @@ -859,7 +859,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") help='number of yearly archives to keep') subparser.add_argument('-p', '--prefix', dest='prefix', type=str, help='only consider archive names starting with this prefix') - subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default=Location(), + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), help='repository to prune') From f5069c4e812a874ef9e742a9c3712b7ba7ca15c7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 21:11:52 +0200 Subject: [PATCH 128/142] fix reaction to "no" answer at delete repo prompt, fixes #182 --- borg/archiver.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 8cce07b8b..0f931cd39 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -296,10 +296,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") print("You requested to completely DELETE the repository *including* all archives it contains:") for archive_info in manifest.list_archive_infos(sort_by='ts'): print(format_archive(archive_info)) - while not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): + if not os.environ.get('BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'): print("""Type "YES" if you understand this and want to 
continue.\n""") - if input('Do you want to continue? ') == 'YES': - break + if input('Do you want to continue? ') != 'YES': + self.exit_code = 1 + return self.exit_code repository.destroy() cache.destroy() print("Repository and corresponding cache were deleted.") From e244fe2f69288a01ae4aff2a5cef9ac6564ca21b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 22:06:52 +0200 Subject: [PATCH 129/142] change 2 more chunker vars to off_t so they get 64bit on 32bit platforms. --- borg/_chunker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/_chunker.c b/borg/_chunker.c index 23abf1e6c..b817775b0 100644 --- a/borg/_chunker.c +++ b/borg/_chunker.c @@ -130,7 +130,7 @@ static int chunker_fill(Chunker *c) { ssize_t n; - size_t offset, length; + off_t offset, length; PyObject *data; memmove(c->data, c->data + c->last, c->position + c->remaining - c->last); c->position -= c->last; From 13f20647dcad97f2726fa2fac8ebcad92a9a19df Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 23:26:47 +0200 Subject: [PATCH 130/142] use absolute path, attic issue #200, attic issue #137 the daemonize code changes the cwd, thus a relative repo path can't work. 
borg mount repo mnt # did not work borg mount --foreground repo mnt # did work borg mount /abs/path/repo mnt # did work --- borg/repository.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/borg/repository.py b/borg/repository.py index 559a87d87..f43161fb6 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -50,14 +50,14 @@ class Repository: """Object with key {} not found in repository {}.""" def __init__(self, path, create=False, exclusive=False): - self.path = path + self.path = os.path.abspath(path) self.io = None self.lock = None self.index = None self._active_txn = False if create: - self.create(path) - self.open(path, exclusive) + self.create(self.path) + self.open(self.path, exclusive) def __del__(self): self.close() From 16e5f241fca9d622ab38205f185906d1db4a871c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 6 Sep 2015 23:51:03 +0200 Subject: [PATCH 131/142] update CHANGES --- CHANGES.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index d4da70e96..eb7b93667 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,27 @@ Borg Changelog ============== +Version 0.26.0 (not released yet) +--------------------------------- + +New features: + +- BORG_REPO env var to specify the default repo, #168 +- read special files as if they were regular files, #79 + +Bug fixes: + +- borg mount repo: use absolute path, attic #200, attic #137 +- chunker: use off_t to get 64bit on 32bit platform, #178 +- initialize chunker fd to -1, so it's not equal to STDIN_FILENO (0) +- fix reaction to "no" answer at delete repo prompt, #182 + +Other changes: + +- detect inconsistency / corruption / hash collision, #170 +- replace versioneer with setuptools_scm, #106 + + Version 0.25.0 -------------- From 1aacdda4a409da803c722234f96fcc3043b72aef Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 8 Sep 2015 03:12:45 +0200 Subject: [PATCH 132/142] implement borg create --dry-run, attic issue 
#267 also: fix verbose mode display of stdin backup --- borg/archive.py | 1 + borg/archiver.py | 104 ++++++++++++++++++++++--------------- borg/testsuite/archiver.py | 8 +++ 3 files changed, 71 insertions(+), 42 deletions(-) diff --git a/borg/archive.py b/borg/archive.py index 18867dbd9..d6eff1ba9 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -455,6 +455,7 @@ class Archive: b'mtime': int_to_bigint(int(time.time()) * 1000000000) } self.add_item(item) + return 'i' # stdin def process_file(self, path, st, cache): status = None diff --git a/borg/archiver.py b/borg/archiver.py index 728b85482..fd6422781 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -102,17 +102,21 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") def do_create(self, args): """Create new archive""" + dry_run = args.dry_run t0 = datetime.now() - repository = self.open_repository(args.archive, exclusive=True) - manifest, key = Manifest.load(repository) - compr_args = dict(buffer=COMPR_BUFFER) - compr_args.update(args.compression) - key.compressor = Compressor(**compr_args) - cache = Cache(repository, key, manifest, do_files=args.cache_files) - archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, - create=True, checkpoint_interval=args.checkpoint_interval, - numeric_owner=args.numeric_owner, progress=args.progress, - chunker_params=args.chunker_params) + if not dry_run: + repository = self.open_repository(args.archive, exclusive=True) + manifest, key = Manifest.load(repository) + compr_args = dict(buffer=COMPR_BUFFER) + compr_args.update(args.compression) + key.compressor = Compressor(**compr_args) + cache = Cache(repository, key, manifest, do_files=args.cache_files) + archive = Archive(repository, key, manifest, args.archive.archive, cache=cache, + create=True, checkpoint_interval=args.checkpoint_interval, + numeric_owner=args.numeric_owner, progress=args.progress, + chunker_params=args.chunker_params) + else: + archive = cache = None # 
Add cache dir to inode_skip list skip_inodes = set() try: @@ -130,11 +134,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") for path in args.paths: if path == '-': # stdin path = 'stdin' - self.print_verbose(path) - try: - archive.process_stdin(path, cache) - except IOError as e: - self.print_error('%s: %s', path, e) + if not dry_run: + try: + status = archive.process_stdin(path, cache) + except IOError as e: + self.print_error('%s: %s', path, e) + else: + status = '-' + self.print_verbose("%1s %s", status, path) continue path = os.path.normpath(path) if args.dontcross: @@ -146,26 +153,27 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") else: restrict_dev = None self._process(archive, cache, args.excludes, args.exclude_caches, skip_inodes, path, restrict_dev, - read_special=args.read_special) - archive.save(timestamp=args.timestamp) - if args.progress: - archive.stats.show_progress(final=True) - if args.stats: - t = datetime.now() - diff = t - t0 - print('-' * 78) - print('Archive name: %s' % args.archive.archive) - print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii')) - print('Start time: %s' % t0.strftime('%c')) - print('End time: %s' % t.strftime('%c')) - print('Duration: %s' % format_timedelta(diff)) - print('Number of files: %d' % archive.stats.nfiles) - archive.stats.print_('This archive:', cache) - print('-' * 78) + read_special=args.read_special, dry_run=dry_run) + if not dry_run: + archive.save(timestamp=args.timestamp) + if args.progress: + archive.stats.show_progress(final=True) + if args.stats: + t = datetime.now() + diff = t - t0 + print('-' * 78) + print('Archive name: %s' % args.archive.archive) + print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii')) + print('Start time: %s' % t0.strftime('%c')) + print('End time: %s' % t.strftime('%c')) + print('Duration: %s' % format_timedelta(diff)) + print('Number of files: %d' % archive.stats.nfiles) + 
archive.stats.print_('This archive:', cache) + print('-' * 78) return self.exit_code def _process(self, archive, cache, excludes, exclude_caches, skip_inodes, path, restrict_dev, - read_special=False): + read_special=False, dry_run=False): if exclude_path(path, excludes): return try: @@ -184,14 +192,16 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") return if (stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode)): - try: - status = archive.process_file(path, st, cache) - except IOError as e: - self.print_error('%s: %s', path, e) + if not dry_run: + try: + status = archive.process_file(path, st, cache) + except IOError as e: + self.print_error('%s: %s', path, e) elif stat.S_ISDIR(st.st_mode): if exclude_caches and is_cachedir(path): return - status = archive.process_dir(path, st) + if not dry_run: + status = archive.process_dir(path, st) try: entries = os.listdir(path) except OSError as e: @@ -200,13 +210,17 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") for filename in sorted(entries): entry_path = os.path.normpath(os.path.join(path, filename)) self._process(archive, cache, excludes, exclude_caches, skip_inodes, - entry_path, restrict_dev, read_special=read_special) + entry_path, restrict_dev, read_special=read_special, + dry_run=dry_run) elif stat.S_ISLNK(st.st_mode): - status = archive.process_symlink(path, st) + if not dry_run: + status = archive.process_symlink(path, st) elif stat.S_ISFIFO(st.st_mode): - status = archive.process_fifo(path, st) + if not dry_run: + status = archive.process_fifo(path, st) elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode): - status = archive.process_dev(path, st) + if not dry_run: + status = archive.process_dev(path, st) elif stat.S_ISSOCK(st.st_mode): # Ignore unix sockets return @@ -222,7 +236,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") # Note: A/M/U is relative to the "files" cache, not to the repo. 
# This would be an issue if the files cache is not used. if status is None: - status = '?' # need to add a status code somewhere + if not dry_run: + status = '?' # need to add a status code somewhere + else: + status = '-' # dry run, item was not backed up # output ALL the stuff - it can be easily filtered using grep. # even stuff considered unchanged might be interesting. self.print_verbose("%1s %s", status, remove_surrogates(path)) @@ -694,6 +711,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""") subparser.add_argument('--read-special', dest='read_special', action='store_true', default=False, help='open and read special files as if they were regular files') + subparser.add_argument('-n', '--dry-run', dest='dry_run', + action='store_true', default=False, + help='do not create a backup archive') subparser.add_argument('archive', metavar='ARCHIVE', type=location_validator(archive=True), help='archive to create') diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index e635d1b0c..95df90a0a 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -485,6 +485,14 @@ class ArchiverTestCase(ArchiverTestCaseBase): mode = os.stat(self.repository_path).st_mode self.assertEqual(stat.S_IMODE(mode), 0o700) + def test_create_dry_run(self): + self.cmd('init', self.repository_location) + self.cmd('create', '--dry-run', self.repository_location + '::test', 'input') + # Make sure no archive has been created + repository = Repository(self.repository_path) + manifest, key = Manifest.load(repository) + self.assert_equal(len(manifest.archives), 0) + def test_cmdline_compatibility(self): self.create_regular_file('file1', size=1024 * 80) self.cmd('init', self.repository_location) From d9fb1d2b03b58bccc1908c185b62346ee2677f79 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Tue, 8 Sep 2015 23:33:34 -0400 Subject: [PATCH 133/142] Normalize paths before pattern matching on OS X The OS X file system HFS+ stores path names as 
Unicode, and converts them to a variant of Unicode NFD for storage. Because path names will always be in this canonical form, it's not friendly to require users to match this form exactly. Convert paths from the repository and patterns from the command line to NFD before comparing them. Unix (and Windows, I think) file systems don't convert path names into a canonical form, so users will continue to have to exactly match the path name they want, because there could be two paths with the same character visually that are actually composed of different byte sequences. --- borg/helpers.py | 43 +++++++++++++++--- borg/testsuite/helpers.py | 96 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 7 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index aa5bead0b..ecf138125 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -7,6 +7,8 @@ import pwd import re import sys import time +import unicodedata + from datetime import datetime, timezone, timedelta from fnmatch import translate from operator import attrgetter @@ -220,6 +222,10 @@ def exclude_path(path, patterns): # unify the two cases, we add a path separator to the end of # the path before matching. +##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +##### For discussion only, don't merge this code! +##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + class IncludePattern: """Literal files or directories listed on the command line for some operations (e.g. extract, but not create). @@ -227,10 +233,22 @@ class IncludePattern: path match as well. A trailing slash makes no difference. 
""" def __init__(self, pattern): - self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep + def match(path): + return (path+os.path.sep).startswith(self.pattern) - def match(self, path): - return (path+os.path.sep).startswith(self.pattern) + # HFS+ converts paths to a canonical form, so users shouldn't be + # required to enter an exact match + if sys.platform in ('darwin',): + # repository paths will be mostly in NFD, as the OSX exception list + # to NFD is small, so normalize to that form for best performance + pattern = unicodedata.normalize("NFD", pattern) + self.match = lambda p: match(unicodedata.normalize("NFD", p)) + # Windows and Unix filesystems allow different forms, so users + # always have to enter an exact match + else: + self.match = match + + self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep def __repr__(self): return '%s(%s)' % (type(self), self.pattern) @@ -241,17 +259,30 @@ class ExcludePattern(IncludePattern): exclude the contents of a directory, but not the directory itself. """ def __init__(self, pattern): + def match(path): + return self.regex.match(path+os.path.sep) is not None + if pattern.endswith(os.path.sep): self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep else: self.pattern = os.path.normpath(pattern)+os.path.sep+'*' + + # HFS+ converts paths to a canonical form, so users shouldn't be + # required to enter an exact match + if sys.platform in ('darwin',): + # repository paths will be mostly in NFD, as the OSX exception list + # to NFD is small, so normalize to that form for best performance + self.pattern = unicodedata.normalize("NFD", self.pattern) + self.match = lambda p: match(unicodedata.normalize("NFD", p)) + # Windows and Unix filesystems allow different forms, so users + # always have to enter an exact match + else: + self.match = match + # fnmatch and re.match both cache compiled regular expressions. # Nevertheless, this is about 10 times faster. 
self.regex = re.compile(translate(self.pattern)) - def match(self, path): - return self.regex.match(path+os.path.sep) is not None - def __repr__(self): return '%s(%s)' % (type(self), self.pattern) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 95531df83..002033f57 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -3,9 +3,10 @@ from time import mktime, strptime from datetime import datetime, timezone, timedelta import pytest +import sys import msgpack -from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \ +from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \ prune_within, prune_split, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams from . import BaseTestCase @@ -178,6 +179,99 @@ class PatternTestCase(BaseTestCase): ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) +@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') +class IncludePatternNonAsciiTestCase(BaseTestCase): + def testComposedUnicode(self): + pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' + i = IncludePattern(pattern) + + assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testDecomposedUnicode(self): + pattern = 'ba\N{COMBINING ACUTE ACCENT}' + i = IncludePattern(pattern) + + assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testInvalidUnicode(self): + pattern = str(b'ba\x80', 'latin1') + i = IncludePattern(pattern) + + assert not i.match("ba/foo") + assert i.match(str(b"ba\x80/foo", 'latin1')) + + +@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') +class ExcludePatternNonAsciiTestCase(BaseTestCase): + def testComposedUnicode(self): + pattern = 
'b\N{LATIN SMALL LETTER A WITH ACUTE}' + e = ExcludePattern(pattern) + + assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testDecomposedUnicode(self): + pattern = 'ba\N{COMBINING ACUTE ACCENT}' + e = ExcludePattern(pattern) + + assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testInvalidUnicode(self): + pattern = str(b'ba\x80', 'latin1') + e = ExcludePattern(pattern) + + assert not e.match("ba/foo") + assert e.match(str(b"ba\x80/foo", 'latin1')) + +#@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='OS X only test') +class OSXPatternNormalizationTestCase(BaseTestCase): + # monkey patch sys.platform to allow testing on non-OSX during development + # remove and uncomment OSX-only decorator before push + def setUp(self): + self.oldplatform = sys.platform + sys.platform = 'darwin' + pass + + # monkey patch sys.platform to allow testing on non-OSX during development + # remove and uncomment OSX-only decorator before push + def tearDown(self): + sys.platform = self.oldplatform + pass + + def testComposedUnicode(self): + pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' + i = IncludePattern(pattern) + e = ExcludePattern(pattern) + + assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testDecomposedUnicode(self): + pattern = 'ba\N{COMBINING ACUTE ACCENT}' + i = IncludePattern(pattern) + e = ExcludePattern(pattern) + + assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") + assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") + + def testInvalidUnicode(self): + pattern = str(b'ba\x80', 'latin1') + i = 
IncludePattern(pattern) + e = ExcludePattern(pattern) + + assert not i.match("ba/foo") + assert i.match(str(b"ba\x80/foo", 'latin1')) + assert not e.match("ba/foo") + assert e.match(str(b"ba\x80/foo", 'latin1')) + + def test_compression_specs(): with pytest.raises(ValueError): CompressionSpec('') From d510ff7c63a4ad64f2c6a84e2af74092366136fa Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Wed, 9 Sep 2015 13:41:34 -0400 Subject: [PATCH 134/142] Merge non-ascii Include and ExcludePattern tests to parallel the OSX non-ascii tests --- borg/testsuite/helpers.py | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 002033f57..360695ba8 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -180,52 +180,38 @@ class PatternTestCase(BaseTestCase): @pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') -class IncludePatternNonAsciiTestCase(BaseTestCase): +class PatternNonAsciiTestCase(BaseTestCase): def testComposedUnicode(self): pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' i = IncludePattern(pattern) + e = ExcludePattern(pattern) assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - def testDecomposedUnicode(self): - pattern = 'ba\N{COMBINING ACUTE ACCENT}' - i = IncludePattern(pattern) - - assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") - assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") - - def testInvalidUnicode(self): - pattern = str(b'ba\x80', 'latin1') - i = IncludePattern(pattern) - - assert not i.match("ba/foo") - assert i.match(str(b"ba\x80/foo", 'latin1')) - - -@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') -class ExcludePatternNonAsciiTestCase(BaseTestCase): - def testComposedUnicode(self): - pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' - e = ExcludePattern(pattern) - assert 
e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo") def testDecomposedUnicode(self): pattern = 'ba\N{COMBINING ACUTE ACCENT}' + i = IncludePattern(pattern) e = ExcludePattern(pattern) + assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") + assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo") assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo") def testInvalidUnicode(self): pattern = str(b'ba\x80', 'latin1') + i = IncludePattern(pattern) e = ExcludePattern(pattern) + assert not i.match("ba/foo") + assert i.match(str(b"ba\x80/foo", 'latin1')) assert not e.match("ba/foo") assert e.match(str(b"ba\x80/foo", 'latin1')) + #@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='OS X only test') class OSXPatternNormalizationTestCase(BaseTestCase): # monkey patch sys.platform to allow testing on non-OSX during development From cc13f3db979300ab1ebc982106e1ad8074133bb7 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Wed, 9 Sep 2015 13:48:46 -0400 Subject: [PATCH 135/142] Express non-ascii pattern platform skips better including correcting thinko in the commented-out OSX-only test --- borg/testsuite/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 360695ba8..077c171b2 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -179,7 +179,7 @@ class PatternTestCase(BaseTestCase): ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']) -@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='all but OS X test') +@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test') class PatternNonAsciiTestCase(BaseTestCase): def testComposedUnicode(self): pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' @@ -212,7 +212,7 @@ class PatternNonAsciiTestCase(BaseTestCase): assert e.match(str(b"ba\x80/foo", 'latin1')) 
-#@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='OS X only test') +#@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') class OSXPatternNormalizationTestCase(BaseTestCase): # monkey patch sys.platform to allow testing on non-OSX during development # remove and uncomment OSX-only decorator before push From 13ddfdf4a3b64b109dde3a7ba5333a32e14be758 Mon Sep 17 00:00:00 2001 From: Ed Blackman Date: Wed, 9 Sep 2015 15:00:58 -0400 Subject: [PATCH 136/142] Move pattern normalization decision into decorator Using a decorator moves the duplicate code in the init methods into a single decorator method, while still retaining the same runtime overhead (zero for the non-OSX path, one extra function call plus the call to unicodedata.normalize for OSX). The pattern classes are visually much cleaner, and duplicate code is limited to two lines normalizing the pattern on OSX. Because the decoration happens at class init time (vs instance init time for the previous approach), the OSX and non-OSX test cases can no longer be called in the same run, so I also removed the OSX test case monkey patching and uncommented the platform skipif decorator. --- borg/helpers.py | 52 +++++++++++++++++++-------------------- borg/testsuite/helpers.py | 15 +---------- 2 files changed, 26 insertions(+), 41 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index ecf138125..0da9918f8 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -1,6 +1,7 @@ import argparse import binascii from collections import namedtuple +from functools import wraps import grp import os import pwd @@ -222,9 +223,22 @@ def exclude_path(path, patterns): # unify the two cases, we add a path separator to the end of # the path before matching. -##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -##### For discussion only, don't merge this code! -##### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+def normalized(func): + """ Decorator for the Pattern match methods, returning a wrapper that + normalizes OSX paths to match the normalized pattern on OSX, and + returning the original method on other platforms""" + @wraps(func) + def normalize_wrapper(self, path): + return func(self, unicodedata.normalize("NFD", path)) + + if sys.platform in ('darwin',): + # HFS+ converts paths to a canonical form, so users shouldn't be + # required to enter an exact match + return normalize_wrapper + else: + # Windows and Unix filesystems allow different forms, so users + # always have to enter an exact match + return func class IncludePattern: """Literal files or directories listed on the command line @@ -233,23 +247,15 @@ class IncludePattern: path match as well. A trailing slash makes no difference. """ def __init__(self, pattern): - def match(path): - return (path+os.path.sep).startswith(self.pattern) - - # HFS+ converts paths to a canonical form, so users shouldn't be - # required to enter an exact match if sys.platform in ('darwin',): - # repository paths will be mostly in NFD, as the OSX exception list - # to NFD is small, so normalize to that form for best performance pattern = unicodedata.normalize("NFD", pattern) - self.match = lambda p: match(unicodedata.normalize("NFD", p)) - # Windows and Unix filesystems allow different forms, so users - # always have to enter an exact match - else: - self.match = match self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep + @normalized + def match(self, path): + return (path+os.path.sep).startswith(self.pattern) + def __repr__(self): return '%s(%s)' % (type(self), self.pattern) @@ -259,30 +265,22 @@ class ExcludePattern(IncludePattern): exclude the contents of a directory, but not the directory itself. 
""" def __init__(self, pattern): - def match(path): - return self.regex.match(path+os.path.sep) is not None - if pattern.endswith(os.path.sep): self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep else: self.pattern = os.path.normpath(pattern)+os.path.sep+'*' - # HFS+ converts paths to a canonical form, so users shouldn't be - # required to enter an exact match if sys.platform in ('darwin',): - # repository paths will be mostly in NFD, as the OSX exception list - # to NFD is small, so normalize to that form for best performance self.pattern = unicodedata.normalize("NFD", self.pattern) - self.match = lambda p: match(unicodedata.normalize("NFD", p)) - # Windows and Unix filesystems allow different forms, so users - # always have to enter an exact match - else: - self.match = match # fnmatch and re.match both cache compiled regular expressions. # Nevertheless, this is about 10 times faster. self.regex = re.compile(translate(self.pattern)) + @normalized + def match(self, path): + return self.regex.match(path+os.path.sep) is not None + def __repr__(self): return '%s(%s)' % (type(self), self.pattern) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 077c171b2..f755df22a 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -212,21 +212,8 @@ class PatternNonAsciiTestCase(BaseTestCase): assert e.match(str(b"ba\x80/foo", 'latin1')) -#@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') +@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test') class OSXPatternNormalizationTestCase(BaseTestCase): - # monkey patch sys.platform to allow testing on non-OSX during development - # remove and uncomment OSX-only decorator before push - def setUp(self): - self.oldplatform = sys.platform - sys.platform = 'darwin' - pass - - # monkey patch sys.platform to allow testing on non-OSX during development - # remove and uncomment OSX-only decorator before push - def 
tearDown(self): - sys.platform = self.oldplatform - pass - def testComposedUnicode(self): pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}' i = IncludePattern(pattern) From 1eecb020e88b635adbc7c2213430eed91b49bc5f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Thu, 10 Sep 2015 23:12:12 +0200 Subject: [PATCH 137/142] cython code: add some int types to get rid of unspecific python add / subtract operations they somehow pull in some floating point error code that led to an undefined symbol FPE_... when using the borgbackup wheel on some non-ubuntu/debian linux platform. --- borg/chunker.pyx | 2 +- borg/crypto.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/chunker.pyx b/borg/chunker.pyx index 1d4897db1..0faa06f38 100644 --- a/borg/chunker.pyx +++ b/borg/chunker.pyx @@ -20,7 +20,7 @@ cdef extern from "_chunker.c": cdef class Chunker: cdef _Chunker *chunker - def __cinit__(self, seed, chunk_min_exp, chunk_max_exp, hash_mask_bits, hash_window_size): + def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size): min_size = 1 << chunk_min_exp max_size = 1 << chunk_max_exp hash_mask = (1 << hash_mask_bits) - 1 diff --git a/borg/crypto.pyx b/borg/crypto.pyx index 61dbc42d5..d8143bdbc 100644 --- a/borg/crypto.pyx +++ b/borg/crypto.pyx @@ -52,7 +52,7 @@ bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0] long_to_bytes = lambda x: _long.pack(x) -def num_aes_blocks(length): +def num_aes_blocks(int length): """Return the number of AES blocks required to encrypt/decrypt *length* bytes of data. Note: this is only correct for modes without padding, like AES-CTR. """ From bc021d4ed7c5b6245413c180a5215d8c1dbbddf5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 19:16:45 +0200 Subject: [PATCH 138/142] do not test lzma level 9 compression got a MemoryError in a vagrant VM, level 9 needs a lot of memory... 
--- borg/testsuite/compress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/compress.py b/borg/testsuite/compress.py index 8019925b2..ce46c9d30 100644 --- a/borg/testsuite/compress.py +++ b/borg/testsuite/compress.py @@ -93,7 +93,7 @@ def test_compressor(): params_list += [ dict(name='lzma', level=0, buffer=buffer), dict(name='lzma', level=6, buffer=buffer), - dict(name='lzma', level=9, buffer=buffer), + # we do not test lzma on level 9 because of the huge memory needs ] for params in params_list: c = Compressor(**params) From e8f4fe0b88b63102cd04b92d526c7e9276cd776c Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 19:19:52 +0200 Subject: [PATCH 139/142] pkg-config is needed for llfuse installation --- docs/installation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 6bc38a0aa..4bc60569d 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -127,7 +127,7 @@ Debian Jessie / Ubuntu 14.04 preparations (git/pypi) # in case you get complaints about permission denied on /etc/fuse.conf: # on ubuntu this means your user is not in the "fuse" group. just add # yourself there, log out and log in again. 
- apt-get install libfuse-dev fuse + apt-get install libfuse-dev fuse pkg-config # optional: for unit testing apt-get install fakeroot @@ -151,7 +151,7 @@ Korora / Fedora 21 preparations (git/pypi) sudo dnf install lz4-devel # optional: FUSE support - to mount backup archives - sudo dnf install fuse-devel fuse + sudo dnf install fuse-devel fuse pkgconfig # optional: for unit testing sudo dnf install fakeroot From d74da7c031cc25da0b59ec420e8c815f9b6614b0 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 19:26:46 +0200 Subject: [PATCH 140/142] llfuse 0.41 install troubles on some platforms, require < 0.41 UnicodeDecodeError exception due to non-ascii llfuse setup.py --- docs/installation.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 4bc60569d..4d025c822 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -201,7 +201,8 @@ This uses the latest (source package) release from PyPi. source borg-env/bin/activate # always before using! # install borg + dependencies into virtualenv - pip install llfuse # optional, for FUSE support + pip install 'llfuse<0.41' # optional, for FUSE support + # 0.41 and 0.41.1 have unicode issues at install time pip install borgbackup Note: we install into a virtual environment here, but this is not a requirement. @@ -223,7 +224,8 @@ While we try not to break master, there are no guarantees on anything. # install borg + dependencies into virtualenv pip install sphinx # optional, to build the docs - pip install llfuse # optional, for FUSE support + pip install 'llfuse<0.41' # optional, for FUSE support + # 0.41 and 0.41.1 have unicode issues at install time cd borg pip install -r requirements.d/development.txt pip install -e . 
# in-place editable mode From cff7dffc955cd5e1b5184dff2e8123f3c5925400 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 19:38:38 +0200 Subject: [PATCH 141/142] detect lz4.h header file location use similar code as for openssl headers --- docs/usage.rst | 2 ++ setup.py | 28 +++++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 0ce547b93..da6d93f11 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -69,6 +69,8 @@ Directories: Building: BORG_OPENSSL_PREFIX Adds given OpenSSL header file directory to the default locations (setup.py). + BORG_LZ4_PREFIX + Adds given LZ4 header file directory to the default locations (setup.py). Please note: diff --git a/setup.py b/setup.py index 667ba4ee2..3c1880421 100644 --- a/setup.py +++ b/setup.py @@ -71,14 +71,36 @@ def detect_openssl(prefixes): return prefix +def detect_lz4(prefixes): + for prefix in prefixes: + filename = os.path.join(prefix, 'include', 'lz4.h') + if os.path.exists(filename): + with open(filename, 'r') as fd: + if 'LZ4_decompress_safe' in fd.read(): + return prefix + + +include_dirs = [] +library_dirs = [] + possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl', '/usr/local/borg', '/opt/local'] if os.environ.get('BORG_OPENSSL_PREFIX'): possible_openssl_prefixes.insert(0, os.environ.get('BORG_OPENSSL_PREFIX')) ssl_prefix = detect_openssl(possible_openssl_prefixes) if not ssl_prefix: raise Exception('Unable to find OpenSSL >= 1.0 headers. 
(Looked here: {})'.format(', '.join(possible_openssl_prefixes))) -include_dirs = [os.path.join(ssl_prefix, 'include')] -library_dirs = [os.path.join(ssl_prefix, 'lib')] +include_dirs.append(os.path.join(ssl_prefix, 'include')) +library_dirs.append(os.path.join(ssl_prefix, 'lib')) + + +possible_lz4_prefixes = ['/usr', '/usr/local', '/usr/local/borg', '/opt/local'] +if os.environ.get('BORG_LZ4_PREFIX'): + possible_lz4_prefixes.insert(0, os.environ.get('BORG_LZ4_PREFIX')) +lz4_prefix = detect_lz4(possible_lz4_prefixes) +if not lz4_prefix: + raise Exception('Unable to find LZ4 headers. (Looked here: {})'.format(', '.join(possible_lz4_prefixes))) +include_dirs.append(os.path.join(lz4_prefix, 'include')) +library_dirs.append(os.path.join(lz4_prefix, 'lib')) with open('README.rst', 'r') as fd: @@ -87,7 +109,7 @@ with open('README.rst', 'r') as fd: cmdclass = {'build_ext': build_ext, 'sdist': Sdist} ext_modules = [ - Extension('borg.compress', [compress_source], libraries=['lz4']), + Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs), Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs), Extension('borg.chunker', [chunker_source]), Extension('borg.hashindex', [hashindex_source]) From 6c619000e3b6714e991d62aeaf316f9a53776235 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sat, 12 Sep 2015 22:44:23 +0200 Subject: [PATCH 142/142] pull fixed argparse from pypi in case we have a buggy python see argparse 1.4.0 changelog for details --- setup.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 3c1880421..f59c734d9 100644 --- a/setup.py +++ b/setup.py @@ -4,10 +4,22 @@ import sys from glob import glob min_python = (3, 2) -if sys.version_info < min_python: +my_python = sys.version_info + +if my_python < min_python: print("Borg requires Python %d.%d or later" % min_python) 
sys.exit(1) +# msgpack pure python data corruption was fixed in 0.4.6. +# Also, we might use some rather recent API features. +install_requires=['msgpack-python>=0.4.6', ] + +if (my_python < (3, 2, 4) or + (3, 3, 0) <= my_python < (3, 3, 1)): + # argparse in stdlib does not work there due to a bug, + # pull a fixed argparse from pypi + install_requires.append("argparse>=1.4.0") + from setuptools import setup, Extension from setuptools.command.sdist import sdist @@ -158,7 +170,5 @@ setup( cmdclass=cmdclass, ext_modules=ext_modules, setup_requires=['setuptools_scm>=1.7'], - # msgpack pure python data corruption was fixed in 0.4.6. - # Also, we might use some rather recent API features. - install_requires=['msgpack-python>=0.4.6'], + install_requires=install_requires, )