Merge branch 'master' into windows

# Conflicts:
#	Vagrantfile
#	src/borg/archive.py
#	src/borg/constants.py
#	src/borg/helpers.py
#	src/borg/testsuite/archiver.py
This commit is contained in:
Marian Beermann 2016-07-17 19:06:30 +02:00
commit b48cde9f50
78 changed files with 2550 additions and 1061 deletions

1
.gitignore vendored
View file

@ -26,3 +26,4 @@ borg.exe
*.dll
.coverage
.vagrant
.eggs

View file

@ -6,19 +6,24 @@ cache:
directories:
- $HOME/.cache/pip
# note: use py 3.5.2, it has lzma support. 3.5(.0) on travis.org/trusty does not.
matrix:
include:
- python: 3.4
os: linux
dist: trusty
env: TOXENV=py34
- python: 3.5
- python: 3.5.2
os: linux
dist: trusty
env: TOXENV=py35
- python: nightly
os: linux
dist: trusty
env: TOXENV=py36
- python: 3.5
- python: 3.4
os: linux
dist: trusty
env: TOXENV=flake8
- language: generic
os: osx

View file

@ -32,8 +32,6 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then
python -m pip install --user 'virtualenv<14.0'
else
pip install 'virtualenv<14.0'
sudo add-apt-repository -y ppa:gezakovacs/lz4
sudo apt-get update
sudo apt-get install -y liblz4-dev
sudo apt-get install -y libacl1-dev
fi

View file

@ -1,5 +1,7 @@
|screencast|
.. highlight:: bash
What is BorgBackup?
===================
@ -87,7 +89,10 @@ Initialize a new backup repository and create a backup archive::
$ borg init /path/to/repo
$ borg create /path/to/repo::Saturday1 ~/Documents
Now doing another backup, just to show off the great deduplication::
Now doing another backup, just to show off the great deduplication:
.. code-block:: none
:emphasize-lines: 11
$ borg create -v --stats /path/to/repo::Saturday2 ~/Documents
-----------------------------------------------------------------------------
@ -113,13 +118,14 @@ Links
=====
* `Main Web Site <https://borgbackup.readthedocs.org/>`_
* `Releases <https://github.com/borgbackup/borg/releases>`_
* `PyPI packages <https://pypi.python.org/pypi/borgbackup>`_
* `ChangeLog <https://github.com/borgbackup/borg/blob/master/docs/changes.rst>`_
* `GitHub <https://github.com/borgbackup/borg>`_
* `Issue Tracker <https://github.com/borgbackup/borg/issues>`_
* `Bounties & Fundraisers <https://www.bountysource.com/teams/borgbackup>`_
* `Mailing List <https://mail.python.org/mailman/listinfo/borgbackup>`_
* `Releases <https://github.com/borgbackup/borg/releases>`_,
`PyPI packages <https://pypi.python.org/pypi/borgbackup>`_ and
`ChangeLog <https://github.com/borgbackup/borg/blob/master/docs/changes.rst>`_
* `GitHub <https://github.com/borgbackup/borg>`_,
`Issue Tracker <https://github.com/borgbackup/borg/issues>`_ and
`Bounties & Fundraisers <https://www.bountysource.com/teams/borgbackup>`_
* `Web-Chat (IRC) <http://webchat.freenode.net/?randomnick=1&channels=%23borgbackup&uio=MTY9dHJ1ZSY5PXRydWUa8>`_ and
`Mailing List <https://mail.python.org/mailman/listinfo/borgbackup>`_
* `License <https://borgbackup.readthedocs.org/en/stable/authors.html#license>`_
Notes

50
Vagrantfile vendored
View file

@ -42,7 +42,7 @@ def packages_redhatted
# needed to compile msgpack-python (otherwise it will use slow fallback code):
yum install -y gcc-c++
# for building python:
yum install -y zlib-devel bzip2-devel ncurses-devel readline-devel xz-devel sqlite-devel
yum install -y zlib-devel bzip2-devel ncurses-devel readline-devel xz xz-devel sqlite-devel
#yum install -y python-pip
#pip install virtualenv
touch ~vagrant/.bash_profile ; chown vagrant ~vagrant/.bash_profile
@ -53,10 +53,10 @@ def packages_darwin
return <<-EOF
# install all the (security and other) updates
sudo softwareupdate --install --all
# get osxfuse 3.0.x pre-release code from github:
curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.2.0/osxfuse-3.2.0.dmg >osxfuse.dmg
# get osxfuse 3.x pre-release code from github:
curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.3.3/osxfuse-3.3.3.dmg >osxfuse.dmg
MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \
&& sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.2.0.pkg" -target /
&& sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for OS X 3.3.3.pkg" -target /
sudo chown -R vagrant /usr/local # brew must be able to create stuff here
ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
brew update
@ -109,7 +109,6 @@ def packages_openbsd
pkg_add lz4
# pkg_add fuse # does not install, sdl dependency missing
pkg_add git # no fakeroot
pkg_add python-3.4.2
pkg_add py3-setuptools
ln -sf /usr/local/bin/python3.4 /usr/local/bin/python3
ln -sf /usr/local/bin/python3.4 /usr/local/bin/python
@ -166,7 +165,7 @@ def install_pythons(boxname)
. ~/.bash_profile
pyenv install 3.4.0 # tests
pyenv install 3.5.0 # tests
pyenv install 3.5.1 # binary build, use latest 3.5.x release
pyenv install 3.5.2 # binary build, use latest 3.5.x release
pyenv rehash
EOF
end
@ -184,8 +183,8 @@ def build_pyenv_venv(boxname)
. ~/.bash_profile
cd /vagrant/borg
# use the latest 3.5 release
pyenv global 3.5.1
pyenv virtualenv 3.5.1 borg-env
pyenv global 3.5.2
pyenv virtualenv 3.5.2 borg-env
ln -s ~/.pyenv/versions/borg-env .
EOF
end
@ -207,6 +206,22 @@ def install_borg(boxname)
EOF
end
def install_borg_no_fuse(boxname)
return <<-EOF
. ~/.bash_profile
cd /vagrant/borg
. borg-env/bin/activate
pip install -U wheel # upgrade wheel, too old for 3.5
cd borg
# clean up (wrong/outdated) stuff we likely got via rsync:
rm -f borg/*.so borg/*.cpy*
rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c
rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__
pip install -r requirements.d/development.txt
pip install -e .
EOF
end
def install_pyinstaller(boxname)
return <<-EOF
. ~/.bash_profile
@ -241,7 +256,7 @@ def build_binary_with_pyinstaller(boxname)
cd /vagrant/borg
. borg-env/bin/activate
cd borg
pyinstaller -F -n borg.exe --distpath=/vagrant/borg --clean borg/__main__.py
pyinstaller -F -n borg.exe --distpath=/vagrant/borg --clean src/borg/__main__.py --hidden-import=borg.platform.posix
EOF
end
@ -337,9 +352,7 @@ Vagrant.configure(2) do |config|
b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_32")
b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_32")
b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_32")
b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos6_32")
b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("centos6_32")
b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("centos6_32")
b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_32")
b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_32")
end
@ -355,9 +368,7 @@ Vagrant.configure(2) do |config|
b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_64")
b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_64")
b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_64")
b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos6_64")
b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("centos6_64")
b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("centos6_64")
b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_64")
b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_64")
end
@ -472,16 +483,13 @@ Vagrant.configure(2) do |config|
end
config.vm.define "openbsd64" do |b|
b.vm.synced_folder ".", "/vagrant/borg/borg", :type => "rsync", :rsync__args => ["--verbose", "--archive", "--delete", "-z"]
b.vm.synced_folder ".", "/vagrant", disabled: true
b.vm.provision "fix perms", :type => :shell, :inline => fix_perms
b.vm.box = "bodgit/openbsd-5.7-amd64"
b.vm.box = "kaorimatz/openbsd-5.9-amd64"
b.vm.provider :virtualbox do |v|
v.memory = 768
end
b.vm.provision "packages openbsd", :type => :shell, :inline => packages_openbsd
b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("openbsd64")
b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("openbsd64")
b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("openbsd64")
b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("openbsd64")
end
@ -495,7 +503,7 @@ Vagrant.configure(2) do |config|
end
b.vm.provision "packages netbsd", :type => :shell, :inline => packages_netbsd
b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("netbsd64")
b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("netbsd64")
b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("netbsd64")
b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("netbsd64")
end

View file

@ -1,3 +1,4 @@
.. highlight:: python
API Documentation
=================
@ -6,54 +7,18 @@ API Documentation
:members:
:undoc-members:
.. automodule:: borg.upgrader
:members:
:undoc-members:
.. automodule:: borg.archive
:members:
:undoc-members:
.. automodule:: borg.fuse
:members:
:undoc-members:
.. automodule:: borg.platform
:members:
:undoc-members:
.. automodule:: borg.locking
:members:
:undoc-members:
.. automodule:: borg.shellpattern
:members:
:undoc-members:
.. automodule:: borg.repository
:members:
:undoc-members:
.. automodule:: borg.lrucache
:members:
:undoc-members:
.. automodule:: borg.remote
:members:
:undoc-members:
.. automodule:: borg.hash_sizes
:members:
:undoc-members:
.. automodule:: borg.xattr
:members:
:undoc-members:
.. automodule:: borg.helpers
:members:
:undoc-members:
.. automodule:: borg.cache
:members:
:undoc-members:
@ -66,7 +31,31 @@ API Documentation
:members:
:undoc-members:
.. automodule:: borg.platform_darwin
.. automodule:: borg.helpers
:members:
:undoc-members:
.. automodule:: borg.locking
:members:
:undoc-members:
.. automodule:: borg.shellpattern
:members:
:undoc-members:
.. automodule:: borg.lrucache
:members:
:undoc-members:
.. automodule:: borg.fuse
:members:
:undoc-members:
.. automodule:: borg.xattr
:members:
:undoc-members:
.. automodule:: borg.platform
:members:
:undoc-members:
@ -79,7 +68,7 @@ API Documentation
:undoc-members:
.. automodule:: borg.compress
:members:
:members: get_compressor, Compressor, CompressorBase
:undoc-members:
.. automodule:: borg.chunker
@ -89,7 +78,3 @@ API Documentation
.. automodule:: borg.crypto
:members:
:undoc-members:
.. automodule:: borg.platform_freebsd
:members:
:undoc-members:

View file

@ -1,6 +1,55 @@
Changelog
=========
Important note about pre-1.0.4 potential repo corruption
--------------------------------------------------------
Some external errors (like network or disk I/O errors) could lead to
corruption of the backup repository due to issue #1138.
A sign that this happened is if "E" status was reported for a file that can
not be explained by problems with the source file. If you still have logs from
"borg create -v --list", you can check for "E" status.
Here is what could cause corruption and what you can do now:
1) I/O errors (e.g. repo disk errors) while writing data to repo.
This could lead to corrupted segment files.
Fix::
# check for corrupt chunks / segments:
borg check -v --repository-only REPO
# repair the repo:
borg check -v --repository-only --repair REPO
# make sure everything is fixed:
borg check -v --repository-only REPO
2) Unreliable network / unreliable connection to the repo.
This could lead to archive metadata corruption.
Fix::
# check for corrupt archives:
borg check -v --archives-only REPO
# delete the corrupt archives:
borg delete --force REPO::CORRUPT_ARCHIVE
# make sure everything is fixed:
borg check -v --archives-only REPO
3) In case you want to do more intensive checking.
The best check that everything is ok is to run a dry-run extraction::
borg extract -v --dry-run REPO::ARCHIVE
Version 1.1.0 (not released yet)
--------------------------------
@ -74,8 +123,197 @@ Other changes:
- ChunkBuffer: add test for leaving partial chunk in buffer, fixes #945
Version 1.0.3
-------------
Version 1.0.6 (2016-07-12)
--------------------------
Bug fixes:
- Linux: handle multiple LD_PRELOAD entries correctly, #1314, #1111
- Fix crash with unclear message if the libc is not found, #1314, #1111
Other changes:
- tests:
- Fixed O_NOATIME tests for Solaris and GNU Hurd, #1315
- Fixed sparse file tests for (file) systems not supporting it, #1310
- docs:
- Fixed syntax highlighting, #1313
- misc docs: added data processing overview picture
Version 1.0.6rc1 (2016-07-10)
-----------------------------
New features:
- borg check --repair: heal damaged files if missing chunks re-appear (e.g. if
the previously missing chunk was added again in a later backup archive),
#148. (*) Also improved logging.
Bug fixes:
- sync_dir: silence fsync() failing with EINVAL, #1287
Some network filesystems (like smbfs) don't support this and we use this in
repository code.
- borg mount (FUSE):
- fix directories being shadowed when contained paths were also specified,
#1295
- raise I/O Error (EIO) on damaged files (unless -o allow_damaged_files is
used), #1302. (*)
- borg extract: warn if a damaged file is extracted, #1299. (*)
- Added some missing return code checks (ChunkIndex._add, hashindex_resize).
- borg check: fix/optimize initial hash table size, avoids resize of the table.
Other changes:
- tests:
- add more FUSE tests, #1284
- deduplicate fuse (u)mount code
- fix borg binary test issues, #862
- docs:
- changelog: added release dates to older borg releases
- fix some sphinx (docs generator) warnings, #881
Notes:
(*) Some features depend on information (chunks_healthy list) added to item
metadata when a file with missing chunks was "repaired" using all-zero
replacement chunks. The chunks_healthy list is generated since borg 1.0.4,
thus borg can't recognize such "repaired" (but content-damaged) files if the
repair was done with an older borg version.
Version 1.0.5 (2016-07-07)
--------------------------
Bug fixes:
- borg mount: fix FUSE crash in xattr code on Linux introduced in 1.0.4, #1282
Other changes:
- backport some FAQ entries from master branch
- add release helper scripts
- Vagrantfile:
- centos6: no FUSE, don't build binary
- add xz for redhat-like dists
Version 1.0.4 (2016-07-07)
--------------------------
New features:
- borg serve --append-only, #1168
This was included because it was a simple change (append-only functionality
was already present via repository config file) and makes better security now
practically usable.
- BORG_REMOTE_PATH environment variable, #1258
This was included because it was a simple change (--remote-path cli option
was already present) and makes borg much easier to use if you need it.
- Repository: cleanup incomplete transaction on "no space left" condition.
In many cases, this can avoid a 100% full repo filesystem (which is very
problematic as borg always needs free space - even to delete archives).
Bug fixes:
- Fix wrong handling and reporting of OSErrors in borg create, #1138.
This was a serious issue: in the context of "borg create", errors like
repository I/O errors (e.g. disk I/O errors, ssh repo connection errors)
were handled badly and did not lead to a crash (which would be good for this
case, because the repo transaction would be incomplete and trigger a
transaction rollback to clean up).
Now, error handling for source files is cleanly separated from every other
error handling, so only problematic input files are logged and skipped.
- Implement fail-safe error handling for borg extract.
Note that this isn't nearly as critical as the borg create error handling
bug, since nothing is written to the repo. So this was "merely" misleading
error reporting.
- Add missing error handler in directory attr restore loop.
- repo: make sure write data hits disk before the commit tag (#1236) and also
sync the containing directory.
- FUSE: getxattr fail must use errno.ENOATTR, #1126
(fixes Mac OS X Finder malfunction: "zero bytes" file length, access denied)
- borg check --repair: do not lose information about the good/original chunks.
If we do not lose the original chunk IDs list when "repairing" a file
(replacing missing chunks with all-zero chunks), we have a chance to "heal"
the file back into its original state later, in case the chunks re-appear
(e.g. in a fresh backup). Healing is not implemented yet, see #148.
- fixes for --read-special mode:
- ignore known files cache, #1241
- fake regular file mode, #1214
- improve symlinks handling, #1215
- remove passphrase from subprocess environment, #1105
- Ignore empty index file (will trigger index rebuild), #1195
- add missing placeholder support for --prefix, #1027
- improve exception handling for placeholder replacement
- catch and format exceptions in arg parsing
- helpers: fix "undefined name 'e'" in exception handler
- better error handling for missing repo manifest, #1043
- borg delete:
- make it possible to delete a repo without manifest
- borg delete --forced allows to delete corrupted archives, #1139
- borg check:
- make borg check work for empty repo
- fix resync and msgpacked item qualifier, #1135
- rebuild_manifest: fix crash if 'name' or 'time' key were missing.
- better validation of item metadata dicts, #1130
- better validation of archive metadata dicts
- close the repo on exit - even if rollback did not work, #1197.
This is rather cosmetic, it avoids repo closing in the destructor.
- tests:
- fix sparse file test, #1170
- flake8: ignore new F405, #1185
- catch "invalid argument" on cygwin, #257
- fix sparseness assertion in test prep, #1264
Other changes:
- make borg build/work on OpenSSL 1.0 and 1.1, #1187
- docs / help:
- fix / clarify prune help, #1143
- fix "patterns" help formatting
- add missing docs / help about placeholders
- resources: rename atticmatic to borgmatic
- document sshd settings, #545
- more details about checkpoints, add split trick, #1171
- support docs: add freenode web chat link, #1175
- add prune visualization / example, #723
- add note that Fnmatch is default, #1247
- make clear that lzma levels > 6 are a waste of cpu cycles
- add a "do not edit" note to auto-generated files, #1250
- update cygwin installation docs
- repository interoperability with borg master (1.1dev) branch:
- borg check: read item metadata keys from manifest, #1147
- read v2 hints files, #1235
- fix hints file "unknown version" error handling bug
- tests: add tests for format_line
- llfuse: update version requirement for freebsd
- Vagrantfile:
- use openbsd 5.9, #716
- do not install llfuse on netbsd (broken)
- update OSXfuse to version 3.3.3
- use Python 3.5.2 to build the binaries
- glibc compatibility checker: scripts/glibc_check.py
- add .eggs to .gitignore
Version 1.0.3 (2016-05-20)
--------------------------
Bug fixes:
@ -104,8 +342,8 @@ Other changes:
- borg create help: document format tags, #894
Version 1.0.2
-------------
Version 1.0.2 (2016-04-16)
--------------------------
Bug fixes:
@ -140,8 +378,8 @@ Other changes:
- fix confusing usage of "repo" as archive name (use "arch")
Version 1.0.1
-------------
Version 1.0.1 (2016-04-08)
--------------------------
New features:
@ -192,8 +430,8 @@ Other changes:
- Document logo font. Recreate logo png. Remove GIMP logo file.
Version 1.0.0
-------------
Version 1.0.0 (2016-03-05)
--------------------------
The major release number change (0.x -> 1.x) indicates bigger incompatible
changes, please read the compatibility notes, adapt / test your scripts and
@ -276,8 +514,8 @@ Other changes:
- FAQ: how to limit bandwidth
Version 1.0.0rc2
----------------
Version 1.0.0rc2 (2016-02-28)
-----------------------------
New features:
@ -318,8 +556,8 @@ Other changes:
- "connection closed by remote": add FAQ entry and point to issue #636
Version 1.0.0rc1
----------------
Version 1.0.0rc1 (2016-02-07)
-----------------------------
New features:
@ -368,8 +606,8 @@ Other changes:
- misc. updates and fixes
Version 0.30.0
--------------
Version 0.30.0 (2016-01-23)
---------------------------
Compatibility notes:
@ -446,8 +684,8 @@ Other changes:
- add gcc gcc-c++ to redhat/fedora/corora install docs, fixes #583
Version 0.29.0
--------------
Version 0.29.0 (2015-12-13)
---------------------------
Compatibility notes:
@ -522,8 +760,8 @@ Other changes:
- fix wrong installation instructions for archlinux
Version 0.28.2
--------------
Version 0.28.2 (2015-11-15)
---------------------------
New features:
@ -546,8 +784,8 @@ Other changes:
- minor install docs improvements
Version 0.28.1
--------------
Version 0.28.1 (2015-11-08)
---------------------------
Bug fixes:
@ -561,8 +799,8 @@ Other changes:
- fix build on readthedocs
Version 0.28.0
--------------
Version 0.28.0 (2015-11-08)
---------------------------
Compatibility notes:
@ -659,8 +897,8 @@ Other changes:
- minor development docs update
Version 0.27.0
--------------
Version 0.27.0 (2015-10-07)
---------------------------
New features:
@ -694,8 +932,8 @@ Other changes:
- hint to single-file pyinstaller binaries from README
Version 0.26.1
--------------
Version 0.26.1 (2015-09-28)
---------------------------
This is a minor update, just docs and new pyinstaller binaries.
@ -707,8 +945,8 @@ This is a minor update, just docs and new pyinstaller binaries.
Note: if you did a python-based installation, there is no need to upgrade.
Version 0.26.0
--------------
Version 0.26.0 (2015-09-19)
---------------------------
New features:
@ -768,8 +1006,8 @@ Other changes:
- Darwin (OS X Yosemite)
Version 0.25.0
--------------
Version 0.25.0 (2015-08-29)
---------------------------
Compatibility notes:
@ -835,8 +1073,8 @@ Other changes:
- split install docs into system-specific preparations and generic instructions
Version 0.24.0
--------------
Version 0.24.0 (2015-08-09)
---------------------------
Incompatible changes (compared to 0.23):
@ -939,8 +1177,8 @@ Other changes:
- some easy micro optimizations
Version 0.23.0
--------------
Version 0.23.0 (2015-06-11)
---------------------------
Incompatible changes (compared to attic, fork related):

View file

@ -55,6 +55,8 @@ version = sw_version.split('-')[0]
# The full version, including alpha/beta/rc tags.
release = version
suppress_warnings = ['image.nonlocal_uri']
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

View file

@ -1,4 +1,5 @@
.. include:: global.rst.inc
.. highlight:: none
.. _deployment:
Deployment

View file

@ -1,4 +1,5 @@
.. include:: global.rst.inc
.. highlight:: bash
.. _development:
Development

View file

@ -1,5 +1,6 @@
.. _faq:
.. include:: global.rst.inc
.. highlight:: none
.. _faq:
Frequently asked questions
==========================
@ -142,7 +143,7 @@ C to delete all backups residing on S.
These are your options to protect against that:
- Do not allow to permanently delete data from the repo, see :ref:`append-only-mode`.
- Do not allow to permanently delete data from the repo, see :ref:`append_only_mode`.
- Use a pull-mode setup using ``ssh -R``, see :issue:`900`.
- Mount C's filesystem on another machine and then create a backup of it.
- Do not give C filesystem-level access to S.
@ -186,6 +187,24 @@ stops after a while (some minutes, hours, ... - not immediately) with
That's a good question and we are trying to find a good answer in :issue:`636`.
Why am I seeing idle borg serve processes on the repo server?
-------------------------------------------------------------
Maybe the ssh connection between client and server broke down and that was not
yet noticed on the server. Try these settings:
::
# /etc/ssh/sshd_config on borg repo server - kill connection to client
# after ClientAliveCountMax * ClientAliveInterval seconds with no response
ClientAliveInterval 20
ClientAliveCountMax 3
If you have multiple borg create ... ; borg create ... commands in an already
serialized way in a single script, you need to give them --lock-wait N (with N
being a bit more than the time the server needs to terminate broken down
connections and release the lock).
The borg cache eats way too much disk space, what can I do?
-----------------------------------------------------------
@ -223,17 +242,23 @@ Yes, |project_name| supports resuming backups.
During a backup a special checkpoint archive named ``<archive-name>.checkpoint``
is saved every checkpoint interval (the default value for this is 5
minutes) containing all the data backed-up until that point. This checkpoint
archive is a valid archive, but it is only a partial backup. Having it
in the repo until a successful, full backup is completed is useful because it
references all the transmitted chunks up to the checkpoint time. This means
that at most <checkpoint interval> worth of data needs to be retransmitted
if you restart the backup.
minutes) containing all the data backed-up until that point.
Checkpoints only happen between files (so they don't help for interruptions
happening while a very large file is being processed).
This checkpoint archive is a valid archive (all files in it are valid and complete),
but it is only a partial backup (not all files that you wanted to backup are
contained in it). Having it in the repo until a successful, full backup is
completed is useful because it references all the transmitted chunks up
to the checkpoint. This means that in case of an interruption, you only need to
retransfer the data since the last checkpoint.
If a backup was interrupted, you do not need to do any special considerations,
just invoke ``borg create`` as you always do. You may use the same archive name
as in the previous attempt or a different one (e.g. if you always include the
current datetime), it does not matter.
|project_name| always does full single-pass backups, so it will start again
from the beginning - but it will be much faster, because some of the data was
already stored into the repo (and is still referenced by the checkpoint
@ -243,6 +268,28 @@ Once your backup has finished successfully, you can delete all
``<archive-name>.checkpoint`` archives. If you run ``borg prune``, it will
also care for deleting unneeded checkpoints.
How can I backup huge file(s) over an unstable connection?
---------------------------------------------------------
You can use this "split trick" as a workaround for the in-between-files-only
checkpoints (see above), huge files and an unstable connection to the repository:
Split the huge file(s) into parts of manageable size (e.g. 100MB) and create
a temporary archive of them. Borg will create checkpoints now more frequently
than if you try to backup the files in their original form (e.g. 100GB).
After that, you can remove the parts again and backup the huge file(s) in
their original form. This will now work a lot faster as a lot of content chunks
are already in the repository.
After you have successfully backed up the huge original file(s), you can remove
the temporary archive you made from the parts.
We realize that this is just a better-than-nothing workaround, see :issue:`1198`
for a potential solution.
Please note that this workaround only helps you for backup, not for restore.
If it crashes with a UnicodeError, what can I do?
-------------------------------------------------

View file

@ -1,4 +1,5 @@
.. include:: global.rst.inc
.. highlight:: bash
.. _installation:
Installation
@ -25,9 +26,17 @@ Distribution Package
--------------------
Some distributions might offer a ready-to-use ``borgbackup``
package which can be installed with the package manager. As |project_name| is
still a young project, such a package might be not available for your system
yet.
package which can be installed with the package manager.
.. important:: Those packages may not be up to date with the latest
|project_name| releases. Before submitting a bug
report, check the package version and compare that to
our latest release then review :doc:`changes` to see if
the bug has been fixed. Report bugs to the package
maintainer rather than directly to |project_name| if the
package is out of date in the distribution.
.. keep this list in alphabetical order
============ ============================================= =======
Distribution Source Command
@ -36,13 +45,16 @@ Arch Linux `[community]`_ ``pacman -S borg``
Debian `jessie-backports`_, `stretch`_, `sid`_ ``apt install borgbackup``
Gentoo `ebuild`_ ``emerge borgbackup``
GNU Guix `GNU Guix`_ ``guix package --install borg``
FreeBSD `Ports-Tree`_ ``cd /usr/ports/archivers/py-borgbackup && make install clean``
Fedora/RHEL `Fedora official repository`_, `EPEL`_ ``dnf install borgbackup``
FreeBSD `FreeBSD ports`_ ``cd /usr/ports/archivers/py-borgbackup && make install clean``
Mageia `cauldron`_ ``urpmi borgbackup``
NetBSD `pkgsrc`_ ``pkg_add py-borgbackup``
NixOS `.nix file`_ N/A
OpenBSD `OpenBSD ports`_ ``pkg_add borgbackup``
OpenIndiana `OpenIndiana hipster repository`_ ``pkg install borg``
openSUSE `openSUSE official repository`_ ``zypper in python3-borgbackup``
Fedora `Fedora official repository`_ ``dnf install borgbackup``
OS X `Brew cask`_ ``brew cask install borgbackup``
Raspbian `Raspbian testing`_ ``apt install borgbackup``
Ubuntu `16.04`_, backports (PPA): `15.10`_, `14.04`_ ``apt install borgbackup``
============ ============================================= =======
@ -50,26 +62,27 @@ Ubuntu `16.04`_, backports (PPA): `15.10`_, `14.04`_ ``apt install borgbac
.. _jessie-backports: https://packages.debian.org/jessie-backports/borgbackup
.. _stretch: https://packages.debian.org/stretch/borgbackup
.. _sid: https://packages.debian.org/sid/borgbackup
.. _Fedora official repository: https://apps.fedoraproject.org/packages/borgbackup
.. _EPEL: https://admin.fedoraproject.org/pkgdb/package/rpms/borgbackup/
.. _FreeBSD ports: http://www.freshports.org/archivers/py-borgbackup/
.. _ebuild: https://packages.gentoo.org/packages/app-backup/borgbackup
.. _Ports-Tree: http://www.freshports.org/archivers/py-borgbackup/
.. _GNU Guix: https://www.gnu.org/software/guix/package-list.html#borg
.. _pkgsrc: http://pkgsrc.se/sysutils/py-borgbackup
.. _cauldron: http://madb.mageia.org/package/show/application/0/release/cauldron/name/borgbackup
.. _.nix file: https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/backup/borg/default.nix
.. _OpenBSD ports: http://cvsweb.openbsd.org/cgi-bin/cvsweb/ports/sysutils/borgbackup/
.. _OpenIndiana hipster repository: http://pkg.openindiana.org/hipster/en/search.shtml?token=borg&action=Search
.. _openSUSE official repository: http://software.opensuse.org/package/borgbackup
.. _Brew cask: http://caskroom.io/
.. _Raspbian testing: http://archive.raspbian.org/raspbian/pool/main/b/borgbackup/
.. _16.04: https://launchpad.net/ubuntu/xenial/+source/borgbackup
.. _15.10: https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup
.. _14.04: https://launchpad.net/~costamagnagianfranco/+archive/ubuntu/borgbackup
.. _.nix file: https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/backup/borg/default.nix
.. _OpenBSD ports: http://cvsweb.openbsd.org/cgi-bin/cvsweb/ports/sysutils/borgbackup/
.. _openSUSE official repository: http://software.opensuse.org/package/borgbackup
.. _Fedora official repository: https://apps.fedoraproject.org/packages/borgbackup
.. _Brew cask: http://caskroom.io/
.. _GNU Guix: https://www.gnu.org/software/guix/package-list.html#borg
Please ask package maintainers to build a package or, if you can package /
submit it yourself, please help us with that! See :issue:`105` on
github to followup on packaging efforts.
If a package is available, it might be interesting to check its version
and compare that to our latest release and review the :doc:`changes`.
.. _pyinstaller-binary:
Standalone Binary
@ -219,15 +232,14 @@ Cygwin
.. note::
Running under Cygwin is experimental and has only been tested with Cygwin
(x86-64) v2.1.0.
(x86-64) v2.5.2.
Use the Cygwin installer to install the dependencies::
python3 python3-setuptools
python3-cython # not needed for releases
binutils gcc-g++
libopenssl openssl-devel
liblz4_1 liblz4-devel # from cygwinports.org
liblz4_1 liblz4-devel
git make openssh
You can then install ``pip`` and ``virtualenv``::
@ -235,10 +247,6 @@ You can then install ``pip`` and ``virtualenv``::
easy_install-3.4 pip
pip install virtualenv
In case the creation of the virtual environment fails, try deleting this file::
/usr/lib/python3.4/__pycache__/platform.cpython-34.pyc
.. _pip-installation:

View file

@ -1,4 +1,5 @@
.. include:: global.rst.inc
.. highlight:: none
.. _internals:
Internals
@ -280,6 +281,7 @@ emptied to 25%, its size is shrinked. So operations on it have a variable
complexity between constant and linear with low factor, and memory overhead
varies between 33% and 300%.
.. _cache-memory-usage:
Indexes / Caches memory usage
-----------------------------

View file

@ -0,0 +1,41 @@
BorgBackup from 10.000m
=======================
+--------+ +--------+ +--------+
|archive0| |archive1| ... |archiveN|
+--------+ +--------+ +--+-----+
| | |
| | |
| +---+ |
| | |
| | |
+------+-------+ |
| | | |
/chunk\/chunk\/chunk\... /maybe different chunks lists\
+-----------------------------------------------------------------+
|item list |
+-----------------------------------------------------------------+
|
+-------------------------------------+--------------+
| | |
| | |
+-------------+ +-------------+ |
|item0 | |item1 | |
| - owner | | - owner | |
| - size | | - size | ...
| - ... | | - ... |
| - chunks | | - chunks |
+----+--------+ +-----+-------+
| |
| +-----+----------------------------+-----------------+
| | | |
+-o-----o------------+ |
| | | | |
/chunk0\/chunk1\ ... /chunkN\ /chunk0\/chunk1\ ... /chunkN'\
+-----------------------------+ +------------------------------+
|file0 | |file0' |
+-----------------------------+ +------------------------------+
Thanks to anarcat for drawing the picture!

View file

@ -0,0 +1,93 @@
borg prune visualized
=====================
Assume it is 2016-01-01, today's backup has not yet been made and you have
created at least one backup on each day in 2015 except on 2015-12-20 (no
backup made on that day).
This is what borg prune --keep-daily 14 --keep-monthly 6 would keep.
Backups kept by the --keep-daily rule are marked by a "d" to the right,
backups kept by the --keep-monthly rule are marked by a "m" to the right.
Calendar view
-------------
2015
January February March
Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su
1 2 3 4 1 1
5 6 7 8 9 10 11 2 3 4 5 6 7 8 2 3 4 5 6 7 8
12 13 14 15 16 17 18 9 10 11 12 13 14 15 9 10 11 12 13 14 15
19 20 21 22 23 24 25 16 17 18 19 20 21 22 16 17 18 19 20 21 22
26 27 28 29 30 31 23 24 25 26 27 28 23 24 25 26 27 28 29
30 31
April May June
Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su
1 2 3 4 5 1 2 3 1 2 3 4 5 6 7
6 7 8 9 10 11 12 4 5 6 7 8 9 10 8 9 10 11 12 13 14
13 14 15 16 17 18 19 11 12 13 14 15 16 17 15 16 17 18 19 20 21
20 21 22 23 24 25 26 18 19 20 21 22 23 24 22 23 24 25 26 27 28
27 28 29 30 25 26 27 28 29 30 31 29 30m
July August September
Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su
1 2 3 4 5 1 2 1 2 3 4 5 6
6 7 8 9 10 11 12 3 4 5 6 7 8 9 7 8 9 10 11 12 13
13 14 15 16 17 18 19 10 11 12 13 14 15 16 14 15 16 17 18 19 20
20 21 22 23 24 25 26 17 18 19 20 21 22 23 21 22 23 24 25 26 27
27 28 29 30 31m 24 25 26 27 28 29 30 28 29 30m
31m
October November December
Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su Mo Tu We Th Fr Sa Su
1 2 3 4 1 1 2 3 4 5 6
5 6 7 8 9 10 11 2 3 4 5 6 7 8 7 8 9 10 11 12 13
12 13 14 15 16 17 18 9 10 11 12 13 14 15 14 15 16 17d18d19d20
19 20 21 22 23 24 25 16 17 18 19 20 21 22 21d22d23d24d25d26d27d
26 27 28 29 30 31m 23 24 25 26 27 28 29 28d29d30d31d
30m
List view
---------
--keep-daily 14 --keep-monthly 6
-------------------------------------------------
1. 2015-12-31 (2015-12-31 kept by daily rule)
2. 2015-12-30 1. 2015-11-30
3. 2015-12-29 2. 2015-10-31
4. 2015-12-28 3. 2015-09-30
5. 2015-12-27 4. 2015-08-31
6. 2015-12-26 5. 2015-07-31
7. 2015-12-25 6. 2015-06-30
8. 2015-12-24
9. 2015-12-23
10. 2015-12-22
11. 2015-12-21
(no backup made on 2015-12-20)
12. 2015-12-19
13. 2015-12-18
14. 2015-12-17
Notes
-----
2015-12-31 is kept due to the --keep-daily 14 rule (because it is applied
first), not due to the --keep-monthly rule.
Because of that, the --keep-monthly 6 rule keeps Nov, Oct, Sep, Aug, Jul and
Jun. December is not considered for this rule, because that backup was already
kept by the daily rule.
2015-12-17 is kept to satisfy the --keep-daily 14 rule - because no backup was
made on 2015-12-20. If a backup had been made on that day, it would not keep
the one from 2015-12-17.
We did not include yearly, weekly, hourly, minutely or secondly rules to keep
this example simple. They all work in basically the same way.
The weekly rule is easy to understand roughly, but hard to understand in all
details. If interested, read "ISO 8601:2000 standard week-based year".

View file

@ -1,4 +1,5 @@
.. include:: global.rst.inc
.. highlight:: bash
.. _quickstart:
Quick Start
@ -11,11 +12,15 @@ The next section continues by showing how backups can be automated.
Important note about free space
-------------------------------
Before you start creating backups, please make sure that there is **always**
Before you start creating backups, please make sure that there is *always*
a good amount of free space on the filesystem that has your backup repository
(and also on ~/.cache). It is hard to tell how much, maybe 1-5%.
(and also on ~/.cache). A few GB should suffice for most hard-drive sized
repositories. See also :ref:`cache-memory-usage`.
If you run out of disk space, it can be hard or impossible to free space,
If |project_name| runs out of disk space, it tries to free as much space as it
can while aborting the current operation safely, which allows you to free more
space by deleting/pruning archives. This mechanism is not bullet-proof though.
If you *really* run out of disk space, it can be hard or impossible to free space,
because |project_name| needs free space to operate - even to delete backup
archives. There is a ``--save-space`` option for some commands, but even with
that |project_name| will need free space to operate.
@ -103,10 +108,11 @@ Automating backups
The following example script backs up ``/home`` and ``/var/www`` to a remote
server. The script also uses the :ref:`borg_prune` subcommand to maintain a
certain number of old archives::
certain number of old archives:
::
#!/bin/sh
# setting this, so the repo does not need to be given on the commandline:
export BORG_REPO=username@remoteserver.com:backup
@ -115,18 +121,18 @@ certain number of old archives::
export BORG_PASSPHRASE=mysecret
# Backup most important stuff:
borg create --stats -C lz4 ::`hostname`-`date +%Y-%m-%d` \
/etc \
/home \
/var \
--exclude '/home/*/.cache' \
borg create --stats -C lz4 ::'{hostname}-{now:%Y-%m-%d}' \
/etc \
/home \
/var \
--exclude '/home/*/.cache' \
--exclude '*.pyc'
# Use the `prune` subcommand to maintain 7 daily, 4 weekly and 6 monthly
# archives of THIS machine. Using --prefix is very important to
# archives of THIS machine. The '{hostname}-' prefix is very important to
# limit prune's operation to this machine's archives and not apply to
# other machine's archives also.
borg prune -v --prefix `hostname`- \
borg prune -v --prefix '{hostname}-' \
--keep-daily=7 --keep-weekly=4 --keep-monthly=6
.. backup_compression:

View file

@ -17,25 +17,19 @@ Some of them refer to attic, but you can do the same stuff (and more) with borgb
- `TW's slides for borgbackup talks / lightning talks <https://slides.com/thomaswaldmann>`_ (just grab the latest ones)
- "Attic / Borg Backup" talk from GPN 2015 (video, german audio, english slides):
`media.ccc.de <https://media.ccc.de/browse/conferences/gpn/gpn15/gpn15-6942-attic_borg_backup.html#video>`_
or
`youtube <https://www.youtube.com/watch?v=Nb5nXEKSN-k>`_
- `Attic / Borg Backup talk from GPN 2015 (media.ccc.de) <https://media.ccc.de/browse/conferences/gpn/gpn15/gpn15-6942-attic_borg_backup.html#video>`_
- `Attic / Borg Backup talk from GPN 2015 (youtube) <https://www.youtube.com/watch?v=Nb5nXEKSN-k>`_
- "Attic" talk from Easterhegg 2015 (video, german audio, english slides):
`media.ccc.de <https://media.ccc.de/v/eh15_-_49_-__-_saal_-_201504042130_-_attic_-_the_holy_grail_of_backups_-_thomas#video>`_
or
`youtube <https://www.youtube.com/watch?v=96VEAAFDtJw>`_
- `Attic talk from Easterhegg 2015 (media.ccc.de) <https://media.ccc.de/v/eh15_-_49_-__-_saal_-_201504042130_-_attic_-_the_holy_grail_of_backups_-_thomas#video>`_
- `Attic talk from Easterhegg 2015 (youtube) <https://www.youtube.com/watch?v=96VEAAFDtJw>`_
- "Attic Backup: Mount your encrypted backups over ssh", 2014 (video, english):
`youtube <https://www.youtube.com/watch?v=BVXDFv9YMp8>`_
- `Attic Backup: Mount your encrypted backups over ssh (youtube) <https://www.youtube.com/watch?v=BVXDFv9YMp8>`_
- "Evolution of Borg", Oct 2015 (gource visualization of attic and borg development):
`youtube <https://www.youtube.com/watch?v=K4k_4wDkG6Q>`_
- `Evolution of Borg (youtube) <https://www.youtube.com/watch?v=K4k_4wDkG6Q>`_
Software
--------
- `BorgWeb - a very simple web UI for BorgBackup <https://borgweb.readthedocs.io/>`_
- some other stuff found at the `BorgBackup Github organisation <https://github.com/borgbackup/>`_
- `atticmatic <https://github.com/witten/atticmatic/>`_ (includes borgmatic)
- `borgmatic <https://torsion.org/borgmatic/>`_ - simple wrapper script for BorgBackup that creates and prunes backups

View file

@ -16,13 +16,19 @@ ticket on the project's `issue tracker`_.
For more general questions or discussions, IRC or mailing list are preferred.
IRC
---
Chat (IRC)
----------
Join us on channel #borgbackup on chat.freenode.net.
As usual on IRC, just ask or tell directly and then patiently wait for replies.
Stay connected.
You could use the following link (after connecting, you can change the random
nickname you get by typing "/nick mydesirednickname"):
http://webchat.freenode.net/?randomnick=1&channels=%23borgbackup&uio=MTY9dHJ1ZSY5PXRydWUa8
Mailing list
------------

View file

@ -1,4 +1,5 @@
.. include:: global.rst.inc
.. highlight:: none
.. _detailed_usage:
Usage
@ -79,6 +80,9 @@ General:
BORG_RSH
When set, use this command instead of ``ssh``. This can be used to specify ssh options, such as
a custom identity file ``ssh -i /path/to/private/key``. See ``man ssh`` for other options.
BORG_REMOTE_PATH
When set, use the given path/filename as remote path (default is "borg").
Using ``--remote-path PATH`` commandline option overrides the environment variable.
TMPDIR
where temporary files are stored (might need a lot of temporary space for some operations)
@ -446,14 +450,17 @@ prefix "foo" if you do not also want to match "foobar".
It is strongly recommended to always run ``prune --dry-run ...`` first so you
will see what it would do without it actually doing anything.
There is also a visualized prune example in ``docs/misc/prune-example.txt``.
::
# Keep 7 end of day and 4 additional end of week archives.
# Do a dry-run without actually deleting anything.
$ borg prune --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo
# Same as above but only apply to archive names starting with "foo":
$ borg prune --keep-daily=7 --keep-weekly=4 --prefix=foo /path/to/repo
# Same as above but only apply to archive names starting with the hostname
# of the machine followed by a "-" character:
$ borg prune --keep-daily=7 --keep-weekly=4 --prefix='{hostname}-' /path/to/repo
# Keep 7 end of day, 4 additional end of week archives,
# and an end of month archive for every month:
@ -735,32 +742,34 @@ For more details, see :ref:`chunker_details`.
--read-special
~~~~~~~~~~~~~~
The option ``--read-special`` is not intended for normal, filesystem-level (full or
partly-recursive) backups. You only give this option if you want to do something
rather ... special -- and if you have hand-picked some files that you want to treat
that way.
The --read-special option is special - you do not want to use it for normal
full-filesystem backups, but rather after carefully picking some targets for it.
``borg create --read-special`` will open all files without doing any special
treatment according to the file type (the only exception here are directories:
they will be recursed into). Just imagine what happens if you do ``cat
filename`` --- the content you will see there is what borg will back up for
that filename.
The option ``--read-special`` triggers special treatment for block and char
device files as well as FIFOs. Instead of storing them as such a device (or
FIFO), they will get opened, their content will be read and in the backup
archive they will show up like a regular file.
So, for example, symlinks will be followed, block device content will be read,
named pipes / UNIX domain sockets will be read.
Symlinks will also get special treatment if (and only if) they point to such
a special file: instead of storing them as a symlink, the target special file
will get processed as described above.
You need to be careful with what you give as filename when using ``--read-special``,
e.g. if you give ``/dev/zero``, your backup will never terminate.
One intended use case of this is backing up the contents of one or multiple
block devices, like e.g. LVM snapshots or inactive LVs or disk partitions.
The given files' metadata is saved as it would be saved without
``--read-special`` (e.g. its name, its size [might be 0], its mode, etc.) -- but
additionally, also the content read from it will be saved for it.
You need to be careful about what you include when using ``--read-special``,
e.g. if you include ``/dev/zero``, your backup will never terminate.
Restoring such files' content is currently only supported one at a time via
``--stdout`` option (and you have to redirect stdout to where ever it shall go,
maybe directly into an existing device file of your choice or indirectly via
``dd``).
To some extent, mounting a backup archive with the backups of special files
via ``borg mount`` and then loop-mounting the image files from inside the mount
point will work. If you plan to access a lot of data in there, it likely will
scale and perform better if you do not work via the FUSE mount.
Example
+++++++
@ -797,7 +806,7 @@ Now, let's see how to restore some LVs from such a backup. ::
$ borg extract --stdout /path/to/repo::arch dev/vg0/home-snapshot > /dev/vg0/home
.. _append-only-mode:
.. _append_only_mode:
Append-only mode
~~~~~~~~~~~~~~~~
@ -814,6 +823,13 @@ To activate append-only mode, edit the repository ``config`` file and add a line
In append-only mode Borg will create a transaction log in the ``transactions`` file,
where each line is a transaction and a UTC timestamp.
In addition, ``borg serve`` can act as if a repository is in append-only mode with
its option ``--append-only``. This can be very useful for fine-tuning access control
in ``.ssh/authorized_keys`` ::
command="borg serve --append-only ..." ssh-rsa <key used for not-always-trustable backup clients>
command="borg serve ..." ssh-rsa <key used for backup management>
Example
+++++++

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_break-lock:
borg break-lock

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_change-passphrase:
borg change-passphrase

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_check:
borg check
@ -15,6 +17,8 @@ optional arguments
| only perform repository checks
``--archives-only``
| only perform archives checks
``--verify-data``
| perform cryptographic archive data integrity verification (conflicts with --repository-only)
``--repair``
| attempt to repair any inconsistencies found
``--save-space``
@ -23,6 +27,8 @@ optional arguments
| only check last N archives (Default: all)
``-P``, ``--prefix``
| only consider archive names starting with this prefix
``-p``, ``--progress``
| show progress display while checking
`Common options`_
|
@ -53,9 +59,12 @@ Second, the consistency and correctness of the archive metadata is verified:
- Check if archive metadata chunk is present. If not, remove archive from
manifest.
- For all files (items) in the archive, for all chunks referenced by these
files, check if chunk is present (if not and we are in repair mode, replace
it with a same-size chunk of zeros). This requires reading of archive and
file metadata, but not data.
files, check if chunk is present.
If a chunk is not present and we are in repair mode, replace it with a same-size
replacement chunk of zeros.
If a previously lost chunk reappears (e.g. via a later backup) and we are in
repair mode, the all-zero replacement chunk will be replaced by the correct chunk.
This requires reading of archive and file metadata, but not data.
- If we are in repair mode and we checked all the archives: delete orphaned
chunks from the repo.
- If you use a remote repo server via ssh:, the archive check is executed on
@ -64,3 +73,15 @@ Second, the consistency and correctness of the archive metadata is verified:
required).
- The archive checks can be time consuming, they can be skipped using the
--repository-only option.
The --verify-data option will perform a full integrity verification (as opposed to
checking the CRC32 of the segment) of data, which means reading the data from the
repository, decrypting and decompressing it. This is a cryptographic verification,
which will detect (accidental) corruption. For encrypted repositories it is
tamper-resistant as well, unless the attacker has access to the keys.
It is also very slow.
--verify-data only verifies data used by the archives specified with --last,
--prefix or an explicitly named archive. If none of these are passed,
all data in the repository is verified.

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_create:
borg create
@ -47,7 +49,7 @@ Filesystem options
``--ignore-inode``
| ignore inode data in the file metadata cache used to detect unchanged files.
``--read-special``
| open and read special files as if they were regular files
| open and read block and char device files as well as FIFOs as if they were regular files. Also follows symlinks pointing to these kinds of files.
Archive options
``--comment COMMENT``
@ -55,17 +57,21 @@ Archive options
``--timestamp yyyy-mm-ddThh:mm:ss``
| manually specify the archive creation date/time (UTC). alternatively, give a reference file/directory.
``-c SECONDS``, ``--checkpoint-interval SECONDS``
| write checkpoint every SECONDS seconds (Default: 300)
| write checkpoint every SECONDS seconds (Default: 1800)
``--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE``
| specify the chunker parameters. default: 19,23,21,4095
``-C COMPRESSION``, ``--compression COMPRESSION``
| select compression algorithm (and level):
| none == no compression (default),
| auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]
| being any valid compression algorithm (and optional level),
| lz4 == lz4,
| zlib == zlib (default level 6),
| zlib,0 .. zlib,9 == zlib (with level 0..9),
| lzma == lzma (default level 6),
| lzma,0 .. lzma,9 == lzma (with level 0..9).
``--compression-from COMPRESSIONCONFIG``
| read compression patterns from COMPRESSIONCONFIG, one per line
Description
~~~~~~~~~~~
@ -79,7 +85,7 @@ The archive name needs to be unique. It must not end in '.checkpoint' or
checkpoints and treated in special ways.
In the archive name, you may use the following format tags:
{now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}
{now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}, {uuid4}
To speed up pulling backups over sshfs and similar network file systems which do
not provide correct inode information the --ignore-inode flag can be used. This
@ -87,3 +93,4 @@ potentially decreases reliability of change detection, while avoiding always rea
all files on these file systems.
See the output of the "borg help patterns" command for more help on exclude patterns.
See the output of the "borg help placeholders" command for more help on placeholders.

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_debug-delete-obj:
borg debug-delete-obj

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_debug-dump-archive-items:
borg debug-dump-archive-items

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_debug-get-obj:
borg debug-get-obj

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_debug-put-obj:
borg debug-put-obj

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_delete:
borg delete
@ -17,6 +19,8 @@ optional arguments
| print statistics for the deleted archive
``-c``, ``--cache-only``
| delete only the local cache for the given repository
``--force``
| force deletion of corrupted archives
``--save-space``
| work slower, but using less space

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_diff:
borg diff

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_extract:
borg extract
@ -42,3 +44,7 @@ by passing a list of ``PATHs`` as arguments. The file selection can further
be restricted by using the ``--exclude`` option.
See the output of the "borg help patterns" command for more help on exclude patterns.
By using ``--dry-run``, you can do all extraction steps except actually writing the
output data: reading metadata and data chunks from the repo, checking the hash/hmac,
decrypting, decompressing.

View file

@ -1,31 +1,71 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_placeholders:
borg help placeholders
~~~~~~~~~~~~~~~~~~~~~~
Repository (or Archive) URLs and --prefix values support these placeholders:
{hostname}
The (short) hostname of the machine.
{fqdn}
The full name of the machine.
{now}
The current local date and time.
{utcnow}
The current UTC date and time.
{user}
The user name (or UID, if no name is available) of the user running borg.
{pid}
The current process ID.
Examples::
borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
borg prune --prefix '{hostname}-' ...
.. _borg_patterns:
borg help patterns
~~~~~~~~~~~~~~~~~~
::
Exclusion patterns support four separate styles, fnmatch, shell, regular
expressions and path prefixes. If followed by a colon (':') the first two
characters of a pattern are used as a style selector. Explicit style
selection is necessary when a non-default style is desired or when the
desired pattern starts with two alphanumeric characters followed by a colon
(i.e. `aa:something/*`).
expressions and path prefixes. By default, fnmatch is used. If followed
by a colon (':') the first two characters of a pattern are used as a
style selector. Explicit style selection is necessary when a
non-default style is desired or when the desired pattern starts with
two alphanumeric characters followed by a colon (i.e. `aa:something/*`).
`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
These patterns use a variant of shell pattern syntax, with '*' matching
any number of characters, '?' matching any single character, '[...]'
matching any single character specified, including ranges, and '[!...]'
matching any character not specified. For the purpose of these patterns,
the path separator ('\' for Windows and '/' on other systems) is not
treated specially. Wrap meta-characters in brackets for a literal match
(i.e. `[?]` to match the literal character `?`). For a path to match
a pattern, it must completely match from start to end, or must match from
the start to just before a path separator. Except for the root path,
paths will never end in the path separator when matching is attempted.
Thus, if a given pattern ends in a path separator, a '*' is appended
before matching is attempted.
This is the default style. These patterns use a variant of shell
pattern syntax, with '*' matching any number of characters, '?'
matching any single character, '[...]' matching any single
character specified, including ranges, and '[!...]' matching any
character not specified. For the purpose of these patterns, the
path separator ('\' for Windows and '/' on other systems) is not
treated specially. Wrap meta-characters in brackets for a literal
match (i.e. `[?]` to match the literal character `?`). For a path
to match a pattern, it must completely match from start to end, or
must match from the start to just before a path separator. Except
for the root path, paths will never end in the path separator when
matching is attempted. Thus, if a given pattern ends in a path
separator, a '*' is appended before matching is attempted.
Shell-style patterns, selector `sh:`
@ -61,32 +101,33 @@ selector prefix is also supported for patterns loaded from a file. Due to
whitespace removal paths with whitespace at the beginning or end can only be
excluded using regular expressions.
Examples:
Examples::
# Exclude '/home/user/file.o' but not '/home/user/file.odt':
$ borg create -e '*.o' backup /
# Exclude '/home/user/file.o' but not '/home/user/file.odt':
$ borg create -e '*.o' backup /
# Exclude '/home/user/junk' and '/home/user/subdir/junk' but
# not '/home/user/importantjunk' or '/etc/junk':
$ borg create -e '/home/*/junk' backup /
# Exclude '/home/user/junk' and '/home/user/subdir/junk' but
# not '/home/user/importantjunk' or '/etc/junk':
$ borg create -e '/home/*/junk' backup /
# Exclude the contents of '/home/user/cache' but not the directory itself:
$ borg create -e /home/user/cache/ backup /
# Exclude the contents of '/home/user/cache' but not the directory itself:
$ borg create -e /home/user/cache/ backup /
# The file '/home/user/cache/important' is *not* backed up:
$ borg create -e /home/user/cache/ backup / /home/user/cache/important
# The file '/home/user/cache/important' is *not* backed up:
$ borg create -e /home/user/cache/ backup / /home/user/cache/important
# The contents of directories in '/home' are not backed up when their name
# ends in '.tmp'
$ borg create --exclude 're:^/home/[^/]+\.tmp/' backup /
# The contents of directories in '/home' are not backed up when their name
# ends in '.tmp'
$ borg create --exclude 're:^/home/[^/]+\.tmp/' backup /
# Load exclusions from file
$ cat >exclude.txt <<EOF
# Comment line
/home/*/junk
*.tmp
fm:aa:something/*
re:^/home/[^/]\.tmp/
sh:/home/*/.thumbnails
EOF
$ borg create --exclude-from exclude.txt backup /
# Load exclusions from file
$ cat >exclude.txt <<EOF
# Comment line
/home/*/junk
*.tmp
fm:aa:something/*
re:^/home/[^/]\.tmp/
sh:/home/*/.thumbnails
EOF
$ borg create --exclude-from exclude.txt backup /

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_info:
borg info
@ -17,3 +19,7 @@ Description
~~~~~~~~~~~
This command displays some detailed information about the specified archive.
The "This archive" line refers exclusively to this archive:
"Deduplicated size" is the size of the unique chunks stored only for this
archive. Non-unique / common chunks show up under "All archives".

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_init:
borg init
@ -22,4 +24,45 @@ Description
This command initializes an empty repository. A repository is a filesystem
directory containing the deduplicated data from zero or more archives.
Encryption can be enabled at repository init time.
Encryption can be enabled at repository init time (the default).
It is not recommended to disable encryption. Repository encryption protects you
e.g. against the case that an attacker has access to your backup repository.
But be careful with the key / the passphrase:
If you want "passphrase-only" security, use the repokey mode. The key will
be stored inside the repository (in its "config" file). In the above-mentioned
attack scenario, the attacker will have the key (but not the passphrase).
If you want "passphrase and having-the-key" security, use the keyfile mode.
The key will be stored in your home directory (in .config/borg/keys). In
the attack scenario, the attacker who has just access to your repo won't have
the key (and also not the passphrase).
Make a backup copy of the key file (keyfile mode) or repo config file
(repokey mode) and keep it at a safe place, so you still have the key in
case it gets corrupted or lost. Also keep the passphrase at a safe place.
The backup that is encrypted with that key won't help you with that, of course.
Make sure you use a good passphrase. Not too short, not too simple. The real
encryption / decryption key is encrypted with / locked by your passphrase.
If an attacker gets your key, he can't unlock and use it without knowing the
passphrase.
Be careful with special or non-ASCII characters in your passphrase:
- Borg processes the passphrase as Unicode (and encodes it as UTF-8),
so it does not have problems dealing with even the strangest characters.
- BUT: that does not necessarily apply to your OS / VM / keyboard configuration.
So better use a long passphrase made from simple ASCII characters than one that
includes non-ASCII characters or characters that are hard/impossible to enter on
a different keyboard layout.
You can change your passphrase for existing repos at any time, it won't affect
the encryption/decryption key or other secrets.
When encrypting, AES-CTR-256 is used for encryption, and HMAC-SHA256 for
authentication. Hardware acceleration will be used automatically.

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_list:
borg list
@ -35,8 +37,22 @@ This command lists the contents of a repository or an archive.
See the "borg help patterns" command for more help on exclude patterns.
The following keys are available for --format when listing files:
The following keys are available for --format:
- NEWLINE: OS dependent line separator
- NL: alias of NEWLINE
- NUL: NUL character for creating print0 / xargs -0 like output, see barchive/bpath
- SPACE
- TAB
- CR
- LF
-- Keys for listing repository archives:
- archive: archive name interpreted as text (might be missing non-text characters, see barchive)
- barchive: verbatim archive name, can contain any character except NUL
- time: time of creation of the archive
- id: internal ID of the archive
-- Keys for listing archive files:
- type
- mode
- uid
@ -47,6 +63,7 @@ The following keys are available for --format when listing files:
- bpath: verbatim POSIX path, can contain any character except NUL
- source: link target for links (identical to linktarget)
- linktarget
- flags
- size
- csize: compressed size
@ -70,11 +87,3 @@ The following keys are available for --format when listing files:
- archiveid
- archivename
- extra: prepends {source} with " -> " for soft links and " link to " for hard links
- NEWLINE: OS dependent line separator
- NL: alias of NEWLINE
- NUL: NUL character for creating print0 / xargs -0 like output, see bpath
- SPACE
- TAB
- CR
- LF

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_migrate-to-repokey:
borg migrate-to-repokey

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_mount:
borg mount
@ -35,3 +37,15 @@ used in fstab entries:
To allow a regular user to use fstab entries, add the ``user`` option:
``/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0``
For mount options, see the fuse(8) manual page. Additional mount options
supported by borg:
- allow_damaged_files: by default damaged files (where missing chunks were
replaced with runs of zeros by borg check --repair) are not readable and
return EIO (I/O error). Set this option to read such files.
The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users
to tweak the performance. It sets the number of cached data chunks; additional
memory usage can be up to ~8 MiB times this number. The default is the number
of CPU cores.

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_prune:
borg prune
@ -13,12 +15,18 @@ positional arguments
optional arguments
``-n``, ``--dry-run``
| do not change repository
``--force``
| force pruning of corrupted archives
``-s``, ``--stats``
| print statistics for the deleted archive
``--list``
| output verbose list of archives it keeps/prunes
``--keep-within WITHIN``
| keep all archives within this time interval
``--keep-last``, ``--keep-secondly``
| number of secondly archives to keep
``--keep-minutely``
| number of minutely archives to keep
``-H``, ``--keep-hourly``
| number of hourly archives to keep
``-d``, ``--keep-daily``
@ -40,17 +48,23 @@ optional arguments
Description
~~~~~~~~~~~
The prune command prunes a repository by deleting archives not matching
The prune command prunes a repository by deleting all archives not matching
any of the specified retention options. This command is normally used by
automated backup scripts wanting to keep a certain number of historic backups.
As an example, "-d 7" means to keep the latest backup on each day, up to 7
most recent days with backups (days without backups do not count).
The rules are applied from hourly to yearly, and backups selected by previous
rules do not count towards those of later rules. The time that each backup
completes is used for pruning purposes. Dates and times are interpreted in
the local timezone, and weeks go from Monday to Sunday. Specifying a
negative number of archives to keep means that there is no limit.
Also, prune automatically removes checkpoint archives (incomplete archives left
behind by interrupted backup runs) except if the checkpoint is the latest
archive (and thus still needed). Checkpoint archives are not considered when
comparing archive counts against the retention limits (--keep-*).
If a prefix is set with -P, then only archives that start with the prefix are
considered for deletion and only those archives count towards the totals
specified by the rules.
Otherwise, *all* archives in the repository are candidates for deletion!
If you have multiple sequences of archives with different data sets (e.g.
from different machines) in one shared repository, use one prune call per
data set that matches only the respective archives using the -P option.
The "--keep-within" option takes an argument of the form "<int><char>",
where char is "H", "d", "w", "m", "y". For example, "--keep-within 2d" means
@ -58,7 +72,15 @@ to keep all archives that were created within the past 48 hours.
"1m" is taken to mean "31d". The archives kept with this option do not
count towards the totals specified by any other options.
If a prefix is set with -P, then only archives that start with the prefix are
considered for deletion and only those archives count towards the totals
specified by the rules.
Otherwise, *all* archives in the repository are candidates for deletion!
A good procedure is to thin out more and more the older your backups get.
As an example, "--keep-daily 7" means to keep the latest backup on each day,
up to 7 most recent days with backups (days without backups do not count).
The rules are applied from secondly to yearly, and backups selected by previous
rules do not count towards those of later rules. The time that each backup
starts is used for pruning purposes. Dates and times are interpreted in
the local timezone, and weeks go from Monday to Sunday. Specifying a
negative number of archives to keep means that there is no limit.
The "--keep-last N" option is doing the same as "--keep-secondly N" (and it will
keep the last N archives under the assumption that you do not create more than one
backup archive in the same second).

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_recreate:
borg recreate
@ -47,11 +49,15 @@ Archive options
``-C COMPRESSION``, ``--compression COMPRESSION``
| select compression algorithm (and level):
| none == no compression (default),
| auto,C[,L] == built-in heuristic decides between none or C[,L] - with C[,L]
| being any valid compression algorithm (and optional level),
| lz4 == lz4,
| zlib == zlib (default level 6),
| zlib,0 .. zlib,9 == zlib (with level 0..9),
| lzma == lzma (default level 6),
| lzma,0 .. lzma,9 == lzma (with level 0..9).
``--compression-from COMPRESSIONCONFIG``
| read compression patterns from COMPRESSIONCONFIG, one per line
``--chunker-params CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE``
| specify the chunker parameters (or "default").

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_rename:
borg rename

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_serve:
borg serve
@ -9,6 +11,8 @@ borg serve
optional arguments
``--restrict-to-path PATH``
| restrict repository access to PATH
``--append-only``
| only allow appending to repository segment files
`Common options`_
|

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_upgrade:
borg upgrade

View file

@ -1,3 +1,5 @@
.. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!
.. _borg_with-lock:
borg with-lock

5
requirements.d/attic.txt Normal file
View file

@ -0,0 +1,5 @@
# Please note:
# attic only builds using OpenSSL 1.0.x; it cannot be installed using OpenSSL >= 1.1.0.
# If attic is not installed, our unit tests will just skip the tests that require attic.
attic

61
scripts/glibc_check.py Normal file
View file

@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""
Check if all given binaries work with the given glibc version.

glibc_check.py 2.11 bin [bin ...]
"""
import re
import subprocess
import sys

# print per-binary results and a summary (exit status alone reports the verdict)
verbose = True
# objdump -T dumps the dynamic symbol table, which carries GLIBC_x.y version tags
objdump = "objdump -T %s"
# matches symbol version tags such as "GLIBC_2.11", capturing the "2.11" part
glibc_re = re.compile(r'GLIBC_([0-9]\.[0-9]+)')
def parse_version(v):
    """Turn a version string like "2.11" into a comparable (major, minor) int tuple."""
    major_str, minor_str = v.split('.')
    return int(major_str), int(minor_str)
def format_version(version):
    """Render a (major, minor) tuple back into dotted "major.minor" form."""
    major, minor = version
    return "%d.%d" % (major, minor)
def main():
    """Check the binaries named on the command line against a glibc version.

    sys.argv[1] is the available glibc version ("major.minor"),
    sys.argv[2:] are the binaries to inspect with objdump.

    Returns True if every inspected binary's glibc requirement is satisfied,
    False otherwise - including when no version requirement could be collected
    at all (previously that case crashed with ValueError from max() on an
    empty set).
    """
    given = parse_version(sys.argv[1])
    filenames = sys.argv[2:]
    overall_versions = set()
    for filename in filenames:
        try:
            # NOTE: shell=True with an interpolated filename is acceptable for
            # this trusted build/release script; do not reuse with untrusted input.
            output = subprocess.check_output(objdump % filename, shell=True,
                                             stderr=subprocess.STDOUT)
            output = output.decode('utf-8')
            versions = set(parse_version(match.group(1))
                           for match in glibc_re.finditer(output))
            if versions:
                requires_glibc = max(versions)
                overall_versions.add(requires_glibc)
                if verbose:
                    print("%s %s" % (filename, format_version(requires_glibc)))
            elif verbose:
                # e.g. a statically linked binary: no GLIBC_* version tags at all
                print("%s has no glibc version requirements." % filename)
        except subprocess.CalledProcessError:
            if verbose:
                print("%s errored." % filename)
    if not overall_versions:
        if verbose:
            print("No glibc version requirements could be determined.")
        return False
    wanted = max(overall_versions)
    ok = given >= wanted
    if verbose:
        if ok:
            print("The binaries work with the given glibc %s." %
                  format_version(given))
        else:
            print("The binaries do not work with the given glibc %s. "
                  "Minimum is: %s" % (format_version(given), format_version(wanted)))
    return ok
if __name__ == '__main__':
    # exit status: 0 = all binaries compatible with the given glibc, 1 = not
    sys.exit(0 if main() else 1)

2
scripts/release Executable file
View file

@ -0,0 +1,2 @@
python setup.py register sdist upload --identity="Thomas Waldmann" --sign

9
scripts/sign-binaries Executable file
View file

@ -0,0 +1,9 @@
#!/bin/bash
# usage: sign-binaries 201512312359
#
# Create a detached, ASCII-armored GPG signature next to every built binary
# in dist/, then stamp all dist/ files with the given [[CC]YY]MMDDhhmm time
# so release artifacts carry a reproducible timestamp.

for file in dist/borg-*; do
    # quote the expansion so file names containing whitespace survive
    gpg --armor --detach-sign "$file"
done
touch -t "$1" dist/*

View file

@ -35,9 +35,9 @@ extras_require = {
}
if sys.platform.startswith('freebsd'):
# while llfuse 1.0 is the latest llfuse release right now,
# llfuse 0.41.1 is the latest release that actually builds on freebsd:
extras_require['fuse'] = ['llfuse==0.41.1', ]
# llfuse was frequently broken / did not build on freebsd
# llfuse 0.41.1, 1.1 are ok
extras_require['fuse'] = ['llfuse <2.0, !=0.42.*, !=0.43, !=1.0', ]
from setuptools import setup, find_packages, Extension
from setuptools.command.sdist import sdist
@ -206,12 +206,13 @@ class build_usage(Command):
for command, parser in choices.items():
print('generating help for %s' % command)
with open('docs/usage/%s.rst.inc' % command, 'w') as doc:
doc.write(".. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!\n\n")
if command == 'help':
for topic in Archiver.helptext:
params = {"topic": topic,
"underline": '~' * len('borg help ' + topic)}
doc.write(".. _borg_{topic}:\n\n".format(**params))
doc.write("borg help {topic}\n{underline}\n::\n\n".format(**params))
doc.write("borg help {topic}\n{underline}\n\n".format(**params))
doc.write(Archiver.helptext[topic])
else:
params = {"command": command,

View file

@ -114,6 +114,8 @@ static int hashindex_delete(HashIndex *index, const void *key);
static void *hashindex_next_key(HashIndex *index, const void *key);
/* Private API */
static void hashindex_free(HashIndex *index);
static int
hashindex_index(HashIndex *index, const void *key)
{
@ -162,7 +164,11 @@ hashindex_resize(HashIndex *index, int capacity)
return 0;
}
while((key = hashindex_next_key(index, key))) {
hashindex_set(new, key, key + key_size);
if(!hashindex_set(new, key, key + key_size)) {
/* This can only happen if there's a bug in the code calculating capacity */
hashindex_free(new);
return 0;
}
}
free(index->buckets);
index->buckets = new->buckets;

View file

@ -4,6 +4,7 @@ import socket
import stat
import sys
import time
from contextlib import contextmanager
from datetime import datetime, timezone
from getpass import getuser
from io import BytesIO
@ -33,6 +34,7 @@ from .helpers import ProgressIndicatorPercent, log_multi
from .helpers import PathPrefixPattern, FnmatchPattern
from .helpers import consume
from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
from .item import Item
from .key import key_factory
from .platform import acl_get, acl_set, set_flags, get_flags, swidth
from .remote import cache_if_remote
@ -89,7 +91,7 @@ class Statistics:
columns, lines = get_terminal_size()
if not final:
msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
path = remove_surrogates(item[b'path']) if item else ''
path = remove_surrogates(item.path) if item else ''
space = columns - swidth(msg)
if space < swidth('...') + swidth(path):
path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:])
@ -99,6 +101,50 @@ class Statistics:
print(msg, file=stream or sys.stderr, end="\r", flush=True)
def is_special(mode):
    """Return True for file types that get special treatment in --read-special mode.

    Block devices, character devices and FIFOs are read like regular files
    when --read-special is active.
    """
    special_predicates = (stat.S_ISBLK, stat.S_ISCHR, stat.S_ISFIFO)
    return any(predicate(mode) for predicate in special_predicates)
class BackupOSError(Exception):
    """
    Wrapper for OSError raised while accessing backup files.

    Borg does different kinds of IO, and IO failures have different consequences.
    This wrapper represents failures of input file or extraction IO.
    These are non-critical and are only reported (exit code = 1, warning).

    Any unwrapped IO error is critical and aborts execution (for example repository IO failure).
    """
    def __init__(self, os_error):
        # keep the original exception, plus convenient copies of its key fields
        self.os_error = os_error
        for attribute in ('errno', 'strerror', 'filename'):
            setattr(self, attribute, getattr(os_error, attribute))

    def __str__(self):
        # present exactly as the wrapped OSError would
        return str(self.os_error)
@contextmanager
def backup_io():
    """Context manager translating OSError into BackupOSError.

    Used around input-file / extraction IO so such failures are reported as
    non-critical instead of aborting execution.
    """
    try:
        yield
    except OSError as exc:
        # chain the exceptions so the original traceback remains visible
        raise BackupOSError(exc) from exc
def backup_io_iter(iterator):
    """Yield items from *iterator*, wrapping OSError raised by next() in BackupOSError.

    Only the next() call itself is guarded; values are passed through unchanged.
    """
    while True:
        try:
            with backup_io():
                value = next(iterator)
        except StopIteration:
            break
        yield value
class DownloadPipeline:
def __init__(self, repository, key):
@ -109,16 +155,16 @@ class DownloadPipeline:
unpacker = msgpack.Unpacker(use_list=False)
for _, data in self.fetch_many(ids):
unpacker.feed(data)
items = [decode_dict(item, ITEM_TEXT_KEYS) for item in unpacker]
items = [Item(internal_dict=item) for item in unpacker]
if filter:
items = [item for item in items if filter(item)]
for item in items:
if b'chunks' in item:
item[b'chunks'] = [ChunkListEntry(*e) for e in item[b'chunks']]
if 'chunks' in item:
item.chunks = [ChunkListEntry(*e) for e in item.chunks]
if preload:
for item in items:
if b'chunks' in item:
self.repository.preload([c.id for c in item[b'chunks']])
if 'chunks' in item:
self.repository.preload([c.id for c in item.chunks])
for item in items:
yield item
@ -138,7 +184,7 @@ class ChunkBuffer:
self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
def add(self, item):
self.buffer.write(self.packer.pack(StableDict(item)))
self.buffer.write(self.packer.pack(item.as_dict()))
if self.is_full():
self.flush()
@ -289,9 +335,6 @@ Number of files: {0.stats.nfiles}'''.format(
yield item
def add_item(self, item):
unknown_keys = set(item) - ITEM_KEYS
assert not unknown_keys, ('unknown item metadata keys detected, please update constants.ITEM_KEYS: %s',
','.join(k.decode('ascii') for k in unknown_keys))
if self.show_progress:
self.stats.show_progress(item=item, dt=0.2)
self.items_buffer.add(item)
@ -359,9 +402,10 @@ Number of files: {0.stats.nfiles}'''.format(
_, data = self.key.decrypt(id, chunk)
unpacker.feed(data)
for item in unpacker:
if b'chunks' in item:
item = Item(internal_dict=item)
if 'chunks' in item:
stats.nfiles += 1
add_file_chunks(item[b'chunks'])
add_file_chunks(item.chunks)
cache.rollback()
return stats
@ -376,22 +420,26 @@ Number of files: {0.stats.nfiles}'''.format(
:param stdout: write extracted data to stdout
:param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
:param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
:param original_path: b'path' key as stored in archive
:param original_path: 'path' key as stored in archive
"""
has_damaged_chunks = 'chunks_healthy' in item
if dry_run or stdout:
if b'chunks' in item:
for _, data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
if 'chunks' in item:
for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
if stdout:
sys.stdout.buffer.write(data)
if stdout:
sys.stdout.buffer.flush()
if has_damaged_chunks:
logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
remove_surrogates(item[b'path']))
return
original_path = original_path or item[b'path']
original_path = original_path or item.path
dest = self.cwd
if item[b'path'].startswith('/') or item[b'path'].startswith('..') or (sys.platform == 'win32' and len(item[b'path']) > 1 and item[b'path'][1] == ':'):
raise Exception('Path should be relative and local')
path = os.path.join(dest, item[b'path'])
path = os.path.join(dest, item.path)
# Attempt to remove existing files, ignore errors on failure
try:
st = os.lstat(path)
@ -403,79 +451,96 @@ Number of files: {0.stats.nfiles}'''.format(
raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None
except OSError:
pass
mode = item[b'mode']
mode = item.mode
if stat.S_ISREG(mode):
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
with backup_io():
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
# Hard link?
if b'source' in item:
source = os.path.join(dest, item[b'source'])
if os.path.exists(path):
os.unlink(path)
if not hardlink_masters:
os.link(source, path)
return
item[b'chunks'], link_target = hardlink_masters[item[b'source']]
if 'source' in item:
source = os.path.join(dest, item.source)
with backup_io():
if os.path.exists(path):
os.unlink(path)
if not hardlink_masters:
os.link(source, path)
return
item.chunks, link_target = hardlink_masters[item.source]
if link_target:
# Hard link was extracted previously, just link
os.link(link_target, path)
with backup_io():
os.link(link_target, path)
return
# Extract chunks, since the item which had the chunks was not extracted
with open(path, 'wb') as fd:
ids = [c.id for c in item[b'chunks']]
with backup_io():
fd = open(path, 'wb')
with fd:
ids = [c.id for c in item.chunks]
for _, data in self.pipeline.fetch_many(ids, is_preloaded=True):
if sparse and self.zeros.startswith(data):
# all-zero chunk: create a hole in a sparse file
fd.seek(len(data), 1)
with backup_io():
if sparse and self.zeros.startswith(data):
# all-zero chunk: create a hole in a sparse file
fd.seek(len(data), 1)
else:
fd.write(data)
with backup_io():
pos = fd.tell()
fd.truncate(pos)
fd.flush()
if sys.platform != 'win32':
self.restore_attrs(path, item, fd=fd.fileno())
else:
fd.write(data)
pos = fd.tell()
fd.truncate(pos)
fd.flush()
if sys.platform != 'win32':
self.restore_attrs(path, item, fd=fd.fileno())
else:
# File needs to be closed or timestamps are rewritten at close
fd.close()
self.restore_attrs(path, item)
# File needs to be closed or timestamps are rewritten at close
fd.close()
self.restore_attrs(path, item)
if has_damaged_chunks:
logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
remove_surrogates(item.path))
if hardlink_masters:
# Update master entry with extracted file path, so that following hardlinks don't extract twice.
hardlink_masters[item.get(b'source') or original_path] = (None, path)
elif stat.S_ISDIR(mode):
if not os.path.exists(path):
os.makedirs(path)
if restore_attrs:
hardlink_masters[item.get('source') or original_path] = (None, path)
return
with backup_io():
# No repository access beyond this point.
if stat.S_ISDIR(mode):
if not os.path.exists(path):
os.makedirs(path)
if restore_attrs:
self.restore_attrs(path, item)
elif stat.S_ISLNK(mode):
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
source = item.source
if os.path.exists(path):
os.unlink(path)
try:
os.symlink(source, path)
except UnicodeEncodeError:
raise self.IncompatibleFilesystemEncodingError(source, sys.getfilesystemencoding()) from None
self.restore_attrs(path, item, symlink=True)
elif stat.S_ISFIFO(mode):
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
os.mkfifo(path)
self.restore_attrs(path, item)
elif stat.S_ISLNK(mode):
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
source = item[b'source']
if os.path.exists(path):
os.unlink(path)
try:
os.symlink(source, path)
except UnicodeEncodeError:
raise self.IncompatibleFilesystemEncodingError(source, sys.getfilesystemencoding()) from None
self.restore_attrs(path, item, symlink=True)
elif stat.S_ISFIFO(mode):
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
os.mkfifo(path)
self.restore_attrs(path, item)
elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
os.mknod(path, item[b'mode'], item[b'rdev'])
self.restore_attrs(path, item)
else:
raise Exception('Unknown archive item type %r' % item[b'mode'])
elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
os.mknod(path, item.mode, item.rdev)
self.restore_attrs(path, item)
else:
raise Exception('Unknown archive item type %r' % item.mode)
def restore_attrs(self, path, item, symlink=False, fd=None):
"""
Restore filesystem attributes on *path* (*fd*) from *item*.
Does not access the repository.
"""
uid = gid = None
if not self.numeric_owner:
uid = user2uid(item[b'user'])
gid = group2gid(item[b'group'])
uid = item[b'uid'] if uid is None else uid
gid = item[b'gid'] if gid is None else gid
uid = user2uid(item.user)
gid = group2gid(item.group)
uid = item.uid if uid is None else uid
gid = item.gid if gid is None else gid
# This code is a bit of a mess due to os specific differences
if sys.platform != 'win32':
try:
@ -492,14 +557,18 @@ Number of files: {0.stats.nfiles}'''.format(
pass
if sys.platform != 'win32':
if fd:
os.fchmod(fd, item[b'mode'])
os.fchown(fd, uid, gid)
else:
os.lchown(path, uid, gid)
if fd:
os.fchmod(fd, item.mode)
elif not symlink:
os.chmod(path, item[b'mode'])
os.chmod(path, item.mode)
elif has_lchmod: # Not available on Linux
os.lchmod(path, item[b'mode'])
mtime = bigint_to_int(item[b'mtime'])
if b'atime' in item:
atime = bigint_to_int(item[b'atime'])
os.lchmod(path, item.mode)
mtime = item.mtime
if 'atime' in item:
atime = item.atime
else:
# old archives only had mtime in item metadata
atime = mtime
@ -510,14 +579,14 @@ Number of files: {0.stats.nfiles}'''.format(
else:
os.utime(path, None, ns=(atime, mtime), follow_symlinks=False)
acl_set(path, item, self.numeric_owner)
if b'bsdflags' in item:
if 'bsdflags' in item:
try:
set_flags(path, item[b'bsdflags'], fd=fd)
set_flags(path, item.bsdflags, fd=fd)
except OSError:
pass
# chown removes Linux capabilities, so set the extended attributes at the end, after chown, since they include
# the Linux capabilities in the "security.capability" attribute.
xattrs = item.get(b'xattrs', {})
xattrs = item.get('xattrs', {})
for k, v in xattrs.items():
try:
xattr.setxattr(fd or path, k, v, follow_symlinks=False)
@ -547,71 +616,107 @@ Number of files: {0.stats.nfiles}'''.format(
self.set_meta(b'name', name)
del self.manifest.archives[oldname]
def delete(self, stats, progress=False):
unpacker = msgpack.Unpacker(use_list=False)
items_ids = self.metadata[b'items']
pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", same_line=True)
for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))):
def delete(self, stats, progress=False, forced=False):
class ChunksIndexError(Error):
"""Chunk ID {} missing from chunks index, corrupted chunks index - aborting transaction."""
def chunk_decref(id, stats):
nonlocal error
try:
self.cache.chunk_decref(id, stats)
except KeyError:
cid = bin_to_hex(id)
raise ChunksIndexError(cid)
except Repository.ObjectNotFound as e:
# object not in repo - strange, but we wanted to delete it anyway.
if not forced:
raise
error = True
error = False
try:
unpacker = msgpack.Unpacker(use_list=False)
items_ids = self.metadata[b'items']
pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", same_line=True)
for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))):
if progress:
pi.show(i)
_, data = self.key.decrypt(items_id, data)
unpacker.feed(data)
chunk_decref(items_id, stats)
try:
for item in unpacker:
item = Item(internal_dict=item)
if 'chunks' in item:
for chunk_id, size, csize in item.chunks:
chunk_decref(chunk_id, stats)
except (TypeError, ValueError):
# if items metadata spans multiple chunks and one chunk got dropped somehow,
# it could be that unpacker yields bad types
if not forced:
raise
error = True
if progress:
pi.show(i)
_, data = self.key.decrypt(items_id, data)
unpacker.feed(data)
self.cache.chunk_decref(items_id, stats)
for item in unpacker:
if b'chunks' in item:
for chunk_id, size, csize in item[b'chunks']:
self.cache.chunk_decref(chunk_id, stats)
if progress:
pi.finish()
self.cache.chunk_decref(self.id, stats)
pi.finish()
except (msgpack.UnpackException, Repository.ObjectNotFound):
# items metadata corrupted
if not forced:
raise
error = True
# in forced delete mode, we try hard to delete at least the manifest entry,
# if possible also the archive superblock, even if processing the items raises
# some harmless exception.
chunk_decref(self.id, stats)
del self.manifest.archives[self.name]
if error:
logger.warning('forced deletion succeeded, but the deleted archive was corrupted.')
logger.warning('borg check --repair is required to free all space.')
def stat_attrs(self, st, path):
item = {}
attrs = dict(
mode=st.st_mode,
atime=st.st_atime_ns,
ctime=st.st_ctime_ns,
mtime=st.st_mtime_ns,
)
if sys.platform == 'win32':
owner = get_owner(path)
item = {
b'mode': st.st_mode,
b'uid': owner[1], b'user': owner[0],
b'gid': st.st_gid, b'group': gid2group(st.st_gid),
b'atime': int_to_bigint(st.st_atime_ns),
b'ctime': int_to_bigint(st.st_ctime_ns),
b'mtime': int_to_bigint(st.st_mtime_ns),
}
attrs.update({
'uid': owner[1], 'user': owner[0],
'gid': st.st_gid, 'group': gid2group(st.st_gid),
})
else:
item = {
b'mode': st.st_mode,
b'uid': st.st_uid, b'user': uid2user(st.st_uid),
b'gid': st.st_gid, b'group': gid2group(st.st_gid),
b'atime': int_to_bigint(st.st_atime_ns),
b'ctime': int_to_bigint(st.st_ctime_ns),
b'mtime': int_to_bigint(st.st_mtime_ns),
}
attrs.update({
'uid': st.st_uid, 'user': uid2user(st.st_uid),
'gid': st.st_gid, 'group': gid2group(st.st_gid),
})
if self.numeric_owner:
item[b'user'] = item[b'group'] = None
xattrs = xattr.get_all(path, follow_symlinks=False)
attrs['user'] = attrs['group'] = None
with backup_io():
xattrs = xattr.get_all(path, follow_symlinks=False)
bsdflags = get_flags(path, st)
acl_get(path, attrs, st, self.numeric_owner)
if xattrs:
item[b'xattrs'] = StableDict(xattrs)
bsdflags = get_flags(path, st)
attrs['xattrs'] = StableDict(xattrs)
if bsdflags:
item[b'bsdflags'] = bsdflags
acl_get(path, item, st, self.numeric_owner)
return item
attrs['bsdflags'] = bsdflags
return attrs
def process_dir(self, path, st):
item = {b'path': make_path_safe(path)}
item = Item(path=make_path_safe(path))
item.update(self.stat_attrs(st, path))
self.add_item(item)
return 'd' # directory
def process_fifo(self, path, st):
item = {b'path': make_path_safe(path)}
item = Item(path=make_path_safe(path))
item.update(self.stat_attrs(st, path))
self.add_item(item)
return 'f' # fifo
def process_dev(self, path, st):
item = {b'path': make_path_safe(path), b'rdev': st.st_rdev}
item = Item(path=make_path_safe(path), rdev=st.st_rdev)
item.update(self.stat_attrs(st, path))
self.add_item(item)
if stat.S_ISCHR(st.st_mode):
@ -621,7 +726,7 @@ Number of files: {0.stats.nfiles}'''.format(
def process_symlink(self, path, st):
source = os.readlink(path)
item = {b'path': make_path_safe(path), b'source': source}
item = Item(path=make_path_safe(path), source=source)
item.update(self.stat_attrs(st, path))
self.add_item(item)
return 's' # symlink
@ -630,18 +735,18 @@ Number of files: {0.stats.nfiles}'''.format(
uid, gid = 0, 0
fd = sys.stdin.buffer # binary
chunks = []
for data in self.chunker.chunkify(fd):
for data in backup_io_iter(self.chunker.chunkify(fd)):
chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats))
self.stats.nfiles += 1
t = int_to_bigint(int(time.time()) * 1000000000)
item = {
b'path': path,
b'chunks': chunks,
b'mode': 0o100660, # regular file, ug=rw
b'uid': uid, b'user': uid2user(uid),
b'gid': gid, b'group': gid2group(gid),
b'mtime': t, b'atime': t, b'ctime': t,
}
t = int(time.time()) * 1000000000
item = Item(
path=path,
chunks=chunks,
mode=0o100660, # regular file, ug=rw
uid=uid, user=uid2user(uid),
gid=gid, group=gid2group(gid),
mtime=t, atime=t, ctime=t,
)
self.add_item(item)
return 'i' # stdin
@ -652,19 +757,23 @@ Number of files: {0.stats.nfiles}'''.format(
if st.st_nlink > 1:
source = self.hard_links.get((st.st_ino, st.st_dev))
if (st.st_ino, st.st_dev) in self.hard_links:
item = self.stat_attrs(st, path)
item.update({
b'path': safe_path,
b'source': source,
})
item = Item(path=safe_path, source=source)
item.update(self.stat_attrs(st, path))
self.add_item(item)
status = 'h' # regular file, hardlink (to already seen inodes)
return status
else:
self.hard_links[st.st_ino, st.st_dev] = safe_path
path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
is_special_file = is_special(st.st_mode)
if not is_special_file:
path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
else:
# in --read-special mode, we may be called for special files.
# there should be no information in the cache about special files processed in
# read-special mode, but we better play safe as this was wrong in the past:
path_hash = ids = None
first_run = not cache.files
ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
if first_run:
logger.debug('Processing files ...')
chunks = None
@ -678,27 +787,35 @@ Number of files: {0.stats.nfiles}'''.format(
status = 'U' # regular file, unchanged
else:
status = 'A' # regular file, added
item = {
b'path': safe_path,
b'hardlink_master': st.st_nlink > 1, # item is a hard link and has the chunks
}
item = Item(
path=safe_path,
hardlink_master=st.st_nlink > 1, # item is a hard link and has the chunks
)
# Only chunkify the file if needed
if chunks is None:
compress = self.compression_decider1.decide(path)
logger.debug('%s -> compression %s', path, compress['name'])
fh = Archive._open_rb(path)
with backup_io():
fh = Archive._open_rb(path)
with os.fdopen(fh, 'rb') as fd:
chunks = []
for data in self.chunker.chunkify(fd, fh):
for data in backup_io_iter(self.chunker.chunkify(fd, fh)):
chunks.append(cache.add_chunk(self.key.id_hash(data),
Chunk(data, compress=compress),
self.stats))
if self.show_progress:
self.stats.show_progress(item=item, dt=0.2)
cache.memorize_file(path_hash, st, [c.id for c in chunks])
if not is_special_file:
# we must not memorize special files, because the contents of e.g. a
# block or char device will change without its mtime/size/inode changing.
cache.memorize_file(path_hash, st, [c.id for c in chunks])
status = status or 'M' # regular file, modified (if not 'A' already)
item[b'chunks'] = chunks
item.chunks = chunks
item.update(self.stat_attrs(st, path))
if is_special_file:
# we processed a special file like a regular file. reflect that in mode,
# so it can be extracted / accessed in FUSE mount like a regular file:
item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
self.stats.nfiles += 1
self.add_item(item)
return status
@ -722,12 +839,40 @@ Number of files: {0.stats.nfiles}'''.format(
return os.open(path, flags_normal)
def valid_msgpacked_dict(d, keys_serialized):
    """check if the data <d> looks like a msgpacked dict"""
    if not d:
        return False
    first = d[0]
    if first & 0xf0 == 0x80:
        # fixmap header (up to 15 elements), 1 byte long
        header_len = 1
    elif first == 0xde:
        # map16 header (up to 2^16-1 elements), 3 bytes long
        header_len = 3
    else:
        # not a map (dict) at all
        # note: we must not have dicts with > 2^16-1 elements
        return False
    if len(d) <= header_len:
        return False
    key_byte = d[header_len]
    # the first key must be a msgpack bytestring:
    # either a fixstr (up to 31 chars) or a str8 / str16 / str32
    if key_byte & 0xe0 != 0xa0 and key_byte not in (0xd9, 0xda, 0xdb):
        return False
    # finally, the bytestring must be one of the expected key names
    remainder = d[header_len:]
    return any(remainder.startswith(pattern) for pattern in keys_serialized)
class RobustUnpacker:
"""A restartable/robust version of the streaming msgpack unpacker
"""
def __init__(self, validator):
def __init__(self, validator, item_keys):
super().__init__()
self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS]
self.item_keys = [msgpack.packb(name.encode()) for name in item_keys]
self.validator = validator
self._buffered_data = []
self._resync = False
@ -752,18 +897,10 @@ class RobustUnpacker:
while self._resync:
if not data:
raise StopIteration
# Abort early if the data does not look like a serialized dict
if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
# Abort early if the data does not look like a serialized item dict
if not valid_msgpacked_dict(data, self.item_keys):
data = data[1:]
continue
# Make sure it looks like an item dict
for pattern in self.item_keys:
if data[1:].startswith(pattern):
break
else:
data = data[1:]
continue
self._unpacker = msgpack.Unpacker(object_hook=StableDict)
self._unpacker.feed(data)
try:
@ -825,7 +962,7 @@ class ArchiveChecker:
"""
# Explicitly set the initial hash table capacity to avoid performance issues
# due to hash table "resonance"
capacity = int(len(self.repository) * 1.2)
capacity = int(len(self.repository) * 1.35 + 1) # > len * 1.0 / HASH_MAX_LOAD (see _hashindex.c)
self.chunks = ChunkIndex(capacity)
marker = None
while True:
@ -838,7 +975,12 @@ class ArchiveChecker:
self.chunks[id_] = init_entry
def identify_key(self, repository):
cdata = repository.get(next(self.chunks.iteritems())[0])
try:
some_chunkid, _ = next(self.chunks.iteritems())
except StopIteration:
# repo is completely empty, no chunks
return None
cdata = repository.get(some_chunkid)
return key_factory(repository, cdata)
def verify_data(self):
@ -866,13 +1008,26 @@ class ArchiveChecker:
Iterates through all objects in the repository looking for archive metadata blocks.
"""
required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)
def valid_archive(obj):
    """Return True if obj looks like archive metadata: a dict with all required archive keys."""
    if not isinstance(obj, dict):
        return False
    keys = set(obj)
    # required_archive_keys (closure variable) is built from REQUIRED_ARCHIVE_KEYS,
    # encoded to bytes to match the msgpack-decoded key type.
    return required_archive_keys.issubset(keys)
logger.info('Rebuilding missing manifest, this might take some time...')
# as we have lost the manifest, we do not know any more what valid item keys we had.
# collecting any key we encounter in a damaged repo seems unwise, thus we just use
# the hardcoded list from the source code. thus, it is not recommended to rebuild a
# lost manifest on a older borg version than the most recent one that was ever used
# within this repository (assuming that newer borg versions support more item keys).
manifest = Manifest(self.key, self.repository)
archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS]
for chunk_id, _ in self.chunks.iteritems():
cdata = self.repository.get(chunk_id)
_, data = self.key.decrypt(chunk_id, cdata)
# Some basic sanity checks of the payload before feeding it into msgpack
if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
if not valid_msgpacked_dict(data, archive_keys_serialized):
continue
if b'cmdline' not in data or b'\xa7version\x01' not in data:
continue
@ -882,7 +1037,7 @@ class ArchiveChecker:
# msgpack with invalid data
except (TypeError, ValueError, StopIteration):
continue
if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive:
if valid_archive(archive):
logger.info('Found archive %s', archive[b'name'].decode('utf-8'))
manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']}
logger.info('Manifest rebuild complete.')
@ -916,35 +1071,64 @@ class ArchiveChecker:
self.repository.put(id_, cdata)
def verify_file_chunks(item):
"""Verifies that all file chunks are present
"""Verifies that all file chunks are present.
Missing file chunks will be replaced with new chunks of the same
length containing all zeros.
Missing file chunks will be replaced with new chunks of the same length containing all zeros.
If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one.
"""
offset = 0
chunk_list = []
for chunk_id, size, csize in item[b'chunks']:
chunks_replaced = False
has_chunks_healthy = 'chunks_healthy' in item
chunks_current = item.chunks
chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current
assert len(chunks_current) == len(chunks_healthy)
for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
chunk_id, size, csize = chunk_healthy
if chunk_id not in self.chunks:
# If a file chunk is missing, create an all empty replacement chunk
logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(safe_decode(item[b'path']), offset, offset + size))
self.error_found = True
data = bytes(size)
chunk_id = self.key.id_hash(data)
cdata = self.key.encrypt(Chunk(data))
csize = len(cdata)
add_reference(chunk_id, size, csize, cdata)
# a chunk of the healthy list is missing
if chunk_current == chunk_healthy:
logger.error('{}: New missing file chunk detected (Byte {}-{}). '
'Replacing with all-zero chunk.'.format(item.path, offset, offset + size))
self.error_found = chunks_replaced = True
data = bytes(size)
chunk_id = self.key.id_hash(data)
cdata = self.key.encrypt(Chunk(data))
csize = len(cdata)
add_reference(chunk_id, size, csize, cdata)
else:
logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). It has a '
'all-zero replacement chunk already.'.format(item.path, offset, offset + size))
chunk_id, size, csize = chunk_current
add_reference(chunk_id, size, csize)
else:
add_reference(chunk_id, size, csize)
chunk_list.append((chunk_id, size, csize))
if chunk_current == chunk_healthy:
# normal case, all fine.
add_reference(chunk_id, size, csize)
else:
logger.info('{}: Healed previously missing file chunk! '
'(Byte {}-{}).'.format(item.path, offset, offset + size))
add_reference(chunk_id, size, csize)
mark_as_possibly_superseded(chunk_current[0]) # maybe orphaned the all-zero replacement chunk
chunk_list.append([chunk_id, size, csize]) # list-typed element as chunks_healthy is list-of-lists
offset += size
item[b'chunks'] = chunk_list
if chunks_replaced and not has_chunks_healthy:
# if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later
item.chunks_healthy = item.chunks
if has_chunks_healthy and chunk_list == chunks_healthy:
logger.info('{}: Completely healed previously damaged file!'.format(item.path))
del item.chunks_healthy
item.chunks = chunk_list
def robust_iterator(archive):
"""Iterates through all archive items
Missing item chunks will be skipped and the msgpack stream will be restarted
"""
unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
item_keys = frozenset(key.encode() for key in self.manifest.item_keys)
required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS)
unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item,
self.manifest.item_keys)
_state = 0
def missing_chunk_detector(chunk_id):
@ -959,6 +1143,12 @@ class ArchiveChecker:
self.error_found = True
logger.error(msg)
def valid_item(obj):
    """Heuristic validity check for an unpacked item metadata dict.

    Accepts only a StableDict that has every required item key and
    no keys outside the manifest's known item keys.
    """
    if not isinstance(obj, StableDict):
        return False
    keys = set(obj)
    # item_keys / required_item_keys are closure variables (byte-encoded key sets):
    # all required keys must be present, and nothing unknown may appear.
    return required_item_keys.issubset(keys) and keys.issubset(item_keys)
i = 0
for state, items in groupby(archive[b'items'], missing_chunk_detector):
items = list(items)
@ -974,8 +1164,8 @@ class ArchiveChecker:
unpacker.feed(data)
try:
for item in unpacker:
if isinstance(item, dict):
yield item
if valid_item(item):
yield Item(internal_dict=item)
else:
report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)
except Exception:
@ -1019,7 +1209,7 @@ class ArchiveChecker:
items_buffer = ChunkBuffer(self.key)
items_buffer.write_chunk = add_callback
for item in robust_iterator(archive):
if b'chunks' in item:
if 'chunks' in item:
verify_file_chunks(item)
items_buffer.add(item)
items_buffer.flush(flush=True)
@ -1122,38 +1312,38 @@ class ArchiveRecreater:
def item_is_hardlink_master(item):
return (target_is_subset and
stat.S_ISREG(item[b'mode']) and
item.get(b'hardlink_master', True) and
b'source' not in item and
not matcher.match(item[b'path']))
stat.S_ISREG(item.mode) and
item.get('hardlink_master', True) and
'source' not in item and
not matcher.match(item.path))
for item in archive.iter_items():
if item_is_hardlink_master(item):
# Re-visit all of these items in the archive even when fast-forwarding to rebuild hardlink_masters
hardlink_masters[item[b'path']] = (item.get(b'chunks'), None)
hardlink_masters[item.path] = (item.get('chunks'), None)
continue
if resume_from:
# Fast forward to after the last processed file
if item[b'path'] == resume_from:
logger.info('Fast-forwarded to %s', remove_surrogates(item[b'path']))
if item.path == resume_from:
logger.info('Fast-forwarded to %s', remove_surrogates(item.path))
resume_from = None
continue
if not matcher.match(item[b'path']):
self.print_file_status('x', item[b'path'])
if not matcher.match(item.path):
self.print_file_status('x', item.path)
continue
if target_is_subset and stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters:
if target_is_subset and stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters:
# master of this hard link is outside the target subset
chunks, new_source = hardlink_masters[item[b'source']]
chunks, new_source = hardlink_masters[item.source]
if new_source is None:
# First item to use this master, move the chunks
item[b'chunks'] = chunks
hardlink_masters[item[b'source']] = (None, item[b'path'])
del item[b'source']
item.chunks = chunks
hardlink_masters[item.source] = (None, item.path)
del item.source
else:
# Master was already moved, only update this item's source
item[b'source'] = new_source
item.source = new_source
if self.dry_run:
self.print_file_status('-', item[b'path'])
self.print_file_status('-', item.path)
else:
try:
self.process_item(archive, target, item)
@ -1165,11 +1355,11 @@ class ArchiveRecreater:
target.stats.show_progress(final=True)
def process_item(self, archive, target, item):
if b'chunks' in item:
item[b'chunks'] = self.process_chunks(archive, target, item)
if 'chunks' in item:
item.chunks = self.process_chunks(archive, target, item)
target.stats.nfiles += 1
target.add_item(item)
self.print_file_status(file_status(item[b'mode']), item[b'path'])
self.print_file_status(file_status(item.mode), item.path)
if self.interrupt:
raise self.Interrupted
@ -1177,9 +1367,9 @@ class ArchiveRecreater:
"""Return new chunk ID list for 'item'."""
# TODO: support --compression-from
if not self.recompress and not target.recreate_rechunkify:
for chunk_id, size, csize in item[b'chunks']:
for chunk_id, size, csize in item.chunks:
self.cache.chunk_incref(chunk_id, target.stats)
return item[b'chunks']
return item.chunks
new_chunks = self.process_partial_chunks(target)
chunk_iterator = self.create_chunk_iterator(archive, target, item)
consume(chunk_iterator, len(new_chunks))
@ -1210,7 +1400,7 @@ class ArchiveRecreater:
def create_chunk_iterator(self, archive, target, item):
"""Return iterator of chunks to store for 'item' from 'archive' in 'target'."""
chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item[b'chunks']])
chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks])
if target.recreate_rechunkify:
# The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk
# (does not load the entire file into memory)
@ -1272,7 +1462,7 @@ class ArchiveRecreater:
"""Add excludes to the matcher created by exclude_cache and exclude_if_present."""
def exclude(dir, tag_item):
if self.keep_tag_files:
tag_files.append(PathPrefixPattern(tag_item[b'path']))
tag_files.append(PathPrefixPattern(tag_item.path))
tagged_dirs.append(FnmatchPattern(dir + '/'))
else:
tagged_dirs.append(PathPrefixPattern(dir))
@ -1284,18 +1474,18 @@ class ArchiveRecreater:
cachedir_masters = {}
for item in archive.iter_items(
filter=lambda item: item[b'path'].endswith(CACHE_TAG_NAME) or matcher.match(item[b'path'])):
if item[b'path'].endswith(CACHE_TAG_NAME):
cachedir_masters[item[b'path']] = item
if stat.S_ISREG(item[b'mode']):
dir, tag_file = os.path.split(item[b'path'])
filter=lambda item: item.path.endswith(CACHE_TAG_NAME) or matcher.match(item.path)):
if item.path.endswith(CACHE_TAG_NAME):
cachedir_masters[item.path] = item
if stat.S_ISREG(item.mode):
dir, tag_file = os.path.split(item.path)
if tag_file in self.exclude_if_present:
exclude(dir, item)
if self.exclude_caches and tag_file == CACHE_TAG_NAME:
if b'chunks' in item:
if 'chunks' in item:
file = open_item(archive, item)
else:
file = open_item(archive, cachedir_masters[item[b'source']])
file = open_item(archive, cachedir_masters[item.source])
if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
exclude(dir, item)
matcher.add(tag_files, True)
@ -1336,13 +1526,13 @@ class ArchiveRecreater:
logger.info('Replaying items from interrupted operation...')
item = None
for item in old_target.iter_items():
if b'chunks' in item:
for chunk in item[b'chunks']:
if 'chunks' in item:
for chunk in item.chunks:
self.cache.chunk_incref(chunk.id, target.stats)
target.stats.nfiles += 1
target.add_item(item)
if item:
resume_from = item[b'path']
resume_from = item.path
else:
resume_from = None
if self.progress:

View file

@ -23,12 +23,14 @@ logger = create_logger()
from . import __version__
from . import helpers
from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics
from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
from .archive import BackupOSError, CHUNKER_PARAMS
from .cache import Cache
from .constants import * # NOQA
from .helpers import Error
from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec
from .helpers import ItemFormatter, format_time, format_file_size, format_archive
from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
from .helpers import Error, NoManifestError
from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec, PrefixSpec
from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter, format_time, format_file_size, format_archive
from .helpers import safe_encode, remove_surrogates, bin_to_hex
from .helpers import prune_within, prune_split
from .helpers import to_localtime, timestamp
@ -38,6 +40,8 @@ from .helpers import update_excludes, check_extension_modules
from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
from .helpers import log_multi
from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern
from .helpers import signal_handler
from .item import Item
from .key import key_creator, RepoKey, PassphraseKey
from .platform import get_flags
from .remote import RepositoryServer, RemoteRepository, cache_if_remote
@ -164,7 +168,7 @@ class Archiver:
def do_serve(self, args):
"""Start in server mode. This command is usually not used manually.
"""
return RepositoryServer(restrict_to_paths=args.restrict_to_paths).serve()
return RepositoryServer(restrict_to_paths=args.restrict_to_paths, append_only=args.append_only).serve()
@with_repository(create=True, exclusive=True, manifest=False)
def do_init(self, args, repository):
@ -255,7 +259,7 @@ class Archiver:
if not dry_run:
try:
status = archive.process_stdin(path, cache)
except OSError as e:
except BackupOSError as e:
status = 'E'
self.print_warning('%s: %s', path, e)
else:
@ -327,14 +331,18 @@ class Archiver:
return
status = None
# Ignore if nodump flag is set
if get_flags(path, st) & stat.UF_NODUMP:
self.print_file_status('x', path)
try:
if get_flags(path, st) & stat.UF_NODUMP:
self.print_file_status('x', path)
return
except OSError as e:
self.print_warning('%s: %s', path, e)
return
if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode):
if stat.S_ISREG(st.st_mode):
if not dry_run:
try:
status = archive.process_file(path, st, cache, self.ignore_inode)
except OSError as e:
except BackupOSError as e:
status = 'E'
self.print_warning('%s: %s', path, e)
elif stat.S_ISDIR(st.st_mode):
@ -362,13 +370,26 @@ class Archiver:
read_special=read_special, dry_run=dry_run)
elif stat.S_ISLNK(st.st_mode):
if not dry_run:
status = archive.process_symlink(path, st)
if not read_special:
status = archive.process_symlink(path, st)
else:
st_target = os.stat(path)
if is_special(st_target.st_mode):
status = archive.process_file(path, st_target, cache)
else:
status = archive.process_symlink(path, st)
elif stat.S_ISFIFO(st.st_mode):
if not dry_run:
status = archive.process_fifo(path, st)
if not read_special:
status = archive.process_fifo(path, st)
else:
status = archive.process_file(path, st, cache)
elif stat.S_ISCHR(st.st_mode) or stat.S_ISBLK(st.st_mode):
if not dry_run:
status = archive.process_dev(path, st)
if not read_special:
status = archive.process_dev(path, st)
else:
status = archive.process_file(path, st, cache)
elif stat.S_ISSOCK(st.st_mode):
# Ignore unix sockets
return
@ -411,41 +432,49 @@ class Archiver:
hardlink_masters = {} if partial_extract else None
def item_is_hardlink_master(item):
return (partial_extract and stat.S_ISREG(item[b'mode']) and
item.get(b'hardlink_master', True) and b'source' not in item)
return (partial_extract and stat.S_ISREG(item.mode) and
item.get('hardlink_master', True) and 'source' not in item)
for item in archive.iter_items(preload=True,
filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
orig_path = item[b'path']
filter=lambda item: item_is_hardlink_master(item) or matcher.match(item.path)):
orig_path = item.path
if item_is_hardlink_master(item):
hardlink_masters[orig_path] = (item.get(b'chunks'), None)
if not matcher.match(item[b'path']):
hardlink_masters[orig_path] = (item.get('chunks'), None)
if not matcher.match(item.path):
continue
if strip_components:
item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
if not item[b'path']:
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
if not item.path:
continue
if not args.dry_run:
while dirs and not item[b'path'].startswith(dirs[-1][b'path']):
archive.extract_item(dirs.pop(-1), stdout=stdout)
while dirs and not item.path.startswith(dirs[-1].path):
dir_item = dirs.pop(-1)
try:
archive.extract_item(dir_item, stdout=stdout)
except BackupOSError as e:
self.print_warning('%s: %s', remove_surrogates(dir_item[b'path']), e)
if output_list:
logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
try:
if dry_run:
archive.extract_item(item, dry_run=True)
else:
if stat.S_ISDIR(item[b'mode']):
if stat.S_ISDIR(item.mode):
dirs.append(item)
archive.extract_item(item, restore_attrs=False)
else:
archive.extract_item(item, stdout=stdout, sparse=sparse, hardlink_masters=hardlink_masters,
original_path=orig_path)
except OSError as e:
except BackupOSError as e:
self.print_warning('%s: %s', remove_surrogates(orig_path), e)
if not args.dry_run:
while dirs:
archive.extract_item(dirs.pop(-1))
dir_item = dirs.pop(-1)
try:
archive.extract_item(dir_item)
except BackupOSError as e:
self.print_warning('%s: %s', remove_surrogates(dir_item[b'path']), e)
for pattern in include_patterns:
if pattern.match_count == 0:
self.print_warning("Include pattern '%s' never matched.", pattern)
@ -461,58 +490,58 @@ class Archiver:
return self.compare_chunk_contents(chunks1, chunks2)
def sum_chunk_size(item, consider_ids=None):
if item.get(b'deleted'):
if item.get('deleted'):
return None
else:
return sum(c.size for c in item[b'chunks']
return sum(c.size for c in item.chunks
if consider_ids is None or c.id in consider_ids)
def get_owner(item):
if args.numeric_owner:
return item[b'uid'], item[b'gid']
return item.uid, item.gid
else:
return item[b'user'], item[b'group']
return item.user, item.group
def get_mode(item):
if b'mode' in item:
return stat.filemode(item[b'mode'])
if 'mode' in item:
return stat.filemode(item.mode)
else:
return [None]
def has_hardlink_master(item, hardlink_masters):
return stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters
return stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters
def compare_link(item1, item2):
# These are the simple link cases. For special cases, e.g. if a
# regular file is replaced with a link or vice versa, it is
# indicated in compare_mode instead.
if item1.get(b'deleted'):
if item1.get('deleted'):
return 'added link'
elif item2.get(b'deleted'):
elif item2.get('deleted'):
return 'removed link'
elif b'source' in item1 and b'source' in item2 and item1[b'source'] != item2[b'source']:
elif 'source' in item1 and 'source' in item2 and item1.source != item2.source:
return 'changed link'
def contents_changed(item1, item2):
if can_compare_chunk_ids:
return item1[b'chunks'] != item2[b'chunks']
return item1.chunks != item2.chunks
else:
if sum_chunk_size(item1) != sum_chunk_size(item2):
return True
else:
chunk_ids1 = [c.id for c in item1[b'chunks']]
chunk_ids2 = [c.id for c in item2[b'chunks']]
chunk_ids1 = [c.id for c in item1.chunks]
chunk_ids2 = [c.id for c in item2.chunks]
return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)
def compare_content(path, item1, item2):
if contents_changed(item1, item2):
if item1.get(b'deleted'):
if item1.get('deleted'):
return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2))))
elif item2.get(b'deleted'):
elif item2.get('deleted'):
return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1))))
else:
chunk_ids1 = {c.id for c in item1[b'chunks']}
chunk_ids2 = {c.id for c in item2[b'chunks']}
chunk_ids1 = {c.id for c in item1.chunks}
chunk_ids2 = {c.id for c in item2.chunks}
added_ids = chunk_ids2 - chunk_ids1
removed_ids = chunk_ids1 - chunk_ids2
added = sum_chunk_size(item2, added_ids)
@ -521,9 +550,9 @@ class Archiver:
format_file_size(-removed, precision=1, sign=True)))
def compare_directory(item1, item2):
if item2.get(b'deleted') and not item1.get(b'deleted'):
if item2.get('deleted') and not item1.get('deleted'):
return 'removed directory'
elif item1.get(b'deleted') and not item2.get(b'deleted'):
elif item1.get('deleted') and not item2.get('deleted'):
return 'added directory'
def compare_owner(item1, item2):
@ -533,7 +562,7 @@ class Archiver:
return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2)
def compare_mode(item1, item2):
if item1[b'mode'] != item2[b'mode']:
if item1.mode != item2.mode:
return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))
def compare_items(output, path, item1, item2, hardlink_masters, deleted=False):
@ -544,15 +573,15 @@ class Archiver:
changes = []
if has_hardlink_master(item1, hardlink_masters):
item1 = hardlink_masters[item1[b'source']][0]
item1 = hardlink_masters[item1.source][0]
if has_hardlink_master(item2, hardlink_masters):
item2 = hardlink_masters[item2[b'source']][1]
item2 = hardlink_masters[item2.source][1]
if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l':
changes.append(compare_link(item1, item2))
if b'chunks' in item1 and b'chunks' in item2:
if 'chunks' in item1 and 'chunks' in item2:
changes.append(compare_content(path, item1, item2))
if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd':
@ -576,21 +605,21 @@ class Archiver:
def compare_archives(archive1, archive2, matcher):
def hardlink_master_seen(item):
return b'source' not in item or not stat.S_ISREG(item[b'mode']) or item[b'source'] in hardlink_masters
return 'source' not in item or not stat.S_ISREG(item.mode) or item.source in hardlink_masters
def is_hardlink_master(item):
return item.get(b'hardlink_master', True) and b'source' not in item
return item.get('hardlink_master', True) and 'source' not in item
def update_hardlink_masters(item1, item2):
if is_hardlink_master(item1) or is_hardlink_master(item2):
hardlink_masters[item1[b'path']] = (item1, item2)
hardlink_masters[item1.path] = (item1, item2)
def compare_or_defer(item1, item2):
update_hardlink_masters(item1, item2)
if not hardlink_master_seen(item1) or not hardlink_master_seen(item2):
deferred.append((item1, item2))
else:
compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
compare_items(output, item1.path, item1, item2, hardlink_masters)
orphans_archive1 = collections.OrderedDict()
orphans_archive2 = collections.OrderedDict()
@ -599,44 +628,44 @@ class Archiver:
output = []
for item1, item2 in zip_longest(
archive1.iter_items(lambda item: matcher.match(item[b'path'])),
archive2.iter_items(lambda item: matcher.match(item[b'path'])),
archive1.iter_items(lambda item: matcher.match(item.path)),
archive2.iter_items(lambda item: matcher.match(item.path)),
):
if item1 and item2 and item1[b'path'] == item2[b'path']:
if item1 and item2 and item1.path == item2.path:
compare_or_defer(item1, item2)
continue
if item1:
matching_orphan = orphans_archive2.pop(item1[b'path'], None)
matching_orphan = orphans_archive2.pop(item1.path, None)
if matching_orphan:
compare_or_defer(item1, matching_orphan)
else:
orphans_archive1[item1[b'path']] = item1
orphans_archive1[item1.path] = item1
if item2:
matching_orphan = orphans_archive1.pop(item2[b'path'], None)
matching_orphan = orphans_archive1.pop(item2.path, None)
if matching_orphan:
compare_or_defer(matching_orphan, item2)
else:
orphans_archive2[item2[b'path']] = item2
orphans_archive2[item2.path] = item2
# At this point orphans_* contain items that had no matching partner in the other archive
deleted_item = {
b'deleted': True,
b'chunks': [],
b'mode': 0,
}
deleted_item = Item(
deleted=True,
chunks=[],
mode=0,
)
for added in orphans_archive2.values():
path = added[b'path']
deleted_item[b'path'] = path
path = added.path
deleted_item.path = path
update_hardlink_masters(deleted_item, added)
compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True)
for deleted in orphans_archive1.values():
path = deleted[b'path']
deleted_item[b'path'] = path
path = deleted.path
deleted_item.path = path
update_hardlink_masters(deleted, deleted_item)
compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True)
for item1, item2 in deferred:
assert hardlink_master_seen(item1)
assert hardlink_master_seen(item2)
compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
compare_items(output, item1.path, item1, item2, hardlink_masters)
for line in sorted(output):
print_output(line)
@ -670,14 +699,15 @@ class Archiver:
cache.commit()
return self.exit_code
@with_repository(exclusive=True)
def do_delete(self, args, repository, manifest, key):
@with_repository(exclusive=True, manifest=False)
def do_delete(self, args, repository):
"""Delete an existing repository or archive"""
if args.location.archive:
manifest, key = Manifest.load(repository)
with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
stats = Statistics()
archive.delete(stats, progress=args.progress)
archive.delete(stats, progress=args.progress, forced=args.forced)
manifest.write()
repository.commit(save_space=args.save_space)
cache.commit()
@ -690,9 +720,15 @@ class Archiver:
else:
if not args.cache_only:
msg = []
msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
for archive_info in manifest.list_archive_infos(sort_by='ts'):
msg.append(format_archive(archive_info))
try:
manifest, key = Manifest.load(repository)
except NoManifestError:
msg.append("You requested to completely DELETE the repository *including* all archives it may contain.")
msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
else:
msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
for archive_info in manifest.list_archive_infos(sort_by='ts'):
msg.append(format_archive(archive_info))
msg.append("Type 'YES' if you understand this and want to continue: ")
msg = '\n'.join(msg)
if not yes(msg, false_msg="Aborting.", truish=('YES', ),
@ -735,6 +771,14 @@ class Archiver:
@with_repository()
def do_list(self, args, repository, manifest, key):
"""List archive or repository contents"""
if not hasattr(sys.stdout, 'buffer'):
# This is a shim for supporting unit tests replacing sys.stdout with e.g. StringIO,
# which doesn't have an underlying buffer (= lower file object).
def write(bytestring):
sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
else:
write = sys.stdout.buffer.write
if args.location.archive:
matcher, _ = self.build_matcher(args.excludes, args.paths)
with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
@ -751,23 +795,22 @@ class Archiver:
format = "{mode} {user:6} {group:6} {size:8} {isomtime} {path}{extra}{NL}"
formatter = ItemFormatter(archive, format)
if not hasattr(sys.stdout, 'buffer'):
# This is a shim for supporting unit tests replacing sys.stdout with e.g. StringIO,
# which doesn't have an underlying buffer (= lower file object).
def write(bytestring):
sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
else:
write = sys.stdout.buffer.write
for item in archive.iter_items(lambda item: matcher.match(item[b'path'])):
for item in archive.iter_items(lambda item: matcher.match(item.path)):
write(safe_encode(formatter.format_item(item)))
else:
if args.format:
format = args.format
elif args.short:
format = "{archive}{NL}"
else:
format = "{archive:<36} {time} [{id}]{NL}"
formatter = ArchiveFormatter(format)
for archive_info in manifest.list_archive_infos(sort_by='ts'):
if args.prefix and not archive_info.name.startswith(args.prefix):
continue
if args.short:
print(archive_info.name)
else:
print(format_archive(archive_info))
write(safe_encode(formatter.format_item(archive_info)))
return self.exit_code
@with_repository(cache=True)
@ -845,7 +888,7 @@ class Archiver:
else:
if args.output_list:
list_logger.info('Pruning archive: %s' % format_archive(archive))
Archive(repository, key, manifest, archive.name, cache).delete(stats)
Archive(repository, key, manifest, archive.name, cache).delete(stats, forced=args.forced)
else:
if args.output_list:
list_logger.info('Keeping archive: %s' % format_archive(archive))
@ -905,27 +948,26 @@ class Archiver:
file_status_printer=self.print_file_status,
dry_run=args.dry_run)
signal.signal(signal.SIGTERM, interrupt)
signal.signal(signal.SIGINT, interrupt)
if args.location.archive:
name = args.location.archive
if recreater.is_temporary_archive(name):
self.print_error('Refusing to work on temporary archive of prior recreate: %s', name)
return self.exit_code
recreater.recreate(name, args.comment)
else:
for archive in manifest.list_archive_infos(sort_by='ts'):
name = archive.name
with signal_handler(signal.SIGTERM, interrupt), \
signal_handler(signal.SIGINT, interrupt):
if args.location.archive:
name = args.location.archive
if recreater.is_temporary_archive(name):
continue
print('Processing', name)
if not recreater.recreate(name, args.comment):
break
manifest.write()
repository.commit()
cache.commit()
return self.exit_code
self.print_error('Refusing to work on temporary archive of prior recreate: %s', name)
return self.exit_code
recreater.recreate(name, args.comment)
else:
for archive in manifest.list_archive_infos(sort_by='ts'):
name = archive.name
if recreater.is_temporary_archive(name):
continue
print('Processing', name)
if not recreater.recreate(name, args.comment):
break
manifest.write()
repository.commit()
cache.commit()
return self.exit_code
@with_repository(manifest=False)
def do_with_lock(self, args, repository):
@ -1017,26 +1059,27 @@ class Archiver:
helptext = {}
helptext['patterns'] = textwrap.dedent('''
Exclusion patterns support four separate styles, fnmatch, shell, regular
expressions and path prefixes. If followed by a colon (':') the first two
characters of a pattern are used as a style selector. Explicit style
selection is necessary when a non-default style is desired or when the
desired pattern starts with two alphanumeric characters followed by a colon
(i.e. `aa:something/*`).
expressions and path prefixes. By default, fnmatch is used. If followed
by a colon (':') the first two characters of a pattern are used as a
style selector. Explicit style selection is necessary when a
non-default style is desired or when the desired pattern starts with
two alphanumeric characters followed by a colon (i.e. `aa:something/*`).
`Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
These patterns use a variant of shell pattern syntax, with '*' matching
any number of characters, '?' matching any single character, '[...]'
matching any single character specified, including ranges, and '[!...]'
matching any character not specified. For the purpose of these patterns,
the path separator ('\\' for Windows and '/' on other systems) is not
treated specially. Wrap meta-characters in brackets for a literal match
(i.e. `[?]` to match the literal character `?`). For a path to match
a pattern, it must completely match from start to end, or must match from
the start to just before a path separator. Except for the root path,
paths will never end in the path separator when matching is attempted.
Thus, if a given pattern ends in a path separator, a '*' is appended
before matching is attempted.
This is the default style. These patterns use a variant of shell
pattern syntax, with '*' matching any number of characters, '?'
matching any single character, '[...]' matching any single
character specified, including ranges, and '[!...]' matching any
character not specified. For the purpose of these patterns, the
path separator ('\\' for Windows and '/' on other systems) is not
treated specially. Wrap meta-characters in brackets for a literal
match (i.e. `[?]` to match the literal character `?`). For a path
to match a pattern, it must completely match from start to end, or
must match from the start to just before a path separator. Except
for the root path, paths will never end in the path separator when
matching is attempted. Thus, if a given pattern ends in a path
separator, a '*' is appended before matching is attempted.
Shell-style patterns, selector `sh:`
@ -1072,36 +1115,67 @@ class Archiver:
whitespace removal paths with whitespace at the beginning or end can only be
excluded using regular expressions.
Examples:
Examples::
# Exclude '/home/user/file.o' but not '/home/user/file.odt':
$ borg create -e '*.o' backup /
# Exclude '/home/user/file.o' but not '/home/user/file.odt':
$ borg create -e '*.o' backup /
# Exclude '/home/user/junk' and '/home/user/subdir/junk' but
# not '/home/user/importantjunk' or '/etc/junk':
$ borg create -e '/home/*/junk' backup /
# Exclude '/home/user/junk' and '/home/user/subdir/junk' but
# not '/home/user/importantjunk' or '/etc/junk':
$ borg create -e '/home/*/junk' backup /
# Exclude the contents of '/home/user/cache' but not the directory itself:
$ borg create -e /home/user/cache/ backup /
# Exclude the contents of '/home/user/cache' but not the directory itself:
$ borg create -e /home/user/cache/ backup /
# The file '/home/user/cache/important' is *not* backed up:
$ borg create -e /home/user/cache/ backup / /home/user/cache/important
# The file '/home/user/cache/important' is *not* backed up:
$ borg create -e /home/user/cache/ backup / /home/user/cache/important
# The contents of directories in '/home' are not backed up when their name
# ends in '.tmp'
$ borg create --exclude 're:^/home/[^/]+\.tmp/' backup /
# The contents of directories in '/home' are not backed up when their name
# ends in '.tmp'
$ borg create --exclude 're:^/home/[^/]+\.tmp/' backup /
# Load exclusions from file
$ cat >exclude.txt <<EOF
# Comment line
/home/*/junk
*.tmp
fm:aa:something/*
re:^/home/[^/]\.tmp/
sh:/home/*/.thumbnails
EOF
$ borg create --exclude-from exclude.txt backup /
''')
# Load exclusions from file
$ cat >exclude.txt <<EOF
# Comment line
/home/*/junk
*.tmp
fm:aa:something/*
re:^/home/[^/]\.tmp/
sh:/home/*/.thumbnails
EOF
$ borg create --exclude-from exclude.txt backup /\n\n''')
helptext['placeholders'] = textwrap.dedent('''
Repository (or Archive) URLs and --prefix values support these placeholders:
{hostname}
The (short) hostname of the machine.
{fqdn}
The full name of the machine.
{now}
The current local date and time.
{utcnow}
The current UTC date and time.
{user}
The user name (or UID, if no name is available) of the user running borg.
{pid}
The current process ID.
Examples::
borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
borg prune --prefix '{hostname}-' ...\n\n''')
def do_help(self, parser, commands, args):
if not args.topic:
@ -1162,8 +1236,8 @@ class Archiver:
help='do not load/update the file metadata cache used to detect unchanged files')
common_group.add_argument('--umask', dest='umask', type=lambda s: int(s, 8), default=UMASK_DEFAULT, metavar='M',
help='set umask to M (local and remote, default: %(default)04o)')
common_group.add_argument('--remote-path', dest='remote_path', default='borg', metavar='PATH',
help='set remote path to executable (default: "%(default)s")')
common_group.add_argument('--remote-path', dest='remote_path', metavar='PATH',
help='set remote path to executable (default: "borg")')
parser = argparse.ArgumentParser(prog=prog, description='Borg - Deduplicated Backups')
parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__,
@ -1180,6 +1254,8 @@ class Archiver:
subparser.set_defaults(func=self.do_serve)
subparser.add_argument('--restrict-to-path', dest='restrict_to_paths', action='append',
metavar='PATH', help='restrict repository access to PATH')
subparser.add_argument('--append-only', dest='append_only', action='store_true',
help='only allow appending to repository segment files')
init_epilog = textwrap.dedent("""
This command initializes an empty repository. A repository is a filesystem
directory containing the deduplicated data from zero or more archives.
@ -1262,9 +1338,12 @@ class Archiver:
- Check if archive metadata chunk is present. if not, remove archive from
manifest.
- For all files (items) in the archive, for all chunks referenced by these
files, check if chunk is present (if not and we are in repair mode, replace
it with a same-size chunk of zeros). This requires reading of archive and
file metadata, but not data.
files, check if chunk is present.
If a chunk is not present and we are in repair mode, replace it with a same-size
replacement chunk of zeros.
If a previously lost chunk reappears (e.g. via a later backup) and we are in
repair mode, the all-zero replacement chunk will be replaced by the correct chunk.
This requires reading of archive and file metadata, but not data.
- If we are in repair mode and we checked all the archives: delete orphaned
chunks from the repo.
- if you use a remote repo server via ssh:, the archive check is executed on
@ -1314,7 +1393,7 @@ class Archiver:
subparser.add_argument('--last', dest='last',
type=int, default=None, metavar='N',
help='only check last N archives (Default: all)')
subparser.add_argument('-P', '--prefix', dest='prefix', type=str,
subparser.add_argument('-P', '--prefix', dest='prefix', type=PrefixSpec,
help='only consider archive names starting with this prefix')
subparser.add_argument('-p', '--progress', dest='progress',
action='store_true', default=False,
@ -1370,7 +1449,7 @@ class Archiver:
checkpoints and treated in special ways.
In the archive name, you may use the following format tags:
{now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}
{now}, {utcnow}, {fqdn}, {hostname}, {user}, {pid}, {uuid4}
To speed up pulling backups over sshfs and similar network file systems which do
not provide correct inode information the --ignore-inode flag can be used. This
@ -1378,6 +1457,7 @@ class Archiver:
all files on these file systems.
See the output of the "borg help patterns" command for more help on exclude patterns.
See the output of the "borg help placeholders" command for more help on placeholders.
""")
subparser = subparsers.add_parser('create', parents=[common_parser], add_help=False,
@ -1435,7 +1515,8 @@ class Archiver:
help='ignore inode data in the file metadata cache used to detect unchanged files.')
fs_group.add_argument('--read-special', dest='read_special',
action='store_true', default=False,
help='open and read special files as if they were regular files')
help='open and read block and char device files as well as FIFOs as if they were '
'regular files. Also follows symlinks pointing to these kinds of files.')
archive_group = subparser.add_argument_group('Archive options')
archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default='',
@ -1446,8 +1527,8 @@ class Archiver:
help='manually specify the archive creation date/time (UTC). '
'alternatively, give a reference file/directory.')
archive_group.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
type=int, default=300, metavar='SECONDS',
help='write checkpoint every SECONDS seconds (Default: 300)')
type=int, default=1800, metavar='SECONDS',
help='write checkpoint every SECONDS seconds (Default: 1800)')
archive_group.add_argument('--chunker-params', dest='chunker_params',
type=ChunkerParams, default=CHUNKER_PARAMS,
metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
@ -1606,6 +1687,9 @@ class Archiver:
subparser.add_argument('-c', '--cache-only', dest='cache_only',
action='store_true', default=False,
help='delete only the local cache for the given repository')
subparser.add_argument('--force', dest='forced',
action='store_true', default=False,
help='force deletion of corrupted archives')
subparser.add_argument('--save-space', dest='save_space', action='store_true',
default=False,
help='work slower, but using less space')
@ -1618,8 +1702,13 @@ class Archiver:
See the "borg help patterns" command for more help on exclude patterns.
The following keys are available for --format when listing files:
The following keys are available for --format:
""") + BaseFormatter.keys_help() + textwrap.dedent("""
-- Keys for listing repository archives:
""") + ArchiveFormatter.keys_help() + textwrap.dedent("""
-- Keys for listing archive files:
""") + ItemFormatter.keys_help()
subparser = subparsers.add_parser('list', parents=[common_parser], add_help=False,
description=self.do_list.__doc__,
@ -1633,7 +1722,7 @@ class Archiver:
subparser.add_argument('--format', '--list-format', dest='format', type=str,
help="""specify format for file listing
(default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
subparser.add_argument('-P', '--prefix', dest='prefix', type=str,
subparser.add_argument('-P', '--prefix', dest='prefix', type=PrefixSpec,
help='only consider archive names starting with this prefix')
subparser.add_argument('-e', '--exclude', dest='excludes',
type=parse_pattern, action='append',
@ -1660,6 +1749,13 @@ class Archiver:
To allow a regular user to use fstab entries, add the ``user`` option:
``/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0``
For mount options, see the fuse(8) manual page. Additional mount options
supported by borg:
- allow_damaged_files: by default damaged files (where missing chunks were
replaced with runs of zeros by borg check --repair) are not readable and
return EIO (I/O error). Set this option to read such files.
The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users
to tweak the performance. It sets the number of cached data chunks; additional
memory usage can be up to ~8 MiB times this number. The default is the number
@ -1714,7 +1810,7 @@ class Archiver:
help='repository for which to break the locks')
prune_epilog = textwrap.dedent("""
The prune command prunes a repository by deleting archives not matching
The prune command prunes a repository by deleting all archives not matching
any of the specified retention options. This command is normally used by
automated backup scripts wanting to keep a certain number of historic backups.
@ -1743,7 +1839,7 @@ class Archiver:
up to 7 most recent days with backups (days without backups do not count).
The rules are applied from secondly to yearly, and backups selected by previous
rules do not count towards those of later rules. The time that each backup
completes is used for pruning purposes. Dates and times are interpreted in
starts is used for pruning purposes. Dates and times are interpreted in
the local timezone, and weeks go from Monday to Sunday. Specifying a
negative number of archives to keep means that there is no limit.
@ -1760,6 +1856,9 @@ class Archiver:
subparser.add_argument('-n', '--dry-run', dest='dry_run',
default=False, action='store_true',
help='do not change repository')
subparser.add_argument('--force', dest='forced',
action='store_true', default=False,
help='force pruning of corrupted archives')
subparser.add_argument('-s', '--stats', dest='stats',
action='store_true', default=False,
help='print statistics for the deleted archive')
@ -1782,7 +1881,7 @@ class Archiver:
help='number of monthly archives to keep')
subparser.add_argument('-y', '--keep-yearly', dest='yearly', type=int, default=0,
help='number of yearly archives to keep')
subparser.add_argument('-P', '--prefix', dest='prefix', type=str,
subparser.add_argument('-P', '--prefix', dest='prefix', type=PrefixSpec,
help='only consider archive names starting with this prefix')
subparser.add_argument('--save-space', dest='save_space', action='store_true',
default=False,
@ -2068,8 +2167,9 @@ class Archiver:
if result.func != forced_result.func:
# someone is trying to execute a different borg subcommand, don't do that!
return forced_result
# the only thing we take from the forced "borg serve" ssh command is --restrict-to-path
# we only take specific options from the forced "borg serve" command:
result.restrict_to_paths = forced_result.restrict_to_paths
result.append_only = forced_result.append_only
return result
def parse_args(self, args=None):
@ -2126,7 +2226,7 @@ def sig_info_handler(signum, stack): # pragma: no cover
logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total)))
break
if func in ('extract_item', ): # extract op
path = loc['item'][b'path']
path = loc['item'].path
try:
pos = loc['fd'].tell()
except Exception:
@ -2159,14 +2259,22 @@ def main(): # pragma: no cover
if os.path.basename(sys.argv[0]) == "borgfs":
sys.argv.insert(1, "mount")
# Make sure stdout and stderr have errors='replace') to avoid unicode
# Make sure stdout and stderr have errors='replace' to avoid unicode
# issues when print()-ing unicode file names
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, sys.stdout.encoding, 'replace', line_buffering=True)
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, sys.stderr.encoding, 'replace', line_buffering=True)
setup_signal_handlers()
archiver = Archiver()
msg = None
args = archiver.get_args(sys.argv, os.environ.get('SSH_ORIGINAL_COMMAND'))
try:
args = archiver.get_args(sys.argv, os.environ.get('SSH_ORIGINAL_COMMAND'))
except Error as e:
msg = e.get_message()
if e.traceback:
msg += "\n%s\n%s" % (traceback.format_exc(), sysinfo())
# we might not have logging setup yet, so get out quickly
print(msg, file=sys.stderr)
sys.exit(e.exit_code)
try:
exit_code = archiver.run(args)
except Error as e:

View file

@ -16,6 +16,7 @@ from .helpers import get_cache_dir
from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex
from .helpers import format_file_size
from .helpers import yes
from .item import Item
from .key import PlaintextKey
from .locking import UpgradableLock
from .remote import cache_if_remote
@ -298,8 +299,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
if not isinstance(item, dict):
logger.error('Error: Did not get expected metadata dict - archive corrupted!')
continue
if b'chunks' in item:
for chunk_id, size, csize in item[b'chunks']:
item = Item(internal_dict=item)
if 'chunks' in item:
for chunk_id, size, csize in item.chunks:
chunk_idx.add(chunk_id, 1, size, csize)
if self.do_cache:
fn = mkpath(archive_id)

View file

@ -1,10 +1,20 @@
# this set must be kept complete, otherwise the RobustUnpacker might malfunction:
ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', b'win_dacl'])
ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', 'win_dacl'])
# this is the set of keys that are always present in items:
REQUIRED_ITEM_KEYS = frozenset(['path', 'mtime', ])
# this set must be kept complete, otherwise rebuild_manifest might malfunction:
ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end',
'comment', 'chunker_params',
'recreate_cmdline', 'recreate_source_id', 'recreate_args'])
# this is the set of keys that are always present in archives:
REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])
ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end')
ITEM_TEXT_KEYS = (b'path', b'source', b'user', b'group')
# default umask, overriden by --umask, defaults to read/write only for owner
UMASK_DEFAULT = 0o077

View file

@ -12,13 +12,12 @@ cdef extern from "openssl/evp.h":
ctypedef struct EVP_CIPHER:
pass
ctypedef struct EVP_CIPHER_CTX:
unsigned char *iv
pass
ctypedef struct ENGINE:
pass
const EVP_CIPHER *EVP_aes_256_ctr()
void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a)
void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a)
EVP_CIPHER_CTX *EVP_CIPHER_CTX_new()
void EVP_CIPHER_CTX_free(EVP_CIPHER_CTX *a)
int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, ENGINE *impl,
const unsigned char *key, const unsigned char *iv)
@ -44,16 +43,38 @@ import struct
_int = struct.Struct('>I')
_long = struct.Struct('>Q')
_2long = struct.Struct('>QQ')
bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0]
bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0]
long_to_bytes = lambda x: _long.pack(x)
cdef Py_buffer ro_buffer(object data) except *:
cdef Py_buffer view
PyObject_GetBuffer(data, &view, PyBUF_SIMPLE)
return view
def bytes16_to_int(b, offset=0):
h, l = _2long.unpack_from(b, offset)
return (h << 64) + l
def int_to_bytes16(i):
max_uint64 = 0xffffffffffffffff
l = i & max_uint64
h = (i >> 64) & max_uint64
return _2long.pack(h, l)
def increment_iv(iv, amount=1):
"""
Increment the IV by the given amount (default 1).
:param iv: input IV, 16 bytes (128 bit)
:param amount: increment value
:return: input_IV + amount, 16 bytes (128 bit)
"""
assert len(iv) == 16
iv = bytes16_to_int(iv)
iv += amount
iv = int_to_bytes16(iv)
return iv
def num_aes_blocks(int length):
@ -63,27 +84,35 @@ def num_aes_blocks(int length):
return (length + 15) // 16
cdef Py_buffer ro_buffer(object data) except *:
cdef Py_buffer view
PyObject_GetBuffer(data, &view, PyBUF_SIMPLE)
return view
cdef class AES:
"""A thin wrapper around the OpenSSL EVP cipher API
"""
cdef EVP_CIPHER_CTX ctx
cdef EVP_CIPHER_CTX *ctx
cdef int is_encrypt
cdef unsigned char iv_orig[16]
cdef long long blocks
def __cinit__(self, is_encrypt, key, iv=None):
EVP_CIPHER_CTX_init(&self.ctx)
self.ctx = EVP_CIPHER_CTX_new()
self.is_encrypt = is_encrypt
# Set cipher type and mode
cipher_mode = EVP_aes_256_ctr()
if self.is_encrypt:
if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL):
if not EVP_EncryptInit_ex(self.ctx, cipher_mode, NULL, NULL, NULL):
raise Exception('EVP_EncryptInit_ex failed')
else: # decrypt
if not EVP_DecryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL):
if not EVP_DecryptInit_ex(self.ctx, cipher_mode, NULL, NULL, NULL):
raise Exception('EVP_DecryptInit_ex failed')
self.reset(key, iv)
def __dealloc__(self):
EVP_CIPHER_CTX_cleanup(&self.ctx)
EVP_CIPHER_CTX_free(self.ctx)
def reset(self, key=None, iv=None):
cdef const unsigned char *key2 = NULL
@ -92,17 +121,21 @@ cdef class AES:
key2 = key
if iv:
iv2 = iv
assert isinstance(iv, bytes) and len(iv) == 16
for i in range(16):
self.iv_orig[i] = iv[i]
self.blocks = 0 # number of AES blocks encrypted starting with iv_orig
# Initialise key and IV
if self.is_encrypt:
if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
if not EVP_EncryptInit_ex(self.ctx, NULL, NULL, key2, iv2):
raise Exception('EVP_EncryptInit_ex failed')
else: # decrypt
if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
if not EVP_DecryptInit_ex(self.ctx, NULL, NULL, key2, iv2):
raise Exception('EVP_DecryptInit_ex failed')
@property
def iv(self):
return self.ctx.iv[:16]
return increment_iv(self.iv_orig[:16], self.blocks)
def encrypt(self, data):
cdef Py_buffer data_buf = ro_buffer(data)
@ -114,12 +147,13 @@ cdef class AES:
if not out:
raise MemoryError
try:
if not EVP_EncryptUpdate(&self.ctx, out, &outl, <const unsigned char*> data_buf.buf, inl):
if not EVP_EncryptUpdate(self.ctx, out, &outl, <const unsigned char*> data_buf.buf, inl):
raise Exception('EVP_EncryptUpdate failed')
ctl = outl
if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl):
if not EVP_EncryptFinal_ex(self.ctx, out+ctl, &outl):
raise Exception('EVP_EncryptFinal failed')
ctl += outl
self.blocks += num_aes_blocks(ctl)
return out[:ctl]
finally:
free(out)
@ -137,15 +171,16 @@ cdef class AES:
if not out:
raise MemoryError
try:
if not EVP_DecryptUpdate(&self.ctx, out, &outl, <const unsigned char*> data_buf.buf, inl):
if not EVP_DecryptUpdate(self.ctx, out, &outl, <const unsigned char*> data_buf.buf, inl):
raise Exception('EVP_DecryptUpdate failed')
ptl = outl
if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0:
if EVP_DecryptFinal_ex(self.ctx, out+ptl, &outl) <= 0:
# this error check is very important for modes with padding or
# authentication. for them, a failure here means corrupted data.
# CTR mode does not use padding nor authentication.
raise Exception('EVP_DecryptFinal failed')
ptl += outl
self.blocks += num_aes_blocks(inl)
return out[:ptl]
finally:
free(out)

View file

@ -14,8 +14,8 @@ from .logger import create_logger
logger = create_logger()
from .archive import Archive
from .helpers import daemonize
from .helpers import bigint_to_int
from .helpers import daemonize, safe_encode
from .item import Item
from .lrucache import LRUCache
# Does this version of llfuse support ns precision?
@ -38,17 +38,21 @@ class ItemCache:
def add(self, item):
pos = self.fd.seek(0, io.SEEK_END)
self.fd.write(msgpack.packb(item))
self.fd.write(msgpack.packb(item.as_dict()))
return pos + self.offset
def get(self, inode):
self.fd.seek(inode - self.offset, io.SEEK_SET)
return next(msgpack.Unpacker(self.fd, read_size=1024))
item = next(msgpack.Unpacker(self.fd, read_size=1024))
return Item(internal_dict=item)
class FuseOperations(llfuse.Operations):
"""Export archive as a fuse filesystem
"""
allow_damaged_files = False
def __init__(self, key, repository, manifest, archive, cached_repo):
super().__init__()
self._inode_count = 0
@ -57,7 +61,7 @@ class FuseOperations(llfuse.Operations):
self.items = {}
self.parent = {}
self.contents = defaultdict(dict)
self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()}
self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid())
self.pending_archives = {}
self.accounted_chunks = {}
self.cache = ItemCache()
@ -78,6 +82,32 @@ class FuseOperations(llfuse.Operations):
self.contents[1][os.fsencode(archive_name)] = archive_inode
self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)
def mount(self, mountpoint, mount_options, foreground=False):
"""Mount filesystem on *mountpoint* with *mount_options*."""
options = ['fsname=borgfs', 'ro']
if mount_options:
options.extend(mount_options.split(','))
try:
options.remove('allow_damaged_files')
self.allow_damaged_files = True
except ValueError:
pass
llfuse.init(self, mountpoint, options)
if not foreground:
daemonize()
# If the file system crashes, we do not want to umount because in that
# case the mountpoint suddenly appears to become empty. This can have
# nasty consequences, imagine the user has e.g. an active rsync mirror
# job - seeing the mountpoint empty, rsync would delete everything in the
# mirror.
umount = False
try:
signal = fuse_main()
umount = (signal is None) # no crash and no signal -> umount request
finally:
llfuse.close(umount)
def process_archive(self, archive, prefix=[]):
"""Build fuse inode hierarchy from archive metadata
"""
@ -86,8 +116,19 @@ class FuseOperations(llfuse.Operations):
_, data = self.key.decrypt(key, chunk)
unpacker.feed(data)
for item in unpacker:
segments = prefix + os.fsencode(os.path.normpath(item[b'path'])).split(b'/')
del item[b'path']
item = Item(internal_dict=item)
try:
# This can happen if an archive was created with a command line like
# $ borg create ... dir1/file dir1
# In this case the code below will have created a default_dir inode for dir1 already.
inode = self._find_inode(safe_encode(item.path), prefix)
except KeyError:
pass
else:
self.items[inode] = item
continue
segments = prefix + os.fsencode(os.path.normpath(item.path)).split(b'/')
del item.path
num_segments = len(segments)
parent = 1
for i, segment in enumerate(segments, 1):
@ -98,10 +139,10 @@ class FuseOperations(llfuse.Operations):
self.parent[archive_inode] = parent
# Leaf segment?
if i == num_segments:
if b'source' in item and stat.S_ISREG(item[b'mode']):
inode = self._find_inode(item[b'source'], prefix)
if 'source' in item and stat.S_ISREG(item.mode):
inode = self._find_inode(item.source, prefix)
item = self.cache.get(inode)
item[b'nlink'] = item.get(b'nlink', 1) + 1
item.nlink = item.get('nlink', 1) + 1
self.items[inode] = item
else:
inode = self.cache.add(item)
@ -151,60 +192,47 @@ class FuseOperations(llfuse.Operations):
item = self.get_item(inode)
size = 0
dsize = 0
try:
for key, chunksize, _ in item[b'chunks']:
if 'chunks' in item:
for key, chunksize, _ in item.chunks:
size += chunksize
if self.accounted_chunks.get(key, inode) == inode:
self.accounted_chunks[key] = inode
dsize += chunksize
except KeyError:
pass
entry = llfuse.EntryAttributes()
entry.st_ino = inode
entry.generation = 0
entry.entry_timeout = 300
entry.attr_timeout = 300
entry.st_mode = item[b'mode']
entry.st_nlink = item.get(b'nlink', 1)
entry.st_uid = item[b'uid']
entry.st_gid = item[b'gid']
entry.st_rdev = item.get(b'rdev', 0)
entry.st_mode = item.mode
entry.st_nlink = item.get('nlink', 1)
entry.st_uid = item.uid
entry.st_gid = item.gid
entry.st_rdev = item.get('rdev', 0)
entry.st_size = size
entry.st_blksize = 512
entry.st_blocks = dsize / 512
# note: older archives only have mtime (not atime nor ctime)
mtime_ns = item.mtime
if have_fuse_xtime_ns:
entry.st_mtime_ns = bigint_to_int(item[b'mtime'])
if b'atime' in item:
entry.st_atime_ns = bigint_to_int(item[b'atime'])
else:
entry.st_atime_ns = bigint_to_int(item[b'mtime'])
if b'ctime' in item:
entry.st_ctime_ns = bigint_to_int(item[b'ctime'])
else:
entry.st_ctime_ns = bigint_to_int(item[b'mtime'])
entry.st_mtime_ns = mtime_ns
entry.st_atime_ns = item.get('atime', mtime_ns)
entry.st_ctime_ns = item.get('ctime', mtime_ns)
else:
entry.st_mtime = bigint_to_int(item[b'mtime']) / 1e9
if b'atime' in item:
entry.st_atime = bigint_to_int(item[b'atime']) / 1e9
else:
entry.st_atime = bigint_to_int(item[b'mtime']) / 1e9
if b'ctime' in item:
entry.st_ctime = bigint_to_int(item[b'ctime']) / 1e9
else:
entry.st_ctime = bigint_to_int(item[b'mtime']) / 1e9
entry.st_mtime = mtime_ns / 1e9
entry.st_atime = item.get('atime', mtime_ns) / 1e9
entry.st_ctime = item.get('ctime', mtime_ns) / 1e9
return entry
def listxattr(self, inode, ctx=None):
item = self.get_item(inode)
return item.get(b'xattrs', {}).keys()
return item.get('xattrs', {}).keys()
def getxattr(self, inode, name, ctx=None):
item = self.get_item(inode)
try:
return item.get(b'xattrs', {})[name]
return item.get('xattrs', {})[name]
except KeyError:
raise llfuse.FUSEError(errno.ENODATA) from None
raise llfuse.FUSEError(llfuse.ENOATTR) from None
def _load_pending_archive(self, inode):
# Check if this is an archive we need to load
@ -225,6 +253,15 @@ class FuseOperations(llfuse.Operations):
return self.getattr(inode)
def open(self, inode, flags, ctx=None):
if not self.allow_damaged_files:
item = self.get_item(inode)
if 'chunks_healthy' in item:
# Processed archive items don't carry the path anymore; for converting the inode
# to the path we'd either have to store the inverse of the current structure,
# or search the entire archive. So we just don't print it. It's easy to correlate anyway.
logger.warning('File has damaged (all-zero) chunks. Try running borg check --repair. '
'Mount with allow_damaged_files to read damaged files.')
raise llfuse.FUSEError(errno.EIO)
return inode
def opendir(self, inode, ctx=None):
@ -234,7 +271,7 @@ class FuseOperations(llfuse.Operations):
def read(self, fh, offset, size):
parts = []
item = self.get_item(fh)
for id, s, csize in item[b'chunks']:
for id, s, csize in item.chunks:
if s < offset:
offset -= s
continue
@ -264,24 +301,4 @@ class FuseOperations(llfuse.Operations):
def readlink(self, inode, ctx=None):
item = self.get_item(inode)
return os.fsencode(item[b'source'])
def mount(self, mountpoint, extra_options, foreground=False):
options = ['fsname=borgfs', 'ro']
if extra_options:
options.extend(extra_options.split(','))
llfuse.init(self, mountpoint, options)
if not foreground:
daemonize()
# If the file system crashes, we do not want to umount because in that
# case the mountpoint suddenly appears to become empty. This can have
# nasty consequences, imagine the user has e.g. an active rsync mirror
# job - seeing the mountpoint empty, rsync would delete everything in the
# mirror.
umount = False
try:
signal = fuse_main()
umount = (signal is None) # no crash and no signal -> umount request
finally:
llfuse.close(umount)
return os.fsencode(item.source)

View file

@ -18,8 +18,6 @@ cdef extern from "_hashindex.c":
HashIndex *hashindex_read(char *path)
HashIndex *hashindex_init(int capacity, int key_size, int value_size)
void hashindex_free(HashIndex *index)
void hashindex_merge(HashIndex *index, HashIndex *other)
void hashindex_add(HashIndex *index, void *key, void *value)
int hashindex_get_size(HashIndex *index)
int hashindex_write(HashIndex *index, char *path)
void *hashindex_get(HashIndex *index, void *key)
@ -323,7 +321,8 @@ cdef class ChunkIndex(IndexBase):
values[1] = data[1]
values[2] = data[2]
else:
hashindex_set(self.index, key, data)
if not hashindex_set(self.index, key, data):
raise Exception('hashindex_set failed')
def merge(self, ChunkIndex other):
cdef void *key = NULL

View file

@ -5,14 +5,17 @@ import os
import os.path
import platform
import re
import signal
import socket
import sys
import stat
import textwrap
import time
import unicodedata
import uuid
from binascii import hexlify
from collections import namedtuple, deque
from contextlib import contextmanager
from datetime import datetime, timezone, timedelta
from fnmatch import translate
from functools import wraps, partial
@ -68,18 +71,6 @@ class ErrorWithTraceback(Error):
traceback = True
class InternalOSError(Error):
"""Error while accessing repository: [Errno {}] {}: {}"""
def __init__(self, os_error):
self.errno = os_error.errno
self.strerror = os_error.strerror
self.filename = os_error.filename
def get_message(self):
return self.__doc__.format(self.errno, self.strerror, self.filename)
class IntegrityError(ErrorWithTraceback):
"""Data integrity error"""
@ -88,6 +79,14 @@ class ExtensionModuleError(Error):
"""The Borg binary extension modules do not seem to be properly installed"""
class NoManifestError(Error):
"""Repository has no manifest."""
class PlaceholderError(Error):
"""Formatting Error: "{}".format({}): {}({})"""
def check_extension_modules():
from . import platform
if hashindex.API_VERSION != 2:
@ -104,11 +103,12 @@ class Manifest:
MANIFEST_ID = b'\0' * 32
def __init__(self, key, repository):
def __init__(self, key, repository, item_keys=None):
self.archives = {}
self.config = {}
self.key = key
self.repository = repository
self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
@property
def id_str(self):
@ -117,7 +117,11 @@ class Manifest:
@classmethod
def load(cls, repository, key=None):
from .key import key_factory
cdata = repository.get(cls.MANIFEST_ID)
from .repository import Repository
try:
cdata = repository.get(cls.MANIFEST_ID)
except Repository.ObjectNotFound:
raise NoManifestError
if not key:
key = key_factory(repository, cdata)
manifest = cls(key, repository)
@ -131,6 +135,8 @@ class Manifest:
if manifest.timestamp:
manifest.timestamp = manifest.timestamp.decode('ascii')
manifest.config = m[b'config']
# valid item keys are whatever is known in the repo or every key we know
manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get(b'item_keys', []))
return manifest, key
def write(self):
@ -140,6 +146,7 @@ class Manifest:
'archives': self.archives,
'timestamp': self.timestamp,
'config': self.config,
'item_keys': tuple(self.item_keys),
}))
self.id = self.key.id_hash(data)
self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data)))
@ -516,6 +523,10 @@ def CompressionSpec(s):
raise ValueError
def PrefixSpec(s):
return replace_placeholders(s)
def dir_is_cachedir(path):
"""Determines whether the specified path is a cache directory (and
therefore should potentially be excluded from the backup) according to
@ -567,18 +578,25 @@ def partial_format(format, mapping):
def format_line(format, data):
# TODO: Filter out unwanted properties of str.format(), because "format" is user provided.
try:
return format.format(**data)
except (KeyError, ValueError) as e:
# this should catch format errors
print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
except Exception as e:
# something unexpected, print error and raise exception
print('Error in lineformat: "{}" - reason "{}"'.format(format, str(e)))
raise
return ''
raise PlaceholderError(format, data, e.__class__.__name__, str(e))
def replace_placeholders(text):
    """Replace placeholders in *text* with their values.

    *text* is a str.format-style template; supported placeholders are the
    keys of *data* below. Formatting errors are reported by format_line()
    (which raises PlaceholderError for bad templates).
    """
    current_time = datetime.now()
    data = {
        'pid': os.getpid(),
        'fqdn': socket.getfqdn(),
        'hostname': socket.gethostname(),
        # use the timestamp sampled above directly. The original called the
        # classmethods .now() / .utcnow() *on* the instance, which sampled
        # the clock a second time and could make {now} disagree with
        # current_time by some microseconds.
        'now': current_time,
        'utcnow': datetime.utcnow(),
        'user': uid2user(os.getuid(), os.getuid()),
        'uuid4': str(uuid.uuid4()),
    }
    return format_line(text, data)
def safe_timestamp(item_timestamp_ns):
@ -777,21 +795,8 @@ class Location:
if not self.parse(self.orig):
raise ValueError
def preformat_text(self, text):
"""Format repository and archive path with common tags"""
current_time = datetime.now()
data = {
'pid': os.getpid(),
'fqdn': socket.getfqdn(),
'hostname': socket.gethostname(),
'now': current_time.now(),
'utcnow': current_time.utcnow(),
'user': uid2user(getuid(), getuid())
}
return format_line(text, data)
def parse(self, text):
text = self.preformat_text(text)
text = replace_placeholders(text)
valid = self._parse(text)
if valid:
return True
@ -995,8 +1000,7 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None,
retry_msg=None, invalid_msg=None, env_msg=None,
falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH,
default=False, retry=True, env_var_override=None, ofile=None, input=input):
"""
Output <msg> (usually a question) and let user input an answer.
"""Output <msg> (usually a question) and let user input an answer.
Qualifies the answer according to falsish, truish and defaultish as True, False or <default>.
If it didn't qualify and retry_msg is None (no retries wanted),
return the default [which defaults to False]. Otherwise let user retry
@ -1180,7 +1184,7 @@ def log_multi(*msgs, level=logging.INFO, logger=logger):
"""
log multiple lines of text, each line by a separate logging call for cosmetic reasons
each positional argument may be a single or multiple lines (separated by \n) of text.
each positional argument may be a single or multiple lines (separated by newlines) of text.
"""
lines = []
for msg in msgs:
@ -1189,7 +1193,7 @@ def log_multi(*msgs, level=logging.INFO, logger=logger):
logger.log(level, line)
class ItemFormatter:
class BaseFormatter:
FIXED_KEYS = {
# Formatting aids
'LF': '\n',
@ -1200,19 +1204,54 @@ class ItemFormatter:
'NEWLINE': os.linesep,
'NL': os.linesep,
}
def get_item_data(self, item):
raise NotImplementedError
def format_item(self, item):
return self.format.format_map(self.get_item_data(item))
@staticmethod
def keys_help():
return " - NEWLINE: OS dependent line separator\n" \
" - NL: alias of NEWLINE\n" \
" - NUL: NUL character for creating print0 / xargs -0 like output, see barchive/bpath\n" \
" - SPACE\n" \
" - TAB\n" \
" - CR\n" \
" - LF"
class ArchiveFormatter(BaseFormatter):
    """Render one line per archive from a user-supplied {placeholder} template."""
    def __init__(self, format):
        # resolve the fixed keys (NL, TAB, NUL, ...) once up front; the
        # archive-specific placeholders stay in the template and are filled
        # per archive by format_item() via get_item_data()
        self.format = partial_format(format, self.FIXED_KEYS)
    def get_item_data(self, archive):
        # mapping of template placeholders to values for one archive
        return {
            'barchive': archive.name,  # verbatim name (may contain surrogate escapes)
            'archive': remove_surrogates(archive.name),
            'id': bin_to_hex(archive.id),
            'time': format_time(to_localtime(archive.ts)),
        }
    @staticmethod
    def keys_help():
        # help text describing the archive-specific keys (shown by the CLI)
        return " - archive: archive name interpreted as text (might be missing non-text characters, see barchive)\n" \
               " - barchive: verbatim archive name, can contain any character except NUL\n" \
               " - time: time of creation of the archive\n" \
               " - id: internal ID of the archive"
class ItemFormatter(BaseFormatter):
KEY_DESCRIPTIONS = {
'bpath': 'verbatim POSIX path, can contain any character except NUL',
'path': 'path interpreted as text (might be missing non-text characters, see bpath)',
'source': 'link target for links (identical to linktarget)',
'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
'csize': 'compressed size',
'num_chunks': 'number of chunks in this file',
'unique_chunks': 'number of unique chunks in this file',
'NEWLINE': 'OS dependent line separator',
'NL': 'alias of NEWLINE',
'NUL': 'NUL character for creating print0 / xargs -0 like output, see bpath',
}
KEY_GROUPS = (
('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'),
@ -1220,7 +1259,6 @@ class ItemFormatter:
('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
tuple(sorted(hashlib.algorithms_guaranteed)),
('archiveid', 'archivename', 'extra'),
('NEWLINE', 'NL', 'NUL', 'SPACE', 'TAB', 'CR', 'LF'),
)
@classmethod
@ -1228,10 +1266,8 @@ class ItemFormatter:
class FakeArchive:
fpr = name = ""
fake_item = {
b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0,
b'uid': 0, b'gid': 0,
}
from .item import Item
fake_item = Item(mode=0, path='', user='', group='', mtime=0, uid=0, gid=0)
formatter = cls(FakeArchive, "")
keys = []
keys.extend(formatter.call_keys.keys())
@ -1242,6 +1278,9 @@ class ItemFormatter:
def keys_help(cls):
help = []
keys = cls.available_keys()
for key in cls.FIXED_KEYS:
keys.remove(key)
for group in cls.KEY_GROUPS:
for key in group:
keys.remove(key)
@ -1267,12 +1306,12 @@ class ItemFormatter:
'csize': self.calculate_csize,
'num_chunks': self.calculate_num_chunks,
'unique_chunks': self.calculate_unique_chunks,
'isomtime': partial(self.format_time, b'mtime'),
'isoctime': partial(self.format_time, b'ctime'),
'isoatime': partial(self.format_time, b'atime'),
'mtime': partial(self.time, b'mtime'),
'ctime': partial(self.time, b'ctime'),
'atime': partial(self.time, b'atime'),
'isomtime': partial(self.format_time, 'mtime'),
'isoctime': partial(self.format_time, 'ctime'),
'isoatime': partial(self.format_time, 'atime'),
'mtime': partial(self.time, 'mtime'),
'ctime': partial(self.time, 'ctime'),
'atime': partial(self.time, 'atime'),
}
for hash_function in hashlib.algorithms_guaranteed:
self.add_key(hash_function, partial(self.hash_item, hash_function))
@ -1284,11 +1323,11 @@ class ItemFormatter:
self.used_call_keys = set(self.call_keys) & self.format_keys
def get_item_data(self, item):
mode = stat.filemode(item[b'mode'])
mode = stat.filemode(item.mode)
item_type = mode[0]
item_data = self.item_data
source = item.get(b'source', '')
source = item.get('source', '')
extra = ''
if source:
source = remove_surrogates(source)
@ -1299,49 +1338,46 @@ class ItemFormatter:
extra = ' link to %s' % source
item_data['type'] = item_type
item_data['mode'] = mode
item_data['user'] = item[b'user'] or item[b'uid']
item_data['group'] = item[b'group'] or item[b'gid']
item_data['uid'] = item[b'uid']
item_data['gid'] = item[b'gid']
item_data['path'] = remove_surrogates(item[b'path'])
item_data['bpath'] = item[b'path']
item_data['user'] = item.user or item.uid
item_data['group'] = item.group or item.gid
item_data['uid'] = item.uid
item_data['gid'] = item.gid
item_data['path'] = remove_surrogates(item.path)
item_data['bpath'] = item.path
item_data['source'] = source
item_data['linktarget'] = source
item_data['extra'] = extra
item_data['flags'] = item.get(b'bsdflags')
item_data['flags'] = item.get('bsdflags')
for key in self.used_call_keys:
item_data[key] = self.call_keys[key](item)
return item_data
def format_item(self, item):
return self.format.format_map(self.get_item_data(item))
def calculate_num_chunks(self, item):
return len(item.get(b'chunks', []))
return len(item.get('chunks', []))
def calculate_unique_chunks(self, item):
chunk_index = self.archive.cache.chunks
return sum(1 for c in item.get(b'chunks', []) if chunk_index[c.id].refcount == 1)
return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1)
def calculate_size(self, item):
return sum(c.size for c in item.get(b'chunks', []))
return sum(c.size for c in item.get('chunks', []))
def calculate_csize(self, item):
return sum(c.csize for c in item.get(b'chunks', []))
return sum(c.csize for c in item.get('chunks', []))
def hash_item(self, hash_function, item):
if b'chunks' not in item:
if 'chunks' not in item:
return ""
hash = hashlib.new(hash_function)
for _, data in self.archive.pipeline.fetch_many([c.id for c in item[b'chunks']]):
for _, data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]):
hash.update(data)
return hash.hexdigest()
def format_time(self, key, item):
return format_time(safe_timestamp(item.get(key) or item[b'mtime']))
return format_time(safe_timestamp(item.get(key) or item.mtime))
def time(self, key, item):
return safe_timestamp(item.get(key) or item[b'mtime'])
return safe_timestamp(item.get(key) or item.mtime)
class ChunkIteratorFileWrapper:
@ -1385,7 +1421,7 @@ class ChunkIteratorFileWrapper:
def open_item(archive, item):
"""Return file-like object for archived item (with chunks)."""
chunk_iterator = archive.pipeline.fetch_many([c.id for c in item[b'chunks']])
chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks])
return ChunkIteratorFileWrapper(chunk_iterator)
@ -1572,3 +1608,12 @@ class CompressionDecider2:
compr_args.update(compr_spec)
logger.debug("len(data) == %d, len(lz4(data)) == %d, choosing %s", data_len, cdata_len, compr_spec)
return compr_args, Chunk(data, **meta)
@contextmanager
def signal_handler(signo, handler):
    """Temporarily install *handler* for signal *signo*.

    The handler that was registered before is restored when the context
    exits, no matter whether the block finished normally or raised.
    """
    previous = signal.signal(signo, handler)
    try:
        yield
    finally:
        signal.signal(signo, previous)

View file

@ -21,25 +21,34 @@ class PropDict:
__slots__ = ("_dict", ) # avoid setting attributes not supported by properties
def __init__(self, data_dict=None, **kw):
def __init__(self, data_dict=None, internal_dict=None, **kw):
if data_dict is None:
data = kw
elif not isinstance(data_dict, dict):
raise TypeError("data_dict must be dict")
else:
data = data_dict
# internally, we want an dict with only str-typed keys
_dict = {}
for k, v in data.items():
self._dict = {}
self.update_internal(internal_dict or {})
self.update(data)
def update(self, d):
for k, v in d.items():
if isinstance(k, bytes):
k = k.decode()
elif not isinstance(k, str):
raise TypeError("dict keys must be str or bytes, not %r" % k)
_dict[k] = v
unknown_keys = set(_dict) - self.VALID_KEYS
if unknown_keys:
raise ValueError("dict contains unknown keys %s" % ','.join(unknown_keys))
self._dict = _dict
setattr(self, self._check_key(k), v)
def update_internal(self, d):
    """Merge *d* into the internal dict without key validation.

    bytes-typed keys (as msgpack produces them) are decoded to str first.
    """
    for key, value in d.items():
        key = key.decode() if isinstance(key, bytes) else key
        self._dict[key] = value
def __eq__(self, other):
    # two PropDicts compare equal iff their internal dicts are equal;
    # *other* only needs to provide a compatible as_dict()
    return self.as_dict() == other.as_dict()
def __repr__(self):
    # internal_dict= mirrors the __init__ keyword, so the repr shows how to
    # reconstruct an equivalent object from the raw internal dict
    return '%s(internal_dict=%r)' % (self.__class__.__name__, self._dict)
def as_dict(self):
"""return the internal dictionary"""
@ -110,7 +119,7 @@ class Item(PropDict):
If an Item shall be serialized, give as_dict() method output to msgpack packer.
"""
VALID_KEYS = set(key.decode() for key in ITEM_KEYS) # we want str-typed keys
VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', } # str-typed keys
__slots__ = ("_dict", ) # avoid setting attributes not supported by properties
@ -118,14 +127,14 @@ class Item(PropDict):
path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
acl_access = PropDict._make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
acl_default = PropDict._make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
acl_extended = PropDict._make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
acl_nfs4 = PropDict._make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
acl_access = PropDict._make_property('acl_access', bytes)
acl_default = PropDict._make_property('acl_default', bytes)
acl_extended = PropDict._make_property('acl_extended', bytes)
acl_nfs4 = PropDict._make_property('acl_nfs4', bytes)
mode = PropDict._make_property('mode', int)
uid = PropDict._make_property('uid', int)
gid = PropDict._make_property('gid', int)
@ -138,6 +147,58 @@ class Item(PropDict):
hardlink_master = PropDict._make_property('hardlink_master', bool)
chunks = PropDict._make_property('chunks', list)
chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
chunks_healthy = PropDict._make_property('chunks_healthy', (list, type(None)), 'list or None')
xattrs = PropDict._make_property('xattrs', StableDict)
deleted = PropDict._make_property('deleted', bool)
nlink = PropDict._make_property('nlink', int)
class EncryptedKey(PropDict):
    """
    EncryptedKey abstraction that deals with validation and the low-level details internally:
    A EncryptedKey is created either from msgpack unpacker output, from another dict, from kwargs or
    built step-by-step by setting attributes.
    msgpack gives us a dict with bytes-typed keys, just give it to EncryptedKey(d) and use enc_key.xxx later.
    If a EncryptedKey shall be serialized, give as_dict() method output to msgpack packer.
    """
    VALID_KEYS = {'version', 'algorithm', 'iterations', 'salt', 'hash', 'data'} # str-typed keys
    __slots__ = ("_dict", ) # avoid setting attributes not supported by properties
    # on-disk key-file format version; consumers assert it is 1
    version = PropDict._make_property('version', int)
    # KDF/MAC algorithm name; currently always 'sha256'
    algorithm = PropDict._make_property('algorithm', str, encode=str.encode, decode=bytes.decode)
    # salt and iteration count are fed to passphrase.kdf(salt, iterations, 32)
    iterations = PropDict._make_property('iterations', int)
    salt = PropDict._make_property('salt', bytes)
    # HMAC-SHA256 over the decrypted key material; used to verify the passphrase
    hash = PropDict._make_property('hash', bytes)
    # AES-encrypted, msgpacked key material
    data = PropDict._make_property('data', bytes)
class Key(PropDict):
    """
    Key abstraction that deals with validation and the low-level details internally:
    A Key is created either from msgpack unpacker output, from another dict, from kwargs or
    built step-by-step by setting attributes.
    msgpack gives us a dict with bytes-typed keys, just give it to Key(d) and use key.xxx later.
    If a Key shall be serialized, give as_dict() method output to msgpack packer.
    """
    VALID_KEYS = {'version', 'repository_id', 'enc_key', 'enc_hmac_key', 'id_key', 'chunk_seed'} # str-typed keys
    __slots__ = ("_dict", ) # avoid setting attributes not supported by properties
    # key format version; consumers assert it is 1
    version = PropDict._make_property('version', int)
    # id of the repository this key belongs to
    repository_id = PropDict._make_property('repository_id', bytes)
    # raw key material copied into the AESKeyBase attributes of the same names
    enc_key = PropDict._make_property('enc_key', bytes)
    enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes)
    id_key = PropDict._make_property('id_key', bytes)
    # int seed value (NOTE(review): presumably seeds the chunker -- confirm)
    chunk_seed = PropDict._make_property('chunk_seed', int)

View file

@ -21,6 +21,7 @@ from .helpers import yes
from .helpers import get_keys_dir
from .helpers import bin_to_hex
from .helpers import CompressionDecider2, CompressionSpec
from .item import Key, EncryptedKey
PREFIX = b'\0' * 8
@ -341,24 +342,26 @@ class KeyfileKeyBase(AESKeyBase):
cdata = a2b_base64(key_data)
data = self.decrypt_key_file(cdata, passphrase)
if data:
key = msgpack.unpackb(data)
if key[b'version'] != 1:
data = msgpack.unpackb(data)
key = Key(internal_dict=data)
if key.version != 1:
raise IntegrityError('Invalid key file header')
self.repository_id = key[b'repository_id']
self.enc_key = key[b'enc_key']
self.enc_hmac_key = key[b'enc_hmac_key']
self.id_key = key[b'id_key']
self.chunk_seed = key[b'chunk_seed']
self.repository_id = key.repository_id
self.enc_key = key.enc_key
self.enc_hmac_key = key.enc_hmac_key
self.id_key = key.id_key
self.chunk_seed = key.chunk_seed
return True
return False
def decrypt_key_file(self, data, passphrase):
d = msgpack.unpackb(data)
assert d[b'version'] == 1
assert d[b'algorithm'] == b'sha256'
key = passphrase.kdf(d[b'salt'], d[b'iterations'], 32)
data = AES(is_encrypt=False, key=key).decrypt(d[b'data'])
if hmac_sha256(key, data) == d[b'hash']:
data = msgpack.unpackb(data)
enc_key = EncryptedKey(internal_dict=data)
assert enc_key.version == 1
assert enc_key.algorithm == 'sha256'
key = passphrase.kdf(enc_key.salt, enc_key.iterations, 32)
data = AES(is_encrypt=False, key=key).decrypt(enc_key.data)
if hmac_sha256(key, data) == enc_key.hash:
return data
def encrypt_key_file(self, data, passphrase):
@ -367,26 +370,26 @@ class KeyfileKeyBase(AESKeyBase):
key = passphrase.kdf(salt, iterations, 32)
hash = hmac_sha256(key, data)
cdata = AES(is_encrypt=True, key=key).encrypt(data)
d = {
'version': 1,
'salt': salt,
'iterations': iterations,
'algorithm': 'sha256',
'hash': hash,
'data': cdata,
}
return msgpack.packb(d)
enc_key = EncryptedKey(
version=1,
salt=salt,
iterations=iterations,
algorithm='sha256',
hash=hash,
data=cdata,
)
return msgpack.packb(enc_key.as_dict())
def _save(self, passphrase):
key = {
'version': 1,
'repository_id': self.repository_id,
'enc_key': self.enc_key,
'enc_hmac_key': self.enc_hmac_key,
'id_key': self.id_key,
'chunk_seed': self.chunk_seed,
}
data = self.encrypt_key_file(msgpack.packb(key), passphrase)
key = Key(
version=1,
repository_id=self.repository_id,
enc_key=self.enc_key,
enc_hmac_key=self.enc_hmac_key,
id_key=self.id_key,
chunk_seed=self.chunk_seed,
)
data = self.encrypt_key_file(msgpack.packb(key.as_dict()), passphrase)
key_data = '\n'.join(textwrap.wrap(b2a_base64(data).decode('ascii')))
return key_data

View file

@ -101,9 +101,11 @@ class NotMyLock(LockErrorT):
class ExclusiveLock:
"""An exclusive Lock based on mkdir fs operation being atomic.
If possible, try to use the contextmanager here like:
with ExclusiveLock(...) as lock:
...
If possible, try to use the contextmanager here like::
with ExclusiveLock(...) as lock:
...
This makes sure the lock is released again if the block is left, no
matter how (e.g. if an exception occurred).
"""
@ -222,9 +224,11 @@ class UpgradableLock:
no one is allowed reading) and read access to a resource needs a shared
lock (multiple readers are allowed).
If possible, try to use the contextmanager here like:
with UpgradableLock(...) as lock:
...
If possible, try to use the contextmanager here like::
with UpgradableLock(...) as lock:
...
This makes sure the lock is released again if the block is left, no
matter how (e.g. if an exception occurred).
"""

View file

@ -1,3 +1,4 @@
import errno
import os
"""
@ -52,6 +53,11 @@ def sync_dir(path):
fd = os.open(path, os.O_RDONLY)
try:
os.fsync(fd)
except OSError as os_error:
# Some network filesystems don't support this and fail with EINVAL.
# Other error codes (e.g. EIO) shouldn't be silenced.
if os_error.errno != errno.EINVAL:
raise
finally:
os.close(fd)
@ -75,7 +81,7 @@ class SyncFile:
"""
def __init__(self, path):
self.fd = open(path, 'wb')
self.fd = open(path, 'xb')
self.fileno = self.fd.fileno()
def __enter__(self):

View file

@ -62,9 +62,9 @@ def acl_get(path, item, st, numeric_owner=False):
if text == NULL:
return
if numeric_owner:
item[b'acl_extended'] = _remove_non_numeric_identifier(text)
item['acl_extended'] = _remove_non_numeric_identifier(text)
else:
item[b'acl_extended'] = text
item['acl_extended'] = text
finally:
acl_free(text)
acl_free(acl)
@ -72,18 +72,16 @@ def acl_get(path, item, st, numeric_owner=False):
def acl_set(path, item, numeric_owner=False):
cdef acl_t acl = NULL
try:
acl_text = item.get('acl_extended')
if acl_text is not None:
try:
if numeric_owner:
acl = acl_from_text(item[b'acl_extended'])
acl = acl_from_text(acl_text)
else:
acl = acl_from_text(<bytes>_remove_numeric_id_if_possible(item[b'acl_extended']))
except KeyError:
return
if acl == NULL:
return
if acl_set_link_np(<bytes>os.fsencode(path), ACL_TYPE_EXTENDED, acl):
return
finally:
acl_free(acl)
acl = acl_from_text(<bytes>_remove_numeric_id_if_possible(acl_text))
if acl == NULL:
return
if acl_set_link_np(<bytes>os.fsencode(path), ACL_TYPE_EXTENDED, acl):
return
finally:
acl_free(acl)

View file

@ -57,10 +57,10 @@ def acl_get(path, item, st, numeric_owner=False):
return
flags |= ACL_TEXT_NUMERIC_IDS if numeric_owner else 0
if ret > 0:
_get_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', flags)
_get_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', flags)
else:
_get_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', flags)
_get_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', flags)
_get_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', flags)
_get_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', flags)
cdef _set_acl(p, type, item, attribute, numeric_owner=False):
@ -98,6 +98,6 @@ def acl_set(path, item, numeric_owner=False):
of the user/group names
"""
p = os.fsencode(path)
_set_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', numeric_owner)
_set_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', numeric_owner)
_set_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', numeric_owner)
_set_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', numeric_owner)
_set_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', numeric_owner)
_set_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', numeric_owner)

View file

@ -171,12 +171,12 @@ def acl_get(path, item, st, numeric_owner=False):
if access_acl:
access_text = acl_to_text(access_acl, NULL)
if access_text:
item[b'acl_access'] = converter(access_text)
item['acl_access'] = converter(access_text)
default_acl = acl_get_file(p, ACL_TYPE_DEFAULT)
if default_acl:
default_text = acl_to_text(default_acl, NULL)
if default_text:
item[b'acl_default'] = converter(default_text)
item['acl_default'] = converter(default_text)
finally:
acl_free(default_text)
acl_free(default_acl)
@ -193,8 +193,8 @@ def acl_set(path, item, numeric_owner=False):
converter = posix_acl_use_stored_uid_gid
else:
converter = acl_use_local_uid_gid
access_text = item.get(b'acl_access')
default_text = item.get(b'acl_default')
access_text = item.get('acl_access')
default_text = item.get('acl_default')
if access_text:
try:
access_acl = acl_from_text(<bytes>converter(access_text))
@ -214,7 +214,7 @@ cdef _sync_file_range(fd, offset, length, flags):
assert offset & PAGE_MASK == 0, "offset %d not page-aligned" % offset
assert length & PAGE_MASK == 0, "length %d not page-aligned" % length
if sync_file_range(fd, offset, length, flags) != 0:
raise OSError(errno, os.strerror(errno))
raise OSError(errno.errno, os.strerror(errno.errno))
os.posix_fadvise(fd, offset, length, os.POSIX_FADV_DONTNEED)
cdef unsigned PAGE_MASK = resource.getpagesize() - 1

View file

@ -60,9 +60,10 @@ class RepositoryServer: # pragma: no cover
'break_lock',
)
def __init__(self, restrict_to_paths):
def __init__(self, restrict_to_paths, append_only):
self.repository = None
self.restrict_to_paths = restrict_to_paths
self.append_only = append_only
def serve(self):
stdin_fd = sys.stdin.fileno()
@ -129,7 +130,7 @@ class RepositoryServer: # pragma: no cover
break
else:
raise PathNotAllowed(path)
self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock)
self.repository = Repository(path, create, lock_wait=lock_wait, lock=lock, append_only=self.append_only)
self.repository.__enter__() # clean exit handled by serve() method
return self.repository.id
@ -159,6 +160,7 @@ class RemoteRepository:
# pyinstaller binary adds LD_LIBRARY_PATH=/tmp/_ME... but we do not want
# that the system's ssh binary picks up (non-matching) libraries from there
env.pop('LD_LIBRARY_PATH', None)
env.pop('BORG_PASSPHRASE', None) # security: do not give secrets to subprocess
self.p = Popen(borg_cmd, bufsize=0, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env)
self.stdin_fd = self.p.stdin.fileno()
self.stdout_fd = self.p.stdout.fileno()
@ -194,9 +196,14 @@ class RemoteRepository:
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if exc_type is not None:
self.rollback()
self.close()
try:
if exc_type is not None:
self.rollback()
finally:
# in any case, we want to cleanly close the repo, even if the
# rollback can not succeed (e.g. because the connection was
# already closed) and raised another exception:
self.close()
@property
def id_str(self):
@ -224,7 +231,8 @@ class RemoteRepository:
if testing:
return [sys.executable, '-m', 'borg.archiver', 'serve'] + opts + self.extra_test_args
else: # pragma: no cover
return [args.remote_path, 'serve'] + opts
remote_path = args.remote_path or os.environ.get('BORG_REMOTE_PATH', 'borg')
return [remote_path, 'serve'] + opts
def ssh_cmd(self, location):
"""return a ssh command line that can be prefixed to a borg command line"""
@ -251,6 +259,24 @@ class RemoteRepository:
del self.cache[args]
return msgid
def handle_error(error, res):
    # Translate an error name sent back by the remote "borg serve" process
    # into the corresponding local exception and raise it client-side.
    # NOTE: nested helper -- closes over self from the enclosing method.
    if error == b'DoesNotExist':
        raise Repository.DoesNotExist(self.location.orig)
    elif error == b'AlreadyExists':
        raise Repository.AlreadyExists(self.location.orig)
    elif error == b'CheckNeeded':
        raise Repository.CheckNeeded(self.location.orig)
    elif error == b'IntegrityError':
        raise IntegrityError(res)
    elif error == b'PathNotAllowed':
        raise PathNotAllowed(*res)
    elif error == b'ObjectNotFound':
        # res[0] is the missing object's key
        raise Repository.ObjectNotFound(res[0], self.location.orig)
    elif error == b'InvalidRPCMethod':
        raise InvalidRPCMethod(*res)
    else:
        # unknown error type: surface the raw message verbatim
        raise self.RPCError(res.decode('utf-8'))
calls = list(calls)
waiting_for = []
while wait or calls:
@ -259,22 +285,7 @@ class RemoteRepository:
error, res = self.responses.pop(waiting_for[0])
waiting_for.pop(0)
if error:
if error == b'DoesNotExist':
raise Repository.DoesNotExist(self.location.orig)
elif error == b'AlreadyExists':
raise Repository.AlreadyExists(self.location.orig)
elif error == b'CheckNeeded':
raise Repository.CheckNeeded(self.location.orig)
elif error == b'IntegrityError':
raise IntegrityError(res)
elif error == b'PathNotAllowed':
raise PathNotAllowed(*res)
elif error == b'ObjectNotFound':
raise Repository.ObjectNotFound(res[0], self.location.orig)
elif error == b'InvalidRPCMethod':
raise InvalidRPCMethod(*res)
else:
raise self.RPCError(res.decode('utf-8'))
handle_error(error, res)
else:
yield res
if not waiting_for and not calls:
@ -300,6 +311,8 @@ class RemoteRepository:
type, msgid, error, res = unpacked
if msgid in self.ignore_responses:
self.ignore_responses.remove(msgid)
if error:
handle_error(error, res)
else:
self.responses[msgid] = error, res
elif fd is self.stderr_fd:

View file

@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
from .constants import * # NOQA
from .hashindex import NSIndex
from .helpers import Error, ErrorWithTraceback, IntegrityError, InternalOSError
from .helpers import Error, ErrorWithTraceback, IntegrityError
from .helpers import Location
from .helpers import ProgressIndicatorPercent
from .helpers import bin_to_hex
@ -96,7 +96,7 @@ class Repository:
class ObjectNotFound(ErrorWithTraceback):
"""Object with key {} not found in repository {}."""
def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True):
def __init__(self, path, create=False, exclusive=False, lock_wait=None, lock=True, append_only=False):
self.path = os.path.abspath(path)
self._location = Location('file://%s' % self.path)
self.io = None
@ -107,6 +107,7 @@ class Repository:
self.do_lock = lock
self.do_create = create
self.exclusive = exclusive
self.append_only = append_only
def __del__(self):
if self.lock:
@ -125,6 +126,12 @@ class Repository:
def __exit__(self, exc_type, exc_val, exc_tb):
if exc_type is not None:
no_space_left_on_device = exc_type is OSError and exc_val.errno == errno.ENOSPC
# The ENOSPC could have originated somewhere else besides the Repository. The cleanup is always safe, unless
# EIO or FS corruption ensues, which is why we specifically check for ENOSPC.
if self._active_txn and no_space_left_on_device:
logger.warning('No space left on device, cleaning up partial transaction to free space.')
self.io.cleanup(self.io.get_segments_transaction_id())
self.rollback()
self.close()
@ -176,7 +183,9 @@ class Repository:
shutil.rmtree(self.path)
def get_index_transaction_id(self):
indices = sorted((int(name[6:]) for name in os.listdir(self.path) if name.startswith('index.') and name[6:].isdigit()))
indices = sorted(int(fn[6:])
for fn in os.listdir(self.path)
if fn.startswith('index.') and fn[6:].isdigit() and os.stat(os.path.join(self.path, fn)).st_size != 0)
if indices:
return indices[-1]
else:
@ -217,7 +226,9 @@ class Repository:
raise self.InvalidRepository(path)
self.max_segment_size = self.config.getint('repository', 'max_segment_size')
self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
self.append_only = self.config.getboolean('repository', 'append_only', fallback=False)
# append_only can be set in the constructor
# it shouldn't be overridden (True -> False) here
self.append_only = self.append_only or self.config.getboolean('repository', 'append_only', fallback=False)
self.id = unhexlify(self.config.get('repository', 'id').strip())
self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir)
@ -247,18 +258,13 @@ class Repository:
except RuntimeError as error:
assert str(error) == 'hashindex_read failed' # everything else means we're in *deep* trouble
logger.warning('Repository index missing or corrupted, trying to recover')
try:
os.unlink(index_path)
except OSError as e:
raise InternalOSError(e) from None
os.unlink(index_path)
if not auto_recover:
raise
self.prepare_txn(self.get_transaction_id())
# don't leave an open transaction around
self.commit()
return self.open_index(self.get_transaction_id())
except OSError as e:
raise InternalOSError(e) from None
def prepare_txn(self, transaction_id, do_cleanup=True):
self._active_txn = True
@ -296,8 +302,6 @@ class Repository:
self.check_transaction()
self.prepare_txn(transaction_id)
return
except OSError as os_error:
raise InternalOSError(os_error) from None
if hints[b'version'] == 1:
logger.debug('Upgrading from v1 hints.%d', transaction_id)
self.segments = hints[b'segments']

View file

@ -5,7 +5,7 @@ Self testing module
The selftest() function runs a small test suite of relatively fast tests that are meant to discover issues
with the way Borg was compiled or packaged and also bugs in Borg itself.
Theses tests are a subset of the borg/testsuite and are run with Pythons built-in unittest, hence none of
These tests are a subset of the borg/testsuite and are run with Pythons built-in unittest, hence none of
the tests used for this can or should be ported to py.test currently.
To assert that self test discovery works correctly the number of tests is kept in the SELFTEST_COUNT
@ -30,7 +30,7 @@ SELFTEST_CASES = [
ChunkerTestCase,
]
SELFTEST_COUNT = 27
SELFTEST_COUNT = 29
class SelfTestResult(TestResult):

View file

@ -5,7 +5,7 @@ import re
def translate(pat):
"""Translate a shell-style pattern to a regular expression.
The pattern may include "**<sep>" (<sep> stands for the platform-specific path separator; "/" on POSIX systems) for
The pattern may include ``**<sep>`` (<sep> stands for the platform-specific path separator; "/" on POSIX systems) for
matching zero or more directory levels and "*" for matching zero or more arbitrary characters with the exception of
any path separator. Wrap meta-characters in brackets for a literal match (i.e. "[?]" to match the literal character
"?").

View file

@ -117,6 +117,24 @@ class BaseTestCase(unittest.TestCase):
for sub_diff in diff.subdirs.values():
self._assert_dirs_equal_cmp(sub_diff)
@contextmanager
def fuse_mount(self, location, mountpoint, mount_options=None):
    """Mount *location* at *mountpoint* via ``self.cmd('mount', ...)``.

    Creates the mountpoint directory on entry; unmounts and removes it on
    exit. The cleanup now runs in a ``finally`` block, so a failing test
    body no longer leaks a stale FUSE mount (the original skipped cleanup
    when the ``with`` body raised).
    """
    os.mkdir(mountpoint)
    args = ['mount', location, mountpoint]
    if mount_options:
        args += '-o', mount_options
    self.cmd(*args, fork=True)
    self.wait_for_mount(mountpoint)
    try:
        yield
    finally:
        if sys.platform.startswith('linux'):
            cmd = 'fusermount -u %s' % mountpoint
        else:
            cmd = 'umount %s' % mountpoint
        os.system(cmd)
        os.rmdir(mountpoint)
        # Give the daemon some time to exit
        time.sleep(.2)
def wait_for_mount(self, path, timeout=5):
"""Wait until a filesystem is mounted on `path`
"""

View file

@ -6,7 +6,9 @@ from unittest.mock import Mock
import pytest
import msgpack
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
from ..archive import BackupOSError, backup_io, backup_io_iter
from ..item import Item
from ..key import PlaintextKey
from ..helpers import Manifest
from . import BaseTestCase
@ -38,12 +40,12 @@ def tests_stats_progress(stats, columns=80):
out = StringIO()
stats.update(10**3, 0, unique=False)
stats.show_progress(item={b'path': 'foo'}, final=False, stream=out)
stats.show_progress(item=Item(path='foo'), final=False, stream=out)
s = '1.02 kB O 10 B C 10 B D 0 N foo'
buf = ' ' * (columns - len(s))
assert out.getvalue() == s + buf + "\r"
out = StringIO()
stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out)
stats.show_progress(item=Item(path='foo'*40), final=False, stream=out)
s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo'
buf = ' ' * (columns - len(s))
assert out.getvalue() == s + buf + "\r"
@ -93,7 +95,7 @@ class ArchiveTimestampTestCase(BaseTestCase):
class ChunkBufferTestCase(BaseTestCase):
def test(self):
data = [{b'foo': 1}, {b'bar': 2}]
data = [Item(path='p1'), Item(path='p2')]
cache = MockCache()
key = PlaintextKey(None)
chunks = CacheChunkBuffer(cache, key, None)
@ -105,11 +107,11 @@ class ChunkBufferTestCase(BaseTestCase):
unpacker = msgpack.Unpacker()
for id in chunks.chunks:
unpacker.feed(cache.objects[id])
self.assert_equal(data, list(unpacker))
self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])
def test_partial(self):
big = b"0123456789" * 10000
data = [{b'full': 1, b'data': big}, {b'partial': 2, b'data': big}]
big = "0123456789" * 10000
data = [Item(path='full', source=big), Item(path='partial', source=big)]
cache = MockCache()
key = PlaintextKey(None)
chunks = CacheChunkBuffer(cache, key, None)
@ -126,7 +128,7 @@ class ChunkBufferTestCase(BaseTestCase):
unpacker = msgpack.Unpacker()
for id in chunks.chunks:
unpacker.feed(cache.objects[id])
self.assert_equal(data, list(unpacker))
self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])
class RobustUnpackerTestCase(BaseTestCase):
@ -138,7 +140,7 @@ class RobustUnpackerTestCase(BaseTestCase):
return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
def process(self, input):
unpacker = RobustUnpacker(validator=self._validator)
unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
result = []
for should_sync, chunks in input:
if should_sync:
@ -183,3 +185,59 @@ class RobustUnpackerTestCase(BaseTestCase):
input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
result = self.process(input)
self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
@pytest.fixture
def item_keys_serialized():
return [msgpack.packb(name) for name in ITEM_KEYS]
@pytest.mark.parametrize('packed',
[b'', b'x', b'foobar', ] +
[msgpack.packb(o) for o in (
[None, 0, 0.0, False, '', {}, [], ()] +
[42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')]
)])
def test_invalid_msgpacked_item(packed, item_keys_serialized):
assert not valid_msgpacked_dict(packed, item_keys_serialized)
@pytest.mark.parametrize('packed',
[msgpack.packb(o) for o in [
{b'path': b'/a/b/c'}, # small (different msgpack mapping type!)
dict((k, b'') for k in ITEM_KEYS), # as big (key count) as it gets
dict((k, b'x' * 1000) for k in ITEM_KEYS), # as big (key count and volume) as it gets
]])
def test_valid_msgpacked_items(packed, item_keys_serialized):
assert valid_msgpacked_dict(packed, item_keys_serialized)
def test_key_length_msgpacked_items():
key = b'x' * 32 # 31 bytes is the limit for fixstr msgpack type
data = {key: b''}
item_keys_serialized = [msgpack.packb(key), ]
assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)
def test_backup_io():
with pytest.raises(BackupOSError):
with backup_io():
raise OSError(123)
def test_backup_io_iter():
class Iterator:
def __init__(self, exc):
self.exc = exc
def __next__(self):
raise self.exc()
oserror_iterator = Iterator(OSError)
with pytest.raises(BackupOSError):
for _ in backup_io_iter(oserror_iterator):
pass
normal_iterator = Iterator(StopIteration)
for _ in backup_io_iter(normal_iterator):
assert False, 'StopIteration handled incorrectly'

View file

@ -23,7 +23,7 @@ except ImportError:
pass
from .. import xattr, helpers, platform
from ..archive import Archive, ChunkBuffer, ArchiveRecreater
from ..archive import Archive, ChunkBuffer, ArchiveRecreater, flags_noatime, flags_normal
from ..archiver import Archiver
from ..cache import Cache
from ..constants import * # NOQA
@ -225,7 +225,8 @@ class ArchiverTestCaseBase(BaseTestCase):
def tearDown(self):
os.chdir(self._old_wd)
shutil.rmtree(self.tmpdir)
# note: ignore_errors=True as workaround for issue #862
shutil.rmtree(self.tmpdir, ignore_errors=True)
def cmd(self, *args, **kw):
exit_code = kw.pop('exit_code', 0)
@ -241,6 +242,13 @@ class ArchiverTestCaseBase(BaseTestCase):
def create_src_archive(self, name):
self.cmd('create', self.repository_location + '::' + name, src_dir)
def open_archive(self, name):
repository = Repository(self.repository_path)
with repository:
manifest, key = Manifest.load(repository)
archive = Archive(repository, key, manifest, name)
return archive, repository
def create_regular_file(self, name, size=0, contents=None):
filename = os.path.join(self.input_path, name)
if not os.path.exists(os.path.dirname(filename)):
@ -294,10 +302,14 @@ class ArchiverTestCaseBase(BaseTestCase):
# File mode
os.chmod('input/dir2', 0o555) # if we take away write perms, we need root to remove contents
# File owner
os.chown('input/file1', 100, 200)
os.chown('input/file1', 100, 200) # raises OSError invalid argument on cygwin
have_root = True # we have (fake)root
except PermissionError:
have_root = False
except OSError as e:
if e.errno != errno.EINVAL:
raise
have_root = False
return have_root
else:
return False
@ -389,8 +401,20 @@ class ArchiverTestCase(ArchiverTestCaseBase):
assert os.readlink('input/link1') == 'somewhere'
def test_atime(self):
def has_noatime(some_file):
atime_before = os.stat(some_file).st_atime_ns
try:
os.close(os.open(some_file, flags_noatime))
except PermissionError:
return False
else:
atime_after = os.stat(some_file).st_atime_ns
noatime_used = flags_noatime != flags_normal
return noatime_used and atime_before == atime_after
self.create_test_files()
atime, mtime = 123456780, 234567890
have_noatime = has_noatime('input/file1')
os.utime('input/file1', (atime, mtime))
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
@ -399,7 +423,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
sti = os.stat('input/file1')
sto = os.stat('output/input/file1')
assert sti.st_mtime_ns == sto.st_mtime_ns == mtime * 1e9
if hasattr(os, 'O_NOATIME'):
if have_noatime:
assert sti.st_atime_ns == sto.st_atime_ns == atime * 1e9
else:
# it touched the input file's atime while backing it up
@ -419,11 +443,30 @@ class ArchiverTestCase(ArchiverTestCaseBase):
return repository.id
def test_sparse_file(self):
# no sparse file support on Mac OS X
sparse_support = sys.platform != 'darwin'
def is_sparse(fn, total_size, hole_size):
st = os.stat(fn)
assert st.st_size == total_size
sparse = True
if sparse and hasattr(st, 'st_blocks') and st.st_blocks * 512 >= st.st_size:
sparse = False
if sparse and hasattr(os, 'SEEK_HOLE') and hasattr(os, 'SEEK_DATA'):
with open(fn, 'rb') as fd:
# only check if the first hole is as expected, because the 2nd hole check
# is problematic on xfs due to its "dynamic speculative EOF preallocation
try:
if fd.seek(0, os.SEEK_HOLE) != 0:
sparse = False
if fd.seek(0, os.SEEK_DATA) != hole_size:
sparse = False
except OSError:
# OS/FS does not really support SEEK_HOLE/SEEK_DATA
sparse = False
return sparse
filename = os.path.join(self.input_path, 'sparse')
content = b'foobar'
hole_size = 5 * (1 << CHUNK_MAX_EXP) # 5 full chunker buffers
total_size = hole_size + len(content) + hole_size
with open(filename, 'wb') as fd:
# create a file that has a hole at the beginning and end (if the
# OS and filesystem supports sparse files)
@ -432,26 +475,23 @@ class ArchiverTestCase(ArchiverTestCaseBase):
fd.seek(hole_size, 1)
pos = fd.tell()
fd.truncate(pos)
total_len = hole_size + len(content) + hole_size
st = os.stat(filename)
self.assert_equal(st.st_size, total_len)
if sparse_support and hasattr(st, 'st_blocks'):
self.assert_true(st.st_blocks * 512 < total_len / 9) # is input sparse?
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
with changedir('output'):
self.cmd('extract', '--sparse', self.repository_location + '::test')
self.assert_dirs_equal('input', 'output/input')
filename = os.path.join(self.output_path, 'input', 'sparse')
with open(filename, 'rb') as fd:
# check if file contents are as expected
self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
self.assert_equal(fd.read(len(content)), content)
self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
st = os.stat(filename)
self.assert_equal(st.st_size, total_len)
if sparse_support and hasattr(st, 'st_blocks'):
self.assert_true(st.st_blocks * 512 < total_len / 9) # is output sparse?
# we first check if we could create a sparse input file:
sparse_support = is_sparse(filename, total_size, hole_size)
if sparse_support:
# we could create a sparse input file, so creating a backup of it and
# extracting it again (as sparse) should also work:
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test', 'input')
with changedir(self.output_path):
self.cmd('extract', '--sparse', self.repository_location + '::test')
self.assert_dirs_equal('input', 'output/input')
filename = os.path.join(self.output_path, 'input', 'sparse')
with open(filename, 'rb') as fd:
# check if file contents are as expected
self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
self.assert_equal(fd.read(len(content)), content)
self.assert_equal(fd.read(hole_size), b'\0' * hole_size)
self.assert_true(is_sparse(filename, total_size, hole_size))
def test_unusual_filenames(self):
filenames = ['normal', 'with some blanks', '(with_parens)', ]
@ -1168,6 +1208,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
self.assertEqual(output_1, output_2)
self.assertNotEqual(output_1, output_3)
def test_list_repository_format(self):
self.cmd('init', self.repository_location)
self.cmd('create', self.repository_location + '::test-1', src_dir)
self.cmd('create', self.repository_location + '::test-2', src_dir)
output_1 = self.cmd('list', self.repository_location)
output_2 = self.cmd('list', '--format', '{archive:<36} {time} [{id}]{NL}', self.repository_location)
self.assertEqual(output_1, output_2)
output_1 = self.cmd('list', '--short', self.repository_location)
self.assertEqual(output_1, 'test-1\ntest-2\n')
output_1 = self.cmd('list', '--format', '{barchive}/', self.repository_location)
self.assertEqual(output_1, 'test-1/test-2/')
def test_list_hash(self):
self.create_regular_file('empty_file', size=0)
self.create_regular_file('amb', contents=b'a' * 1000000)
@ -1278,52 +1330,96 @@ class ArchiverTestCase(ArchiverTestCaseBase):
assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only')
@unittest.skipUnless(has_llfuse and sys.platform != 'win32', 'llfuse not installed')
def test_fuse_mount_repository(self):
mountpoint = os.path.join(self.tmpdir, 'mountpoint')
os.mkdir(mountpoint)
def test_fuse(self):
self.cmd('init', self.repository_location)
self.create_test_files()
self.cmd('create', self.repository_location + '::archive', 'input')
self.cmd('create', self.repository_location + '::archive2', 'input')
try:
self.cmd('mount', self.repository_location, mountpoint, fork=True)
self.wait_for_mount(mountpoint)
if has_lchflags:
# remove the file we did not backup, so input and output become equal
os.remove(os.path.join('input', 'flagfile'))
if has_lchflags:
# remove the file we did not backup, so input and output become equal
os.remove(os.path.join('input', 'flagfile'))
mountpoint = os.path.join(self.tmpdir, 'mountpoint')
# mount the whole repository, archive contents shall show up in archivename subdirs of mountpoint:
with self.fuse_mount(self.repository_location, mountpoint):
self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'))
self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'))
finally:
if sys.platform.startswith('linux'):
os.system('fusermount -u ' + mountpoint)
# mount only 1 archive, its contents shall show up directly in mountpoint:
with self.fuse_mount(self.repository_location + '::archive', mountpoint):
self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
# regular file
in_fn = 'input/file1'
out_fn = os.path.join(mountpoint, 'input', 'file1')
# stat
sti1 = os.stat(in_fn)
sto1 = os.stat(out_fn)
assert sti1.st_mode == sto1.st_mode
assert sti1.st_uid == sto1.st_uid
assert sti1.st_gid == sto1.st_gid
assert sti1.st_size == sto1.st_size
assert sti1.st_atime == sto1.st_atime
assert sti1.st_ctime == sto1.st_ctime
assert sti1.st_mtime == sto1.st_mtime
# note: there is another hardlink to this, see below
assert sti1.st_nlink == sto1.st_nlink == 2
# read
with open(in_fn, 'rb') as in_f, open(out_fn, 'rb') as out_f:
assert in_f.read() == out_f.read()
# list/read xattrs
if xattr.is_enabled(self.input_path):
assert xattr.listxattr(out_fn) == ['user.foo', ]
assert xattr.getxattr(out_fn, 'user.foo') == b'bar'
else:
os.system('umount ' + mountpoint)
os.rmdir(mountpoint)
# Give the daemon some time to exit
time.sleep(.2)
assert xattr.listxattr(out_fn) == []
try:
xattr.getxattr(out_fn, 'user.foo')
except OSError as e:
assert e.errno == llfuse.ENOATTR
else:
assert False, "expected OSError(ENOATTR), but no error was raised"
# hardlink (to 'input/file1')
in_fn = 'input/hardlink'
out_fn = os.path.join(mountpoint, 'input', 'hardlink')
sti2 = os.stat(in_fn)
sto2 = os.stat(out_fn)
assert sti2.st_nlink == sto2.st_nlink == 2
assert sto1.st_ino == sto2.st_ino
# symlink
in_fn = 'input/link1'
out_fn = os.path.join(mountpoint, 'input', 'link1')
sti = os.stat(in_fn, follow_symlinks=False)
sto = os.stat(out_fn, follow_symlinks=False)
assert stat.S_ISLNK(sti.st_mode)
assert stat.S_ISLNK(sto.st_mode)
assert os.readlink(in_fn) == os.readlink(out_fn)
# FIFO
out_fn = os.path.join(mountpoint, 'input', 'fifo1')
sto = os.stat(out_fn)
assert stat.S_ISFIFO(sto.st_mode)
@unittest.skipUnless(has_llfuse and sys.platform != 'win32', 'llfuse not installed')
def test_fuse_mount_archive(self):
mountpoint = os.path.join(self.tmpdir, 'mountpoint')
os.mkdir(mountpoint)
def test_fuse_allow_damaged_files(self):
self.cmd('init', self.repository_location)
self.create_test_files()
self.cmd('create', self.repository_location + '::archive', 'input')
try:
self.cmd('mount', self.repository_location + '::archive', mountpoint, fork=True)
self.wait_for_mount(mountpoint)
if has_lchflags:
# remove the file we did not backup, so input and output become equal
os.remove(os.path.join('input', 'flagfile'))
self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
finally:
if sys.platform.startswith('linux'):
os.system('fusermount -u ' + mountpoint)
self.create_src_archive('archive')
# Get rid of a chunk and repair it
archive, repository = self.open_archive('archive')
with repository:
for item in archive.iter_items():
if item.path.endswith('testsuite/archiver.py'):
repository.delete(item.chunks[-1].id)
path = item.path # store full path for later
break
else:
os.system('umount ' + mountpoint)
os.rmdir(mountpoint)
# Give the daemon some time to exit
time.sleep(.2)
assert False # missed the file
repository.commit()
self.cmd('check', '--repair', self.repository_location, exit_code=0)
mountpoint = os.path.join(self.tmpdir, 'mountpoint')
with self.fuse_mount(self.repository_location + '::archive', mountpoint):
with pytest.raises(OSError) as excinfo:
open(os.path.join(mountpoint, path))
assert excinfo.value.errno == errno.EIO
with self.fuse_mount(self.repository_location + '::archive', mountpoint, 'allow_damaged_files'):
open(os.path.join(mountpoint, path)).close()
def verify_aes_counter_uniqueness(self, method):
seen = set() # Chunks already seen
@ -1628,6 +1724,14 @@ class ArchiverTestCaseBinary(ArchiverTestCase):
def test_recreate_changed_source(self):
pass
@unittest.skip('test_basic_functionality seems incompatible with fakeroot and/or the binary.')
def test_basic_functionality(self):
pass
@unittest.skip('test_overwrite seems incompatible with fakeroot and/or the binary.')
def test_overwrite(self):
pass
class ArchiverCheckTestCase(ArchiverTestCaseBase):
@ -1638,13 +1742,6 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
self.create_src_archive('archive1')
self.create_src_archive('archive2')
def open_archive(self, name):
repository = Repository(self.repository_path)
with repository:
manifest, key = Manifest.load(repository)
archive = Archive(repository, key, manifest, name)
return archive, repository
def test_check_usage(self):
output = self.cmd('check', '-v', '--progress', self.repository_location, exit_code=0)
self.assert_in('Starting repository check', output)
@ -1666,13 +1763,46 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
archive, repository = self.open_archive('archive1')
with repository:
for item in archive.iter_items():
if item[b'path'].endswith('testsuite/archiver.py'):
repository.delete(item[b'chunks'][-1].id)
if item.path.endswith('testsuite/archiver.py'):
valid_chunks = item.chunks
killed_chunk = valid_chunks[-1]
repository.delete(killed_chunk.id)
break
else:
self.assert_true(False) # should not happen
repository.commit()
self.cmd('check', self.repository_location, exit_code=1)
self.cmd('check', '--repair', self.repository_location, exit_code=0)
output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
self.assert_in('New missing file chunk detected', output)
self.cmd('check', self.repository_location, exit_code=0)
# check that the file in the old archives has now a different chunk list without the killed chunk
for archive_name in ('archive1', 'archive2'):
archive, repository = self.open_archive(archive_name)
with repository:
for item in archive.iter_items():
if item.path.endswith('testsuite/archiver.py'):
self.assert_not_equal(valid_chunks, item.chunks)
self.assert_not_in(killed_chunk, item.chunks)
break
else:
self.assert_true(False) # should not happen
# do a fresh backup (that will include the killed chunk)
with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
self.create_src_archive('archive3')
# check should be able to heal the file now:
output = self.cmd('check', '-v', '--repair', self.repository_location, exit_code=0)
self.assert_in('Healed previously missing file chunk', output)
self.assert_in('testsuite/archiver.py: Completely healed previously damaged file!', output)
# check that the file in the old archives has the correct chunks again
for archive_name in ('archive1', 'archive2'):
archive, repository = self.open_archive(archive_name)
with repository:
for item in archive.iter_items():
if item.path.endswith('testsuite/archiver.py'):
self.assert_equal(valid_chunks, item.chunks)
break
else:
self.assert_true(False) # should not happen
def test_missing_archive_item_chunk(self):
archive, repository = self.open_archive('archive1')
@ -1721,8 +1851,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
archive, repository = self.open_archive('archive1')
with repository:
for item in archive.iter_items():
if item[b'path'].endswith('testsuite/archiver.py'):
chunk = item[b'chunks'][-1]
if item.path.endswith('testsuite/archiver.py'):
chunk = item.chunks[-1]
data = repository.get(chunk.id) + b'1234'
repository.put(chunk.id, data)
break
@ -1757,11 +1887,7 @@ class RemoteArchiverTestCase(ArchiverTestCase):
# this was introduced because some tests expect stderr contents to show up
# in "output" also. Also, the non-forking exec_cmd catches both, too.
@unittest.skip('deadlock issues')
def test_fuse_mount_repository(self):
pass
@unittest.skip('deadlock issues')
def test_fuse_mount_archive(self):
def test_fuse(self):
pass
@unittest.skip('only works locally')

View file

@ -1,6 +1,8 @@
from binascii import hexlify, unhexlify
from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, hmac_sha256
from ..crypto import increment_iv, bytes16_to_int, int_to_bytes16
from . import BaseTestCase
# Note: these tests are part of the self test, do not use or import py.test functionality here.
@ -16,6 +18,27 @@ class CryptoTestCase(BaseTestCase):
self.assert_equal(bytes_to_long(b'\0\0\0\0\0\0\0\1'), 1)
self.assert_equal(long_to_bytes(1), b'\0\0\0\0\0\0\0\1')
def test_bytes16_to_int(self):
self.assert_equal(bytes16_to_int(b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1'), 1)
self.assert_equal(int_to_bytes16(1), b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1')
self.assert_equal(bytes16_to_int(b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\0'), 2 ** 64)
self.assert_equal(int_to_bytes16(2 ** 64), b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\0')
def test_increment_iv(self):
iv0 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'
iv1 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1'
iv2 = b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2'
self.assert_equal(increment_iv(iv0, 0), iv0)
self.assert_equal(increment_iv(iv0, 1), iv1)
self.assert_equal(increment_iv(iv0, 2), iv2)
iva = b'\0\0\0\0\0\0\0\0\xff\xff\xff\xff\xff\xff\xff\xff'
ivb = b'\0\0\0\0\0\0\0\1\x00\x00\x00\x00\x00\x00\x00\x00'
ivc = b'\0\0\0\0\0\0\0\1\x00\x00\x00\x00\x00\x00\x00\x01'
self.assert_equal(increment_iv(iva, 0), iva)
self.assert_equal(increment_iv(iva, 1), ivb)
self.assert_equal(increment_iv(iva, 2), ivc)
self.assert_equal(increment_iv(iv0, 2**64), ivb)
def test_aes(self):
key = b'X' * 32
data = b'foo' * 10

View file

@ -10,7 +10,7 @@ import msgpack
import msgpack.fallback
from ..helpers import Location
from ..helpers import partial_format, format_file_size, format_timedelta
from ..helpers import partial_format, format_file_size, format_timedelta, format_line, PlaceholderError
from ..helpers import make_path_safe, clean_lines
from ..helpers import prune_within, prune_split
from ..helpers import get_cache_dir, get_keys_dir
@ -22,6 +22,7 @@ from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
from ..helpers import load_excludes
from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2
from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
from . import BaseTestCase, environment_variable, FakeInputs
if sys.platform == 'win32':
@ -958,3 +959,18 @@ def test_compression_decider2():
assert compr_spec['name'] == 'zlib'
compr_spec, chunk = cd.decide(Chunk(None, compress=CompressionSpec('lzma')))
assert compr_spec['name'] == 'lzma'
def test_format_line():
data = dict(foo='bar baz')
assert format_line('', data) == ''
assert format_line('{foo}', data) == 'bar baz'
assert format_line('foo{foo}foo', data) == 'foobar bazfoo'
def test_format_line_erroneous():
data = dict()
with pytest.raises(PlaceholderError):
assert format_line('{invalid}', data)
with pytest.raises(PlaceholderError):
assert format_line('{}', data)

View file

@ -35,13 +35,13 @@ def test_item_empty():
def test_item_from_dict():
# does not matter whether we get str or bytes keys
item = Item({b'path': b'/a/b/c', b'mode': 0o666})
item = Item({b'path': '/a/b/c', b'mode': 0o666})
assert item.path == '/a/b/c'
assert item.mode == 0o666
assert 'path' in item
# does not matter whether we get str or bytes keys
item = Item({'path': b'/a/b/c', 'mode': 0o666})
item = Item({'path': '/a/b/c', 'mode': 0o666})
assert item.path == '/a/b/c'
assert item.mode == 0o666
assert 'mode' in item
@ -60,7 +60,7 @@ def test_item_from_dict():
def test_item_from_kw():
item = Item(path=b'/a/b/c', mode=0o666)
item = Item(path='/a/b/c', mode=0o666)
assert item.path == '/a/b/c'
assert item.mode == 0o666
@ -107,7 +107,7 @@ def test_item_se_str_property():
item.path = 42
# non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
item = Item({'path': b'/a/\xfc/c'})
item = Item(internal_dict={'path': b'/a/\xfc/c'})
assert item.path == '/a/\udcfc/c' # getting a surrogate-escaped representation
assert item.as_dict() == {'path': b'/a/\xfc/c'}
del item.path

View file

@ -51,26 +51,26 @@ class PlatformLinuxTestCase(BaseTestCase):
return item
def set_acl(self, path, access=None, default=None, numeric_owner=False):
item = {b'acl_access': access, b'acl_default': default}
item = {'acl_access': access, 'acl_default': default}
acl_set(path, item, numeric_owner=numeric_owner)
def test_access_acl(self):
file = tempfile.NamedTemporaryFile()
self.assert_equal(self.get_acl(file.name), {})
self.set_acl(file.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=False)
self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)[b'acl_access'])
self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)[b'acl_access'])
self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)[b'acl_access'])
self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)['acl_access'])
self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)['acl_access'])
self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)['acl_access'])
file2 = tempfile.NamedTemporaryFile()
self.set_acl(file2.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=True)
self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access'])
self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access'])
self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)['acl_access'])
self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)['acl_access'])
def test_default_acl(self):
self.assert_equal(self.get_acl(self.tmpdir), {})
self.set_acl(self.tmpdir, access=ACCESS_ACL, default=DEFAULT_ACL)
self.assert_equal(self.get_acl(self.tmpdir)[b'acl_access'], ACCESS_ACL)
self.assert_equal(self.get_acl(self.tmpdir)[b'acl_default'], DEFAULT_ACL)
self.assert_equal(self.get_acl(self.tmpdir)['acl_access'], ACCESS_ACL)
self.assert_equal(self.get_acl(self.tmpdir)['acl_default'], DEFAULT_ACL)
def test_non_ascii_acl(self):
# Testing non-ascii ACL processing to see whether our code is robust.
@ -86,18 +86,18 @@ class PlatformLinuxTestCase(BaseTestCase):
group_entry_numeric = 'group:666:rw-:666'.encode('ascii')
acl = b'\n'.join([nothing_special, user_entry, group_entry])
self.set_acl(file.name, access=acl, numeric_owner=False)
acl_access = self.get_acl(file.name, numeric_owner=False)[b'acl_access']
acl_access = self.get_acl(file.name, numeric_owner=False)['acl_access']
self.assert_in(user_entry, acl_access)
self.assert_in(group_entry, acl_access)
acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access']
acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access']
self.assert_in(user_entry_numeric, acl_access_numeric)
self.assert_in(group_entry_numeric, acl_access_numeric)
file2 = tempfile.NamedTemporaryFile()
self.set_acl(file2.name, access=acl, numeric_owner=True)
acl_access = self.get_acl(file2.name, numeric_owner=False)[b'acl_access']
acl_access = self.get_acl(file2.name, numeric_owner=False)['acl_access']
self.assert_in(user_entry, acl_access)
self.assert_in(group_entry, acl_access)
acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access']
acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access']
self.assert_in(user_entry_numeric, acl_access_numeric)
self.assert_in(group_entry_numeric, acl_access_numeric)
@ -125,7 +125,7 @@ class PlatformDarwinTestCase(BaseTestCase):
return item
def set_acl(self, path, acl, numeric_owner=False):
item = {b'acl_extended': acl}
item = {'acl_extended': acl}
acl_set(path, item, numeric_owner=numeric_owner)
def test_access_acl(self):
@ -133,11 +133,11 @@ class PlatformDarwinTestCase(BaseTestCase):
file2 = tempfile.NamedTemporaryFile()
self.assert_equal(self.get_acl(file.name), {})
self.set_acl(file.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=False)
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)[b'acl_extended'])
self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)[b'acl_extended'])
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)['acl_extended'])
self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)['acl_extended'])
self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True)
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended'])
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended'])
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)['acl_extended'])
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)['acl_extended'])
@unittest.skipUnless(sys.platform.startswith(('linux', 'freebsd', 'darwin')), 'POSIX only tests')

View file

@ -8,7 +8,7 @@ from unittest.mock import patch
from ..hashindex import NSIndex
from ..helpers import Location
from ..helpers import IntegrityError, InternalOSError
from ..helpers import IntegrityError
from ..locking import UpgradableLock, LockFailed
from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint, handle_remote_line
from ..repository import Repository, LoggedIO, MAGIC
@ -244,11 +244,14 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
def open(self, create=False):
return Repository(os.path.join(self.tmppath, 'repository'), create=create, append_only=True)
def test_destroy_append_only(self):
# Can't destroy append only repo (via the API)
self.repository.append_only = True
with self.assert_raises(ValueError):
self.repository.destroy()
assert self.repository.append_only
def test_append_only(self):
def segments_in_repository():
@ -300,7 +303,7 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
hints = os.path.join(self.repository.path, 'hints.1')
os.unlink(hints)
os.mkdir(hints)
with self.assert_raises(InternalOSError):
with self.assert_raises(OSError):
self.do_commit()
def test_index(self):
@ -318,7 +321,7 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
index = os.path.join(self.repository.path, 'index.1')
os.unlink(index)
os.mkdir(index)
with self.assert_raises(InternalOSError):
with self.assert_raises(OSError):
self.do_commit()

View file

@ -2,6 +2,7 @@
"""
import errno
import os
import re
import subprocess
import sys
import tempfile
@ -52,23 +53,25 @@ if libc_name is None:
# the 'test_extract_capabilities' test, but also allows xattrs to work with fakeroot on Linux in normal use.
# TODO: Check whether fakeroot supports xattrs on all platforms supported below.
# TODO: If that's the case then we can make Borg fakeroot-xattr-compatible on these as well.
LD_PRELOAD = os.environ.get('LD_PRELOAD', '')
XATTR_FAKEROOT = False
if sys.platform.startswith('linux') and 'fakeroot' in LD_PRELOAD:
fakeroot_version = LooseVersion(subprocess.check_output(['fakeroot', '-v']).decode('ascii').split()[-1])
if fakeroot_version >= LooseVersion("1.20.2"):
# 1.20.2 has been confirmed to have xattr support
# 1.18.2 has been confirmed not to have xattr support
# Versions in-between are unknown
libc_name = LD_PRELOAD
XATTR_FAKEROOT = True
if sys.platform.startswith('linux'):
LD_PRELOAD = os.environ.get('LD_PRELOAD', '')
preloads = re.split("[ :]", LD_PRELOAD)
for preload in preloads:
if preload.startswith("libfakeroot"):
fakeroot_version = LooseVersion(subprocess.check_output(['fakeroot', '-v']).decode('ascii').split()[-1])
if fakeroot_version >= LooseVersion("1.20.2"):
# 1.20.2 has been confirmed to have xattr support
# 1.18.2 has been confirmed not to have xattr support
# Versions in-between are unknown
libc_name = preload
XATTR_FAKEROOT = True
break
try:
libc = CDLL(libc_name, use_errno=True)
except OSError as e:
msg = "Can't find C library [%s]. Try installing ldconfig, gcc/cc or objdump." % e
logger.error(msg)
raise Exception(msg)

View file

@ -7,7 +7,7 @@ envlist = py{34,35,36},flake8
[testenv]
deps =
-rrequirements.d/development.txt
attic
-rrequirements.d/attic.txt
commands = py.test --cov=borg --cov-config=.coveragerc --benchmark-skip --pyargs {posargs:borg.testsuite}
# fakeroot -u needs some env vars:
passenv = *