diff --git a/README.rst b/README.rst index 2c407c1d3..07e8b0d39 100644 --- a/README.rst +++ b/README.rst @@ -89,9 +89,7 @@ Initialize a new backup repository and create a backup archive:: $ borg init /path/to/repo $ borg create /path/to/repo::Saturday1 ~/Documents -Now doing another backup, just to show off the great deduplication: - -.. code-block:: none +Now doing another backup, just to show off the great deduplication:: $ borg create -v --stats /path/to/repo::Saturday2 ~/Documents ----------------------------------------------------------------------------- @@ -141,6 +139,8 @@ THIS IS SOFTWARE IN DEVELOPMENT, DECIDE YOURSELF WHETHER IT FITS YOUR NEEDS. Security issues should be reported to the `Security contact`_ (or see ``docs/suppport.rst`` in the source distribution). +.. start-badges + |doc| |build| |coverage| |bestpractices| .. |doc| image:: https://readthedocs.org/projects/borgbackup/badge/?version=stable @@ -162,3 +162,5 @@ see ``docs/suppport.rst`` in the source distribution). .. |bestpractices| image:: https://bestpractices.coreinfrastructure.org/projects/271/badge :alt: Best Practices Score :target: https://bestpractices.coreinfrastructure.org/projects/271 + +.. 
end-badges diff --git a/Vagrantfile b/Vagrantfile index 956afba7d..81e9ac9c8 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -65,9 +65,9 @@ def packages_darwin # install all the (security and other) updates sudo softwareupdate --install --all # get osxfuse 3.x release code from github: - curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.5.2/osxfuse-3.5.2.dmg >osxfuse.dmg + curl -s -L https://github.com/osxfuse/osxfuse/releases/download/osxfuse-3.5.3/osxfuse-3.5.3.dmg >osxfuse.dmg MOUNTDIR=$(echo `hdiutil mount osxfuse.dmg | tail -1 | awk '{$1="" ; print $0}'` | xargs -0 echo) \ - && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for macOS 3.5.2.pkg" -target / + && sudo installer -pkg "${MOUNTDIR}/Extras/FUSE for macOS 3.5.3.pkg" -target / sudo chown -R vagrant /usr/local # brew must be able to create stuff here ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" brew update @@ -172,14 +172,14 @@ def packages_cygwin(version) set CYGSETUP=#{setup_exe} REM --- Install build version of CygWin in a subfolder set OURPATH=%cd% - set CYGBUILD="C:\\cygwin\\CygWin" - set CYGMIRROR=ftp://mirrors.kernel.org/sourceware/cygwin/ - set BUILDPKGS=python3,python3-setuptools,binutils,gcc-g++,libopenssl,openssl-devel,git,make,openssh,liblz4-devel,liblz4_1,rsync,curl,python-devel + set CYGBUILD="C:\\cygwin\\CygWin" + set CYGMIRROR=ftp://mirrors.kernel.org/sourceware/cygwin/ + set BUILDPKGS=python3,python3-setuptools,binutils,gcc-g++,libopenssl,openssl-devel,git,make,openssh,liblz4-devel,liblz4_1,rsync,curl,python-devel %CYGSETUP% -q -B -o -n -R %CYGBUILD% -L -D -s %CYGMIRROR% -P %BUILDPKGS% cd /d C:\\cygwin\\CygWin\\bin regtool set /HKLM/SYSTEM/CurrentControlSet/Services/OpenSSHd/ImagePath "C:\\cygwin\\CygWin\\bin\\cygrunsrv.exe" bash -c "ssh-host-config --no" - ' > /cygdrive/c/cygwin/install.bat + ' > /cygdrive/c/cygwin/install.bat cd /cygdrive/c/cygwin && cmd.exe /c install.bat echo "alias mkdir='mkdir -p'" > ~/.profile @@ 
-201,7 +201,6 @@ def install_cygwin_venv EOF end - def install_pyenv(boxname) return <<-EOF curl -s -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/pyenv-installer | bash @@ -248,8 +247,8 @@ def build_pyenv_venv(boxname) EOF end -def install_borg(boxname) - return <<-EOF +def install_borg(fuse) + script = <<-EOF . ~/.bash_profile cd /vagrant/borg . borg-env/bin/activate @@ -260,31 +259,24 @@ def install_borg(boxname) rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__ pip install -r requirements.d/development.txt - # by using [fuse], setup.py can handle different fuse requirements: - pip install -e .[fuse] EOF + if fuse + script += <<-EOF + # by using [fuse], setup.py can handle different fuse requirements: + pip install -e .[fuse] + EOF + else + script += <<-EOF + pip install -e . + # do not install llfuse into the virtualenvs built by tox: + sed -i.bak '/fuse.txt/d' tox.ini + EOF + end + return script end -def install_borg_no_fuse(boxname) - return <<-EOF - . ~/.bash_profile - cd /vagrant/borg - . borg-env/bin/activate - pip install -U wheel # upgrade wheel, too old for 3.5 - cd borg - # clean up (wrong/outdated) stuff we likely got via rsync: - rm -f borg/*.so borg/*.cpy* - rm -f borg/{chunker,crypto,compress,hashindex,platform_linux}.c - rm -rf borg/__pycache__ borg/support/__pycache__ borg/testsuite/__pycache__ - pip install -r requirements.d/development.txt - pip install -e . - # do not install llfuse into the virtualenvs built by tox: - sed -i.bak '/fuse.txt/d' tox.ini - EOF -end - -def install_pyinstaller(boxname) - return <<-EOF +def install_pyinstaller(bootloader) + script = <<-EOF . ~/.bash_profile cd /vagrant/borg . 
borg-env/bin/activate @@ -292,25 +284,19 @@ def install_pyinstaller(boxname) cd pyinstaller # develop branch, with fixed / freshly rebuilt bootloaders git checkout fresh-bootloader + EOF + if bootloader + script += <<-EOF + # build bootloader, if it is not included + cd bootloader + python ./waf all + cd .. + EOF + end + script += <<-EOF pip install -e . EOF -end - -def install_pyinstaller_bootloader(boxname) - return <<-EOF - . ~/.bash_profile - cd /vagrant/borg - . borg-env/bin/activate - git clone https://github.com/thomaswaldmann/pyinstaller.git - cd pyinstaller - # develop branch, with fixed / freshly rebuilt bootloaders - git checkout fresh-bootloader - # build bootloader, if it is not included - cd bootloader - python ./waf all - cd .. - pip install -e . - EOF + return script end def build_binary_with_pyinstaller(boxname) @@ -347,13 +333,11 @@ end def fix_perms return <<-EOF # . ~/.profile - if id "vagrant" >/dev/null 2>&1; then chown -R vagrant /vagrant/borg else chown -R ubuntu /vagrant/borg fi - EOF end @@ -381,7 +365,7 @@ Vagrant.configure(2) do |config| b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos7_64") b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos7_64") b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos7_64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("centos7_64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos7_64") end @@ -391,7 +375,7 @@ Vagrant.configure(2) do |config| b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_32") b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => 
install_pythons("centos6_32") b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_32") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_32") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false) b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_32") end @@ -404,7 +388,7 @@ Vagrant.configure(2) do |config| b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("centos6_64") b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("centos6_64") b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("centos6_64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("centos6_64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false) b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("centos6_64") end @@ -415,7 +399,7 @@ Vagrant.configure(2) do |config| end b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("xenial64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("xenial64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("xenial64") end @@ -426,7 +410,7 @@ Vagrant.configure(2) do |config| end b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("trusty64") - 
b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("trusty64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("trusty64") end @@ -437,7 +421,7 @@ Vagrant.configure(2) do |config| end b.vm.provision "packages debianoid", :type => :shell, :inline => packages_debianoid b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("jessie64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("jessie64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("jessie64") end @@ -448,8 +432,8 @@ Vagrant.configure(2) do |config| b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy32") b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("wheezy32") b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("wheezy32") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy32") - b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("wheezy32") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(false) b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("wheezy32") b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy32") end @@ -461,8 +445,8 @@ Vagrant.configure(2) do |config| 
b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("wheezy64") b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("wheezy64") b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("wheezy64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("wheezy64") - b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("wheezy64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(false) b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("wheezy64") b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("wheezy64") end @@ -475,8 +459,8 @@ Vagrant.configure(2) do |config| b.vm.provision "fix pyenv", :type => :shell, :privileged => false, :inline => fix_pyenv_darwin("darwin64") b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("darwin64") b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("darwin64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("darwin64") - b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller("darwin64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(false) b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("darwin64") 
b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("darwin64") end @@ -491,8 +475,8 @@ Vagrant.configure(2) do |config| b.vm.provision "install pyenv", :type => :shell, :privileged => false, :inline => install_pyenv("freebsd") b.vm.provision "install pythons", :type => :shell, :privileged => false, :inline => install_pythons("freebsd") b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_pyenv_venv("freebsd") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg("freebsd") - b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller_bootloader("freebsd") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(true) + b.vm.provision "install pyinstaller", :type => :shell, :privileged => false, :inline => install_pyinstaller(true) b.vm.provision "build binary with pyinstaller", :type => :shell, :privileged => false, :inline => build_binary_with_pyinstaller("freebsd") b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("freebsd") end @@ -504,7 +488,7 @@ Vagrant.configure(2) do |config| end b.vm.provision "packages openbsd", :type => :shell, :inline => packages_openbsd b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("openbsd64") - b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("openbsd64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false) b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("openbsd64") end @@ -515,7 +499,7 @@ Vagrant.configure(2) do |config| end b.vm.provision "packages netbsd", :type => :shell, :inline => packages_netbsd b.vm.provision "build env", :type => :shell, :privileged => false, :inline => build_sys_venv("netbsd64") - 
b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("netbsd64") + b.vm.provision "install borg", :type => :shell, :privileged => false, :inline => install_borg(false) b.vm.provision "run tests", :type => :shell, :privileged => false, :inline => run_tests("netbsd64") end @@ -542,7 +526,7 @@ Vagrant.configure(2) do |config| b.vm.provision :reload b.vm.provision "cygwin install pip", :type => :shell, :privileged => false, :inline => install_cygwin_venv b.vm.provision "cygwin build env", :type => :shell, :privileged => false, :inline => build_sys_venv("windows10") - b.vm.provision "cygwin install borg", :type => :shell, :privileged => false, :inline => install_borg_no_fuse("windows10") + b.vm.provision "cygwin install borg", :type => :shell, :privileged => false, :inline => install_borg(false) b.vm.provision "cygwin run tests", :type => :shell, :privileged => false, :inline => run_tests("windows10") end end diff --git a/docs/changes.rst b/docs/changes.rst index 52fa39510..3cd75c4a7 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -1,8 +1,25 @@ -Changelog -========= +Important notes +=============== -Important note about pre-1.0.4 potential repo corruption --------------------------------------------------------- +This section is used for information about e.g. security and corruption issues. + +Pre-1.0.9 potential data loss +----------------------------- + +If you have archives in your repository that were made with attic <= 0.13 +(and later migrated to borg), running borg check would report errors in these +archives. See issue #1837. + +The reason for this is an invalid (and useless) metadata key that was +always added due to a bug in these old attic versions. + +If you run borg check --repair, things escalate quickly: all archive items +with invalid metadata will be killed. Due to that attic bug, that means all +items in all archives made with these old attic versions. 
+ + +Pre-1.0.4 potential repo corruption +----------------------------------- Some external errors (like network or disk I/O errors) could lead to corruption of the backup repository due to issue #1138. @@ -49,6 +66,69 @@ The best check that everything is ok is to run a dry-run extraction:: borg extract -v --dry-run REPO::ARCHIVE +.. _changelog: + +Changelog +========= + +Version 1.0.9rc1 (2016-11-27) +----------------------------- + +Bug fixes: + +- files cache: fix determination of newest mtime in backup set (which is + used in cache cleanup and led to wrong "A" [added] status for unchanged + files in next backup), #1860. + +- borg check: + + - fix incorrectly reporting attic 0.13 and earlier archives as corrupt + - handle repo w/o objects gracefully and also bail out early if repo is + *completely* empty, #1815. +- fix tox/pybuild in 1.0-maint +- at xattr module import time, loggers are not initialized yet + +New features: + +- borg umount + exposed already existing umount code via the CLI api, so users can use it, + which is more consistent than using borg to mount and fusermount -u (or + umount) to un-mount, #1855. +- implement borg create --noatime --noctime, fixes #1853 + +Other changes: + +- docs: + + - display README correctly on PyPI + - improve cache / index docs, esp. 
files cache docs, fixes #1825 + - different pattern matching for --exclude, #1779 + - datetime formatting examples for {now} placeholder, #1822 + - clarify passphrase mode attic repo upgrade, #1854 + - clarify --umask usage, #1859 + - clarify how to choose PR target branch + - clarify prune behavior for different archive contents, #1824 + - fix PDF issues, add logo, fix authors, headings, TOC + - move security verification to support section + - fix links in standalone README (:ref: tags) + - add link to security contact in README + - add FAQ about security + - move fork differences to FAQ + - add more details about resource usage +- tests: skip remote tests on cygwin, #1268 +- travis: + + - allow OS X failures until the brew cask osxfuse issue is fixed + - caskroom osxfuse-beta gone, it's osxfuse now (3.5.3) +- vagrant: + + - upgrade OSXfuse / FUSE for macOS to 3.5.3 + - remove llfuse from tox.ini at a central place + - do not try to install llfuse on centos6 + - fix fuse test for darwin, #1546 + - add windows virtual machine with cygwin + - Vagrantfile cleanup / code deduplication + Version 1.1.0b2 (2016-10-01) ---------------------------- diff --git a/docs/faq.rst b/docs/faq.rst index cb97e9216..9c9793bc0 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -540,7 +540,7 @@ Here's a (incomplete) list of some major changes: * better logging, screen output, progress indication * tested on misc. Linux systems, 32 and 64bit, FreeBSD, OpenBSD, NetBSD, Mac OS X -Please read the `ChangeLog`_ (or ``docs/changes.rst`` in the source distribution) for more +Please read the :ref:`changelog` (or ``docs/changes.rst`` in the source distribution) for more information. Borg is not compatible with original attic (but there is a one-way conversion). diff --git a/docs/internals.rst b/docs/internals.rst index 798ce8566..138761b2d 100644 --- a/docs/internals.rst +++ b/docs/internals.rst @@ -252,44 +252,94 @@ For some more general usage hints see also ``--chunker-params``. 
Indexes / Caches ---------------- -The **files cache** is stored in ``cache/files`` and is indexed on the -``file path hash``. At backup time, it is used to quickly determine whether we -need to chunk a given file (or whether it is unchanged and we already have all -its pieces). -It contains: +The **files cache** is stored in ``cache/files`` and is used at backup time to +quickly determine whether a given file is unchanged and we have all its chunks. -* age -* file inode number -* file size -* file mtime_ns -* file content chunk hashes +The files cache is a key -> value mapping and contains: -The inode number is stored to make sure we distinguish between +* key: + + - full, absolute file path id_hash +* value: + + - file inode number + - file size + - file mtime_ns + - list of file content chunk id hashes + - age (0 [newest], 1, 2, 3, ..., BORG_FILES_CACHE_TTL - 1) + +To determine whether a file has not changed, cached values are looked up via +the key in the mapping and compared to the current file attribute values. + +If the file's size, mtime_ns and inode number are still the same, it is +considered to not have changed. In that case, we check that all file content +chunks are (still) present in the repository (we check that via the chunks +cache). + +If everything matches and all chunks are present, the file is not read / +chunked / hashed again (but still a file metadata item is written to the +archive, made from fresh file metadata read from the filesystem). This is +what makes borg so fast when processing unchanged files. + +If there is a mismatch or a chunk is missing, the file is read / chunked / +hashed. Chunks already present in repo won't be transferred to repo again. + +The inode number is stored and compared to make sure we distinguish between different files, as a single path may not be unique across different archives in different setups. 
-The files cache is stored as a python associative array storing -python objects, which generates a lot of overhead. +Not all filesystems have stable inode numbers. If that is the case, borg can +be told to ignore the inode number in the check via --ignore-inode. -The **chunks cache** is stored in ``cache/chunks`` and is indexed on the -``chunk id_hash``. It is used to determine whether we already have a specific -chunk, to count references to it and also for statistics. -It contains: +The age value is used for cache management. If a file is "seen" in a backup +run, its age is reset to 0, otherwise its age is incremented by one. +If a file was not seen in BORG_FILES_CACHE_TTL backups, its cache entry is +removed. See also: :ref:`always_chunking` and :ref:`a_status_oddity` -* reference count -* size -* encrypted/compressed size +The files cache is a python dictionary, storing python objects, which +generates a lot of overhead. -The **repository index** is stored in ``repo/index.%d`` and is indexed on the -``chunk id_hash``. It is used to determine a chunk's location in the repository. -It contains: +Borg can also work without using the files cache (saves memory if you have a +lot of files or not much RAM free), then all files are assumed to have changed. +This is usually much slower than with files cache. -* segment (that contains the chunk) -* offset (where the chunk is located in the segment) +The **chunks cache** is stored in ``cache/chunks`` and is used to determine +whether we already have a specific chunk, to count references to it and also +for statistics. + +The chunks cache is a key -> value mapping and contains: + +* key: + + - chunk id_hash +* value: + + - reference count + - size + - encrypted/compressed size + +The chunks cache is a hashindex, a hash table implemented in C and tuned for +memory efficiency. + +The **repository index** is stored in ``repo/index.%d`` and is used to +determine a chunk's location in the repository. 
+ +The repo index is a key -> value mapping and contains: + +* key: + + - chunk id_hash +* value: + + - segment (that contains the chunk) + - offset (where the chunk is located in the segment) + +The repo index is a hashindex, a hash table implemented in C and tuned for +memory efficiency. -The repository index file is random access. Hints are stored in a file (``repo/hints.%d``). + It contains: * version @@ -314,7 +364,7 @@ varies between 33% and 300%. Indexes / Caches memory usage ----------------------------- -Here is the estimated memory usage of |project_name|: +Here is the estimated memory usage of |project_name| - it's complicated: chunk_count ~= total_file_size / 2 ^ HASH_MASK_BITS @@ -327,6 +377,14 @@ Here is the estimated memory usage of |project_name|: mem_usage ~= repo_index_usage + chunks_cache_usage + files_cache_usage = chunk_count * 164 + total_file_count * 240 +Due to the hashtables, the best/usual/worst cases for memory allocation can +be estimated as follows: + + mem_allocation = mem_usage / load_factor # l_f = 0.25 .. 0.75 + + mem_allocation_peak = mem_allocation * (1 + growth_factor) # g_f = 1.1 .. 2 + + All units are Bytes. It is assuming every chunk is referenced exactly once (if you have a lot of @@ -338,6 +396,17 @@ more chunks than estimated above, because 1 file is at least 1 chunk). If a remote repository is used the repo index will be allocated on the remote side. +The chunks cache, files cache and the repo index are all implemented as hash +tables. A hash table must have a significant amount of unused entries to be +fast - the so-called load factor gives the used/total elements ratio. + +When a hash table gets full (load factor getting too high), it needs to be +grown (allocate new, bigger hash table, copy all elements over to it, free old +hash table) - this will lead to short-time peaks in memory usage each time this +happens. This usually does not happen for all hashtables at the same time, though. 
+For small hash tables, we start with a growth factor of 2, which comes down to +~1.1x for big hash tables. + E.g. backing up a total count of 1 Mi (IEC binary prefix i.e. 2^20) files with a total size of 1TiB. a) with ``create --chunker-params 10,23,16,4095`` (custom, like borg < 1.0 or attic): diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 8a10ac9b9..09e79ff23 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -59,7 +59,7 @@ A step by step example -rw-r--r-- user group 7961 Mon, 2016-02-15 18:22:30 home/user/Documents/Important.doc ... -6. Restore the *Monday* archive:: +6. Restore the *Monday* archive by extracting the files relative to the current directory:: $ borg extract /path/to/repo::Monday diff --git a/docs/usage.rst b/docs/usage.rst index 912adf1f5..51fa600a8 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -42,7 +42,7 @@ Note: you may also prepend a ``file://`` to a filesystem path to get URL style. ``user@host:~other/path/to/repo`` - path relative to other's home directory Note: giving ``user@host:/./path/to/repo`` or ``user@host:/~/path/to/repo`` or -``user@host:/~other/path/to/repo``is also supported, but not required here. +``user@host:/~other/path/to/repo`` is also supported, but not required here. **Remote repositories with relative pathes, alternative syntax with port**: @@ -220,36 +220,80 @@ Resource Usage |project_name| might use a lot of resources depending on the size of the data set it is dealing with. -CPU: +If one uses |project_name| in a client/server way (with a ssh: repository), +the resource usage occurs in part on the client and in another part on the +server. + +If one uses |project_name| as a single process (with a filesystem repo), +all the resource usage occurs in that one process, so just add up client + +server to get the approximate resource usage. 
+ +CPU client: + borg create: does chunking, hashing, compression, crypto (high CPU usage) + chunks cache sync: quite heavy on CPU, doing lots of hashtable operations. + borg extract: crypto, decompression (medium to high CPU usage) + borg check: similar to extract, but depends on options given. + borg prune / borg delete archive: low to medium CPU usage + borg delete repo: done on the server It won't go beyond 100% of 1 core as the code is currently single-threaded. Especially higher zlib and lzma compression levels use significant amounts - of CPU cycles. + of CPU cycles. Crypto might be cheap on the CPU (if hardware accelerated) or + expensive (if not). -Memory (RAM): +CPU server: + It usually doesn't need much CPU, it just deals with the key/value store + (repository) and uses the repository index for that. + + borg check: the repository check computes the checksums of all chunks + (medium CPU usage) + borg delete repo: low CPU usage + +CPU (only for client/server operation): + When using borg in a client/server way with a ssh:-type repo, the ssh + processes used for the transport layer will need some CPU on the client and + on the server due to the crypto they are doing - esp. if you are pumping + big amounts of data. + +Memory (RAM) client: The chunks index and the files index are read into memory for performance - reasons. + reasons. Might need big amounts of memory (see below). Compression, esp. lzma compression with high levels might need substantial amounts of memory. -Temporary files: - Reading data and metadata from a FUSE mounted repository will consume about - the same space as the deduplicated chunks used to represent them in the - repository. +Memory (RAM) server: + The server process will load the repository index into memory. Might need + considerable amounts of memory, but less than on the client (see below). -Cache files: - Contains the chunks index and files index (plus a compressed collection of - single-archive chunk indexes). 
- -Chunks index: +Chunks index (client only): Proportional to the amount of data chunks in your repo. Lots of chunks in your repo imply a big chunks index. It is possible to tweak the chunker params (see create options). -Files index: - Proportional to the amount of files in your last backup. Can be switched - off (see create options), but next backup will be much slower if you do. +Files index (client only): + Proportional to the amount of files in your last backups. Can be switched + off (see create options), but next backup might be much slower if you do. + The speed benefit of using the files cache is proportional to file size. -Network: +Repository index (server only): + Proportional to the amount of data chunks in your repo. Lots of chunks + in your repo imply a big repository index. + It is possible to tweak the chunker params (see create options) to + influence the amount of chunks being created. + +Temporary files (client): + Reading data and metadata from a FUSE mounted repository will consume up to + the size of all deduplicated, small chunks in the repository. Big chunks + won't be locally cached. + +Temporary files (server): + None. + +Cache files (client only): + Contains the chunks index and files index (plus a collection of single- + archive chunk indexes which might need huge amounts of disk space, + depending on archive count and size - see FAQ about how to reduce). + +Network (only for client/server operation): If your repository is remote, all deduplicated (and optionally compressed/ encrypted) data of course has to go over the connection (ssh: repo url). If you use a locally mounted network filesystem, additionally some copy @@ -257,7 +301,8 @@ Network: you backup multiple sources to one target repository, additional traffic happens for cache resynchronization. -In case you are interested in more details, please read the internals documentation. +In case you are interested in more details (like formulas), please see +:ref:`internals`. 
File systems ~~~~~~~~~~~~ @@ -386,7 +431,19 @@ Examples # Use short hostname, user name and current time in archive name $ borg create /path/to/repo::{hostname}-{user}-{now} ~ - $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%d_%H:%M:%S} ~ + # Similar, use the same datetime format as borg 1.1 will have as default + $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%dT%H:%M:%S} ~ + # As above, but add nanoseconds + $ borg create /path/to/repo::{hostname}-{user}-{now:%Y-%m-%dT%H:%M:%S.%f} ~ + +Notes +~~~~~ + +- the --exclude patterns are not like tar. In tar --exclude .bundler/gems will + exclude foo/.bundler/gems. In borg it will not, you need to use --exclude + '\*/.bundler/gems' to get the same effect. See ``borg help patterns`` for + more information. + .. include:: usage/extract.rst.inc @@ -705,6 +762,20 @@ Examples no key file found for repository +Upgrading a passphrase encrypted attic repo +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +attic offered a "passphrase" encryption mode, but this was removed in borg 1.0 +and replaced by the "repokey" mode (which stores the passphrase-protected +encryption key into the repository config). + +Thus, to upgrade a "passphrase" attic repo to a "repokey" borg repo, 2 steps +are needed, in this order: + +- borg upgrade repo +- borg migrate-to-repokey repo + + .. include:: usage/recreate.rst.inc Examples @@ -861,6 +932,17 @@ a new repository when changing chunker params. For more details, see :ref:`chunker_details`. + +--umask +~~~~~~~ + +If you use ``--umask``, make sure that all repository-modifying borg commands +(create, delete, prune) that access the repository in question use the same +``--umask`` value. + +If multiple machines access the same repository, this should hold true for all +of them. 
+ --read-special ~~~~~~~~~~~~~~ diff --git a/setup.py b/setup.py index 648824573..e2998c8e0 100644 --- a/setup.py +++ b/setup.py @@ -179,6 +179,12 @@ if libb2_prefix: with open('README.rst', 'r') as fd: long_description = fd.read() + # remove badges + long_description = re.compile(r'^\.\. start-badges.*^\.\. end-badges', re.M | re.S).sub('', long_description) + # remove |substitutions| + long_description = re.compile(r'\|screencast\|').sub('', long_description) + # remove unknown directives + long_description = re.compile(r'^\.\. highlight:: \w+$', re.M).sub('', long_description) class build_usage(Command): diff --git a/src/borg/archive.py b/src/borg/archive.py index cc90f165f..73a54e17f 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -241,7 +241,7 @@ class Archive: """Failed to encode filename "{}" into file system encoding "{}". Consider configuring the LANG environment variable.""" def __init__(self, repository, key, manifest, name, cache=None, create=False, - checkpoint_interval=300, numeric_owner=False, progress=False, + checkpoint_interval=300, numeric_owner=False, noatime=False, noctime=False, progress=False, chunker_params=CHUNKER_PARAMS, start=None, end=None, compression=None, compression_files=None, consider_part_files=False): self.cwd = os.getcwd() @@ -255,6 +255,8 @@ class Archive: self.name = name self.checkpoint_interval = checkpoint_interval self.numeric_owner = numeric_owner + self.noatime = noatime + self.noctime = noctime if start is None: start = datetime.utcnow() self.chunker_params = chunker_params @@ -685,10 +687,15 @@ Number of files: {0.stats.nfiles}'''.format( mode=st.st_mode, uid=st.st_uid, gid=st.st_gid, - atime=st.st_atime_ns, - ctime=st.st_ctime_ns, mtime=st.st_mtime_ns, ) + # borg can work with archives only having mtime (older attic archives do not have + # atime/ctime). it can be useful to omit atime/ctime, if they change without the + # file content changing - e.g. to get better metadata deduplication. 
+ if not self.noatime: + attrs['atime'] = st.st_atime_ns + if not self.noctime: + attrs['ctime'] = st.st_ctime_ns if self.numeric_owner: attrs['user'] = attrs['group'] = None else: diff --git a/src/borg/archiver.py b/src/borg/archiver.py index ffb8ea404..6b443257d 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -344,7 +344,8 @@ class Archiver: with Cache(repository, key, manifest, do_files=args.cache_files, lock_wait=self.lock_wait) as cache: archive = Archive(repository, key, manifest, args.location.archive, cache=cache, create=True, checkpoint_interval=args.checkpoint_interval, - numeric_owner=args.numeric_owner, progress=args.progress, + numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime, + progress=args.progress, chunker_params=args.chunker_params, start=t0, compression=args.compression, compression_files=args.compression_files) create_inner(archive, cache) @@ -1880,6 +1881,12 @@ class Archiver: fs_group.add_argument('--numeric-owner', dest='numeric_owner', action='store_true', default=False, help='only store numeric user and group identifiers') + fs_group.add_argument('--noatime', dest='noatime', + action='store_true', default=False, + help='do not store atime into archive') + fs_group.add_argument('--noctime', dest='noctime', + action='store_true', default=False, + help='do not store ctime into archive') fs_group.add_argument('--ignore-inode', dest='ignore_inode', action='store_true', default=False, help='ignore inode data in the file metadata cache used to detect unchanged files.') diff --git a/src/borg/cache.py b/src/borg/cache.py index 452056897..aa91e7b3d 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -217,7 +217,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" def _read_files(self): self.files = {} - self._newest_mtime = 0 + self._newest_mtime = None logger.debug('Reading files cache ...') with open(os.path.join(self.path, 'files'), 'rb') as fd: u = msgpack.Unpacker(use_list=True) 
@@ -254,8 +254,11 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" return pi = ProgressIndicatorMessage() if self.files is not None: - pi.output('Saving files cache') + if self._newest_mtime is None: + # was never set because no files were modified/added + self._newest_mtime = 2 ** 63 - 1 # nanoseconds, good until y2262 ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20)) + pi.output('Saving files cache') with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd: for path_hash, item in self.files.items(): # Only keep files seen in this backup that are older than newest mtime seen in this backup - @@ -484,4 +487,4 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" return entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(st.st_mtime_ns), chunk_ids=ids) self.files[path_hash] = msgpack.packb(entry) - self._newest_mtime = max(self._newest_mtime, st.st_mtime_ns) + self._newest_mtime = max(self._newest_mtime or 0, st.st_mtime_ns)