Merge branch '1.0-maint'

# Conflicts:
#	docs/changes.rst
#	docs/usage/mount.rst.inc
#	src/borg/archive.py
#	src/borg/archiver.py
#	src/borg/fuse.py
#	src/borg/testsuite/archiver.py
2026-04-22 23:01:33 -04:00 · 2016-07-11 01:23:27 +02:00 · 2016-07-11 01:23:27 +02:00 · 3baa8a3728
commit 3baa8a3728
parent f0930b89ce b8198c4ff1
18 changed files with 447 additions and 235 deletions
--- a/docs/api.rst
+++ b/docs/api.rst
@ -6,50 +6,18 @@ API Documentation
    :members:
    :undoc-members:

-.. automodule:: borg.upgrader
-    :members:
-    :undoc-members:
-
 .. automodule:: borg.archive
    :members:
    :undoc-members:

-.. automodule:: borg.fuse
-    :members:
-    :undoc-members:
-
-.. automodule:: borg.platform
-    :members:
-    :undoc-members:
-
-.. automodule:: borg.locking
-    :members:
-    :undoc-members:
-
-.. automodule:: borg.shellpattern
-    :members:
-    :undoc-members:
-
 .. automodule:: borg.repository
    :members:
    :undoc-members:

-.. automodule:: borg.lrucache
-    :members:
-    :undoc-members:
-
 .. automodule:: borg.remote
    :members:
    :undoc-members:

-.. automodule:: borg.xattr
-    :members:
-    :undoc-members:
-
-.. automodule:: borg.helpers
-    :members:
-    :undoc-members:
-
 .. automodule:: borg.cache
    :members:
    :undoc-members:
@ -62,7 +30,31 @@ API Documentation
    :members:
    :undoc-members:

-.. automodule:: borg.platform_darwin
+.. automodule:: borg.helpers
+    :members:
+    :undoc-members:
+
+.. automodule:: borg.locking
+    :members:
+    :undoc-members:
+
+.. automodule:: borg.shellpattern
+    :members:
+    :undoc-members:
+
+.. automodule:: borg.lrucache
+    :members:
+    :undoc-members:
+
+.. automodule:: borg.fuse
+    :members:
+    :undoc-members:
+
+.. automodule:: borg.xattr
+    :members:
+    :undoc-members:
+
+.. automodule:: borg.platform
    :members:
    :undoc-members:

@ -75,7 +67,7 @@ API Documentation
    :undoc-members:

 .. automodule:: borg.compress
-    :members:
+    :members: get_compressor, Compressor, CompressorBase
    :undoc-members:

 .. automodule:: borg.chunker
@ -85,7 +77,3 @@ API Documentation
 .. automodule:: borg.crypto
    :members:
    :undoc-members:
-
-.. automodule:: borg.platform_freebsd
-    :members:
-    :undoc-members:
--- a/docs/changes.rst
+++ b/docs/changes.rst
@ -123,6 +123,51 @@ Other changes:
  - ChunkBuffer: add test for leaving partial chunk in buffer, fixes #945


+Version 1.0.6rc1 (2016-07-10)
+-----------------------------
+
+New features:
+
+- borg check --repair: heal damaged files if missing chunks re-appear (e.g. if
+  the previously missing chunk was added again in a later backup archive),
+  #148. (*) Also improved logging.
+
+Bug fixes:
+
+- sync_dir: silence fsync() failing with EINVAL, #1287
+  Some network filesystems (like smbfs) don't support this and we use this in
+  repository code.
+- borg mount (FUSE):
+
+  - fix directories being shadowed when contained paths were also specified,
+    #1295
+  - raise I/O Error (EIO) on damaged files (unless -o allow_damaged_files is
+    used), #1302. (*)
+- borg extract: warn if a damaged file is extracted, #1299. (*)
+- Added some missing return code checks (ChunkIndex._add, hashindex_resize).
+- borg check: fix/optimize initial hash table size, avoids resize of the table.
+
+Other changes:
+
+- tests:
+
+  - add more FUSE tests, #1284
+  - deduplicate fuse (u)mount code
+  - fix borg binary test issues, #862
+- docs:
+
+  - changelog: added release dates to older borg releases
+  - fix some sphinx (docs generator) warnings, #881
+
+Notes:
+
+(*) Some features depend on information (chunks_healthy list) added to item
+metadata when a file with missing chunks was "repaired" using all-zero
+replacement chunks. The chunks_healthy list is generated since borg 1.0.4,
+thus borg can't recognize such "repaired" (but content-damaged) files if the
+repair was done with an older borg version.
+
+
 Version 1.0.5 (2016-07-07)
 --------------------------

@ -247,8 +292,8 @@ Other changes:
 - add .eggs to .gitignore


-Version 1.0.3
-------------
+Version 1.0.3 (2016-05-20)
+--------------------------

 Bug fixes:

@ -277,8 +322,8 @@ Other changes:
  - borg create help: document format tags, #894


-Version 1.0.2
-------------
+Version 1.0.2 (2016-04-16)
+--------------------------

 Bug fixes:

@ -313,8 +358,8 @@ Other changes:
  - fix confusing usage of "repo" as archive name (use "arch")


-Version 1.0.1
-------------
+Version 1.0.1 (2016-04-08)
+--------------------------

 New features:

@ -365,8 +410,8 @@ Other changes:
  - Document logo font. Recreate logo png. Remove GIMP logo file.


-Version 1.0.0
-------------
+Version 1.0.0 (2016-03-05)
+--------------------------

 The major release number change (0.x -> 1.x) indicates bigger incompatible
 changes, please read the compatibility notes, adapt / test your scripts and
@ -449,8 +494,8 @@ Other changes:
  - FAQ: how to limit bandwidth


-Version 1.0.0rc2
----------------
+Version 1.0.0rc2 (2016-02-28)
+-----------------------------

 New features:

@ -491,8 +536,8 @@ Other changes:
  - "connection closed by remote": add FAQ entry and point to issue #636


-Version 1.0.0rc1
----------------
+Version 1.0.0rc1 (2016-02-07)
+-----------------------------

 New features:

@ -541,8 +586,8 @@ Other changes:
  - misc. updates and fixes


-Version 0.30.0
--------------
+Version 0.30.0 (2016-01-23)
+---------------------------

 Compatibility notes:

@ -619,8 +664,8 @@ Other changes:
  - add gcc gcc-c++ to redhat/fedora/corora install docs, fixes #583


-Version 0.29.0
--------------
+Version 0.29.0 (2015-12-13)
+---------------------------

 Compatibility notes:

@ -695,8 +740,8 @@ Other changes:
  - fix wrong installation instructions for archlinux


-Version 0.28.2
--------------
+Version 0.28.2 (2015-11-15)
+---------------------------

 New features:

@ -719,8 +764,8 @@ Other changes:
  - minor install docs improvements


-Version 0.28.1
--------------
+Version 0.28.1 (2015-11-08)
+---------------------------

 Bug fixes:

@ -734,8 +779,8 @@ Other changes:
 - fix build on readthedocs


-Version 0.28.0
--------------
+Version 0.28.0 (2015-11-08)
+---------------------------

 Compatibility notes:

@ -832,8 +877,8 @@ Other changes:
  - minor development docs update


-Version 0.27.0
--------------
+Version 0.27.0 (2015-10-07)
+---------------------------

 New features:

@ -867,8 +912,8 @@ Other changes:
  - hint to single-file pyinstaller binaries from README


-Version 0.26.1
--------------
+Version 0.26.1 (2015-09-28)
+---------------------------

 This is a minor update, just docs and new pyinstaller binaries.

@ -880,8 +925,8 @@ This is a minor update, just docs and new pyinstaller binaries.
 Note: if you did a python-based installation, there is no need to upgrade.


-Version 0.26.0
--------------
+Version 0.26.0 (2015-09-19)
+---------------------------

 New features:

@ -941,8 +986,8 @@ Other changes:
  - Darwin (OS X Yosemite)


-Version 0.25.0
--------------
+Version 0.25.0 (2015-08-29)
+---------------------------

 Compatibility notes:

@ -1008,8 +1053,8 @@ Other changes:
  - split install docs into system-specific preparations and generic instructions


-Version 0.24.0
--------------
+Version 0.24.0 (2015-08-09)
+---------------------------

 Incompatible changes (compared to 0.23):

@ -1112,8 +1157,8 @@ Other changes:
 - some easy micro optimizations


-Version 0.23.0
--------------
+Version 0.23.0 (2015-06-11)
+---------------------------

 Incompatible changes (compared to attic, fork related):

--- a/docs/conf.py
+++ b/docs/conf.py
@ -55,6 +55,8 @@ version = sw_version.split('-')[0]
 # The full version, including alpha/beta/rc tags.
 release = version

+suppress_warnings = ['image.nonlocal_uri']
+
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 #language = None
--- a/docs/resources.rst
+++ b/docs/resources.rst
@ -17,21 +17,15 @@ Some of them refer to attic, but you can do the same stuff (and more) with borgb

 - `TW's slides for borgbackup talks / lightning talks <https://slides.com/thomaswaldmann>`_ (just grab the latest ones)

- "Attic / Borg Backup" talk from GPN 2015 (video, german audio, english slides):
-  `media.ccc.de <https://media.ccc.de/browse/conferences/gpn/gpn15/gpn15-6942-attic_borg_backup.html#video>`_
-  or
-  `youtube <https://www.youtube.com/watch?v=Nb5nXEKSN-k>`_
+- `Attic / Borg Backup talk from GPN 2015 (media.ccc.de) <https://media.ccc.de/browse/conferences/gpn/gpn15/gpn15-6942-attic_borg_backup.html#video>`_
+- `Attic / Borg Backup talk from GPN 2015 (youtube) <https://www.youtube.com/watch?v=Nb5nXEKSN-k>`_

- "Attic" talk from Easterhegg 2015 (video, german audio, english slides):
-  `media.ccc.de <https://media.ccc.de/v/eh15_-_49_-__-_saal_-_201504042130_-_attic_-_the_holy_grail_of_backups_-_thomas#video>`_
-  or
-  `youtube <https://www.youtube.com/watch?v=96VEAAFDtJw>`_
+- `Attic talk from Easterhegg 2015 (media.ccc.de) <https://media.ccc.de/v/eh15_-_49_-__-_saal_-_201504042130_-_attic_-_the_holy_grail_of_backups_-_thomas#video>`_
+- `Attic talk from Easterhegg 2015 (youtube) <https://www.youtube.com/watch?v=96VEAAFDtJw>`_

- "Attic Backup: Mount your encrypted backups over ssh", 2014 (video, english):
-  `youtube <https://www.youtube.com/watch?v=BVXDFv9YMp8>`_
+- `Attic Backup: Mount your encrypted backups over ssh (youtube) <https://www.youtube.com/watch?v=BVXDFv9YMp8>`_

- "Evolution of Borg", Oct 2015 (gource visualization of attic and borg development):
-  `youtube <https://www.youtube.com/watch?v=K4k_4wDkG6Q>`_
+- `Evolution of Borg (youtube) <https://www.youtube.com/watch?v=K4k_4wDkG6Q>`_

 Software
 --------
--- a/docs/usage/check.rst.inc
+++ b/docs/usage/check.rst.inc
@ -59,9 +59,12 @@ Second, the consistency and correctness of the archive metadata is verified:
 - Check if archive metadata chunk is present. if not, remove archive from
  manifest.
 - For all files (items) in the archive, for all chunks referenced by these
-  files, check if chunk is present (if not and we are in repair mode, replace
-  it with a same-size chunk of zeros). This requires reading of archive and
-  file metadata, but not data.
+  files, check if chunk is present.
+  If a chunk is not present and we are in repair mode, replace it with a same-size
+  replacement chunk of zeros.
+  If a previously lost chunk reappears (e.g. via a later backup) and we are in
+  repair mode, the all-zero replacement chunk will be replaced by the correct chunk.
+  This requires reading of archive and file metadata, but not data.
 - If we are in repair mode and we checked all the archives: delete orphaned
  chunks from the repo.
 - if you use a remote repo server via ssh:, the archive check is executed on
--- a/docs/usage/help.rst.inc
+++ b/docs/usage/help.rst.inc
@ -1,48 +1,9 @@
 .. IMPORTANT: this file is auto-generated from borg's built-in help, do not edit!

-.. _borg_placeholders:
-
-borg help placeholders
-~~~~~~~~~~~~~~~~~~~~~~
-::
-
-
-Repository (or Archive) URLs and --prefix values support these placeholders:
-
-{hostname}
-
-    The (short) hostname of the machine.
-
-{fqdn}
-
-    The full name of the machine.
-
-{now}
-
-    The current local date and time.
-
-{utcnow}
-
-    The current UTC date and time.
-
-{user}
-
-    The user name (or UID, if no name is available) of the user running borg.
-
-{pid}
-
-    The current process ID.
-
-Examples::
-
-    borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
-    borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
-    borg prune --prefix '{hostname}-' ...
 .. _borg_patterns:

 borg help patterns
 ~~~~~~~~~~~~~~~~~~
-::


 Exclusion patterns support four separate styles, fnmatch, shell, regular
@ -131,3 +92,42 @@ Examples::
    sh:/home/*/.thumbnails
    EOF
    $ borg create --exclude-from exclude.txt backup /
+
+.. _borg_placeholders:
+
+borg help placeholders
+~~~~~~~~~~~~~~~~~~~~~~
+
+
+Repository (or Archive) URLs and --prefix values support these placeholders:
+
+{hostname}
+
+    The (short) hostname of the machine.
+
+{fqdn}
+
+    The full name of the machine.
+
+{now}
+
+    The current local date and time.
+
+{utcnow}
+
+    The current UTC date and time.
+
+{user}
+
+    The user name (or UID, if no name is available) of the user running borg.
+
+{pid}
+
+    The current process ID.
+
+Examples::
+
+    borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
+    borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
+    borg prune --prefix '{hostname}-' ...
+
--- a/docs/usage/mount.rst.inc
+++ b/docs/usage/mount.rst.inc
@ -38,6 +38,13 @@ used in fstab entries:
 To allow a regular user to use fstab entries, add the ``user`` option:
 ``/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0``

+For mount options, see the fuse(8) manual page. Additional mount options
+supported by borg:
+
+- allow_damaged_files: by default damaged files (where missing chunks were
+  replaced with runs of zeros by borg check --repair) are not readable and
+  return EIO (I/O error). Set this option to read such files.
+
 The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users
 to tweak the performance. It sets the number of cached data chunks; additional
 memory usage can be up to ~8 MiB times this number. The default is the number
--- a/setup.py
+++ b/setup.py
@ -187,7 +187,7 @@ class build_usage(Command):
                        params = {"topic": topic,
                                  "underline": '~' * len('borg help ' + topic)}
                        doc.write(".. _borg_{topic}:\n\n".format(**params))
-                        doc.write("borg help {topic}\n{underline}\n::\n\n".format(**params))
+                        doc.write("borg help {topic}\n{underline}\n\n".format(**params))
                        doc.write(Archiver.helptext[topic])
                else:
                    params = {"command": command,
--- a/src/borg/_hashindex.c
+++ b/src/borg/_hashindex.c
@ -100,6 +100,8 @@ static int hashindex_delete(HashIndex *index, const void *key);
 static void *hashindex_next_key(HashIndex *index, const void *key);

 /* Private API */
+static void hashindex_free(HashIndex *index);
+
 static int
 hashindex_index(HashIndex *index, const void *key)
 {
@ -148,7 +150,11 @@ hashindex_resize(HashIndex *index, int capacity)
        return 0;
    }
    while((key = hashindex_next_key(index, key))) {
-        hashindex_set(new, key, key + key_size);
+        if(!hashindex_set(new, key, key + key_size)) {
+            /* This can only happen if there's a bug in the code calculating capacity */
+            hashindex_free(new);
+            return 0;
+        }
    }
    free(index->buckets);
    index->buckets = new->buckets;
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@ -419,6 +419,7 @@ Number of files: {0.stats.nfiles}'''.format(
        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
        :param original_path: 'path' key as stored in archive
        """
+        has_damaged_chunks = 'chunks_healthy' in item
        if dry_run or stdout:
            if 'chunks' in item:
                for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
@ -426,6 +427,9 @@ Number of files: {0.stats.nfiles}'''.format(
                        sys.stdout.buffer.write(data)
                if stdout:
                    sys.stdout.buffer.flush()
+            if has_damaged_chunks:
+                logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
+                               remove_surrogates(item[b'path']))
            return

        original_path = original_path or item.path
@ -481,6 +485,9 @@ Number of files: {0.stats.nfiles}'''.format(
                    fd.truncate(pos)
                    fd.flush()
                    self.restore_attrs(path, item, fd=fd.fileno())
+            if has_damaged_chunks:
+                logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
+                               remove_surrogates(item.path))
            if hardlink_masters:
                # Update master entry with extracted file path, so that following hardlinks don't extract twice.
                hardlink_masters[item.get('source') or original_path] = (None, path)
@ -924,7 +931,7 @@ class ArchiveChecker:
        """
        # Explicitly set the initial hash table capacity to avoid performance issues
        # due to hash table "resonance"
-        capacity = int(len(self.repository) * 1.2)
+        capacity = int(len(self.repository) * 1.35 + 1)  # > len * 1.0 / HASH_MAX_LOAD (see _hashindex.c)
        self.chunks = ChunkIndex(capacity)
        marker = None
        while True:
@ -1033,31 +1040,53 @@ class ArchiveChecker:
                    self.repository.put(id_, cdata)

        def verify_file_chunks(item):
-            """Verifies that all file chunks are present
+            """Verifies that all file chunks are present.

-            Missing file chunks will be replaced with new chunks of the same
-            length containing all zeros.
+            Missing file chunks will be replaced with new chunks of the same length containing all zeros.
+            If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one.
            """
            offset = 0
            chunk_list = []
            chunks_replaced = False
-            for chunk_id, size, csize in item.chunks:
+            has_chunks_healthy = 'chunks_healthy' in item
+            chunks_current = item.chunks
+            chunks_healthy = item.chunks_healthy if has_chunks_healthy else chunks_current
+            assert len(chunks_current) == len(chunks_healthy)
+            for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
+                chunk_id, size, csize = chunk_healthy
                if chunk_id not in self.chunks:
-                    # If a file chunk is missing, create an all empty replacement chunk
-                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item.path, offset, offset + size))
-                    self.error_found = chunks_replaced = True
-                    data = bytes(size)
-                    chunk_id = self.key.id_hash(data)
-                    cdata = self.key.encrypt(Chunk(data))
-                    csize = len(cdata)
-                    add_reference(chunk_id, size, csize, cdata)
+                    # a chunk of the healthy list is missing
+                    if chunk_current == chunk_healthy:
+                        logger.error('{}: New missing file chunk detected (Byte {}-{}). '
+                                     'Replacing with all-zero chunk.'.format(item.path, offset, offset + size))
+                        self.error_found = chunks_replaced = True
+                        data = bytes(size)
+                        chunk_id = self.key.id_hash(data)
+                        cdata = self.key.encrypt(Chunk(data))
+                        csize = len(cdata)
+                        add_reference(chunk_id, size, csize, cdata)
+                    else:
+                        logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). It has a '
+                                    'all-zero replacement chunk already.'.format(item.path, offset, offset + size))
+                        chunk_id, size, csize = chunk_current
+                        add_reference(chunk_id, size, csize)
                else:
-                    add_reference(chunk_id, size, csize)
-                chunk_list.append((chunk_id, size, csize))
+                    if chunk_current == chunk_healthy:
+                        # normal case, all fine.
+                        add_reference(chunk_id, size, csize)
+                    else:
+                        logger.info('{}: Healed previously missing file chunk! '
+                                    '(Byte {}-{}).'.format(item.path, offset, offset + size))
+                        add_reference(chunk_id, size, csize)
+                        mark_as_possibly_superseded(chunk_current[0])  # maybe orphaned the all-zero replacement chunk
+                chunk_list.append([chunk_id, size, csize])  # list-typed element as chunks_healthy is list-of-lists
                offset += size
-            if chunks_replaced and 'chunks_healthy' not in item:
+            if chunks_replaced and not has_chunks_healthy:
                # if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later
                item.chunks_healthy = item.chunks
+            if has_chunks_healthy and chunk_list == chunks_healthy:
+                logger.info('{}: Completely healed previously damaged file!'.format(item.path))
+                del item.chunks_healthy
            item.chunks = chunk_list

        def robust_iterator(archive):
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@ -1134,8 +1134,7 @@ class Archiver:
            re:^/home/[^/]\.tmp/
            sh:/home/*/.thumbnails
            EOF
-            $ borg create --exclude-from exclude.txt backup /
-        ''')
+            $ borg create --exclude-from exclude.txt backup /\n\n''')
    helptext['placeholders'] = textwrap.dedent('''
        Repository (or Archive) URLs and --prefix values support these placeholders:

@ -1167,8 +1166,7 @@ class Archiver:

            borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
            borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S} ...
-            borg prune --prefix '{hostname}-' ...
-        ''')
+            borg prune --prefix '{hostname}-' ...\n\n''')

    def do_help(self, parser, commands, args):
        if not args.topic:
@ -1331,9 +1329,12 @@ class Archiver:
        - Check if archive metadata chunk is present. if not, remove archive from
          manifest.
        - For all files (items) in the archive, for all chunks referenced by these
-          files, check if chunk is present (if not and we are in repair mode, replace
-          it with a same-size chunk of zeros). This requires reading of archive and
-          file metadata, but not data.
+          files, check if chunk is present.
+          If a chunk is not present and we are in repair mode, replace it with a same-size
+          replacement chunk of zeros.
+          If a previously lost chunk reappears (e.g. via a later backup) and we are in
+          repair mode, the all-zero replacement chunk will be replaced by the correct chunk.
+          This requires reading of archive and file metadata, but not data.
        - If we are in repair mode and we checked all the archives: delete orphaned
          chunks from the repo.
        - if you use a remote repo server via ssh:, the archive check is executed on
@ -1739,6 +1740,13 @@ class Archiver:
        To allow a regular user to use fstab entries, add the ``user`` option:
        ``/path/to/repo /mnt/point fuse.borgfs defaults,noauto,user 0 0``

+        For mount options, see the fuse(8) manual page. Additional mount options
+        supported by borg:
+
+        - allow_damaged_files: by default damaged files (where missing chunks were
+          replaced with runs of zeros by borg check --repair) are not readable and
+          return EIO (I/O error). Set this option to read such files.
+
        The BORG_MOUNT_DATA_CACHE_ENTRIES environment variable is meant for advanced users
        to tweak the performance. It sets the number of cached data chunks; additional
        memory usage can be up to ~8 MiB times this number. The default is the number
--- a/src/borg/fuse.py
+++ b/src/borg/fuse.py
@ -14,7 +14,7 @@ from .logger import create_logger
 logger = create_logger()

 from .archive import Archive
-from .helpers import daemonize
+from .helpers import daemonize, safe_encode
 from .item import Item
 from .lrucache import LRUCache

@ -50,6 +50,9 @@ class ItemCache:
 class FuseOperations(llfuse.Operations):
    """Export archive as a fuse filesystem
    """
+
+    allow_damaged_files = False
+
    def __init__(self, key, repository, manifest, archive, cached_repo):
        super().__init__()
        self._inode_count = 0
@ -79,6 +82,32 @@ class FuseOperations(llfuse.Operations):
                self.contents[1][os.fsencode(archive_name)] = archive_inode
                self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)

+    def mount(self, mountpoint, mount_options, foreground=False):
+        """Mount filesystem on *mountpoint* with *mount_options*."""
+        options = ['fsname=borgfs', 'ro']
+        if mount_options:
+            options.extend(mount_options.split(','))
+        try:
+            options.remove('allow_damaged_files')
+            self.allow_damaged_files = True
+        except ValueError:
+            pass
+        llfuse.init(self, mountpoint, options)
+        if not foreground:
+            daemonize()
+
+        # If the file system crashes, we do not want to umount because in that
+        # case the mountpoint suddenly appears to become empty. This can have
+        # nasty consequences, imagine the user has e.g. an active rsync mirror
+        # job - seeing the mountpoint empty, rsync would delete everything in the
+        # mirror.
+        umount = False
+        try:
+            signal = fuse_main()
+            umount = (signal is None)  # no crash and no signal -> umount request
+        finally:
+            llfuse.close(umount)
+
    def process_archive(self, archive, prefix=[]):
        """Build fuse inode hierarchy from archive metadata
        """
@ -88,6 +117,16 @@ class FuseOperations(llfuse.Operations):
            unpacker.feed(data)
            for item in unpacker:
                item = Item(internal_dict=item)
+                try:
+                    # This can happen if an archive was created with a command line like
+                    # $ borg create ... dir1/file dir1
+                    # In this case the code below will have created a default_dir inode for dir1 already.
+                    inode = self._find_inode(safe_encode(item.path), prefix)
+                except KeyError:
+                    pass
+                else:
+                    self.items[inode] = item
+                    continue
                segments = prefix + os.fsencode(os.path.normpath(item.path)).split(b'/')
                del item.path
                num_segments = len(segments)
@ -214,6 +253,15 @@ class FuseOperations(llfuse.Operations):
        return self.getattr(inode)

    def open(self, inode, flags, ctx=None):
+        if not self.allow_damaged_files:
+            item = self.get_item(inode)
+            if 'chunks_healthy' in item:
+                # Processed archive items don't carry the path anymore; for converting the inode
+                # to the path we'd either have to store the inverse of the current structure,
+                # or search the entire archive. So we just don't print it. It's easy to correlate anyway.
+                logger.warning('File has damaged (all-zero) chunks. Try running borg check --repair. '
+                               'Mount with allow_damaged_files to read damaged files.')
+                raise llfuse.FUSEError(errno.EIO)
        return inode

    def opendir(self, inode, ctx=None):
@ -254,23 +302,3 @@ class FuseOperations(llfuse.Operations):
    def readlink(self, inode, ctx=None):
        item = self.get_item(inode)
        return os.fsencode(item.source)
-
-    def mount(self, mountpoint, extra_options, foreground=False):
-        options = ['fsname=borgfs', 'ro']
-        if extra_options:
-            options.extend(extra_options.split(','))
-        llfuse.init(self, mountpoint, options)
-        if not foreground:
-            daemonize()
-
-        # If the file system crashes, we do not want to umount because in that
-        # case the mountpoint suddenly appears to become empty. This can have
-        # nasty consequences, imagine the user has e.g. an active rsync mirror
-        # job - seeing the mountpoint empty, rsync would delete everything in the
-        # mirror.
-        umount = False
-        try:
-            signal = fuse_main()
-            umount = (signal is None)  # no crash and no signal -> umount request
-        finally:
-            llfuse.close(umount)
--- a/src/borg/hashindex.pyx
+++ b/src/borg/hashindex.pyx
@ -18,8 +18,6 @@ cdef extern from "_hashindex.c":
    HashIndex *hashindex_read(char *path)
    HashIndex *hashindex_init(int capacity, int key_size, int value_size)
    void hashindex_free(HashIndex *index)
-    void hashindex_merge(HashIndex *index, HashIndex *other)
-    void hashindex_add(HashIndex *index, void *key, void *value)
    int hashindex_get_size(HashIndex *index)
    int hashindex_write(HashIndex *index, char *path)
    void *hashindex_get(HashIndex *index, void *key)
@ -323,7 +321,8 @@ cdef class ChunkIndex(IndexBase):
            values[1] = data[1]
            values[2] = data[2]
        else:
-            hashindex_set(self.index, key, data)
+            if not hashindex_set(self.index, key, data):
+                raise Exception('hashindex_set failed')

    def merge(self, ChunkIndex other):
        cdef void *key = NULL
--- a/src/borg/helpers.py
+++ b/src/borg/helpers.py
@ -927,8 +927,7 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None,
        retry_msg=None, invalid_msg=None, env_msg=None,
        falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH,
        default=False, retry=True, env_var_override=None, ofile=None, input=input):
-    """
-    Output <msg> (usually a question) and let user input an answer.
+    """Output <msg> (usually a question) and let user input an answer.
    Qualifies the answer according to falsish, truish and defaultish as True, False or <default>.
    If it didn't qualify and retry_msg is None (no retries wanted),
    return the default [which defaults to False]. Otherwise let user retry
@ -1112,7 +1111,7 @@ def log_multi(*msgs, level=logging.INFO, logger=logger):
    """
    log multiple lines of text, each line by a separate logging call for cosmetic reasons

-    each positional argument may be a single or multiple lines (separated by \n) of text.
+    each positional argument may be a single or multiple lines (separated by newlines) of text.
    """
    lines = []
    for msg in msgs:
--- a/src/borg/locking.py
+++ b/src/borg/locking.py
@ -101,9 +101,11 @@ class NotMyLock(LockErrorT):
 class ExclusiveLock:
    """An exclusive Lock based on mkdir fs operation being atomic.

-    If possible, try to use the contextmanager here like:
-    with ExclusiveLock(...) as lock:
-        ...
+    If possible, try to use the contextmanager here like::
+
+        with ExclusiveLock(...) as lock:
+            ...
+
    This makes sure the lock is released again if the block is left, no
    matter how (e.g. if an exception occurred).
    """
@ -222,9 +224,11 @@ class UpgradableLock:
    noone is allowed reading) and read access to a resource needs a shared
    lock (multiple readers are allowed).

-    If possible, try to use the contextmanager here like:
-    with UpgradableLock(...) as lock:
-        ...
+    If possible, try to use the contextmanager here like::
+
+        with UpgradableLock(...) as lock:
+            ...
+
    This makes sure the lock is released again if the block is left, no
    matter how (e.g. if an exception occurred).
    """
--- a/src/borg/shellpattern.py
+++ b/src/borg/shellpattern.py
@ -5,7 +5,7 @@ import re
 def translate(pat):
    """Translate a shell-style pattern to a regular expression.

-    The pattern may include "**<sep>" (<sep> stands for the platform-specific path separator; "/" on POSIX systems) for
+    The pattern may include ``**<sep>`` (<sep> stands for the platform-specific path separator; "/" on POSIX systems) for
    matching zero or more directory levels and "*" for matching zero or more arbitrary characters with the exception of
    any path separator. Wrap meta-characters in brackets for a literal match (i.e. "[?]" to match the literal character
    "?").
--- a/src/borg/testsuite/__init__.py
+++ b/src/borg/testsuite/__init__.py
@ -116,6 +116,24 @@ class BaseTestCase(unittest.TestCase):
        for sub_diff in diff.subdirs.values():
            self._assert_dirs_equal_cmp(sub_diff)

+    @contextmanager
+    def fuse_mount(self, location, mountpoint, mount_options=None):
+        """Mount `location` on `mountpoint` for the duration of a ``with`` block.
+
+        The mountpoint directory is created first, then the 'mount' command is
+        run through ``self.cmd`` (forked) and the mount is awaited. On leaving
+        the block the filesystem is unmounted and the directory removed.
+
+        :param location: repository or archive location passed to 'mount'
+        :param mountpoint: directory to create and mount on
+        :param mount_options: optional value passed to 'mount' via ``-o``
+        """
+        os.mkdir(mountpoint)
+        args = ['mount', location, mountpoint]
+        if mount_options:
+            args += '-o', mount_options
+        try:
+            self.cmd(*args, fork=True)
+            self.wait_for_mount(mountpoint)
+            yield
+        finally:
+            # Always unmount and clean up, even if mounting or the with-body
+            # raised (e.g. a failing assertion); otherwise a stale mount stays behind.
+            if sys.platform.startswith('linux'):
+                cmd = 'fusermount -u %s' % mountpoint
+            else:
+                cmd = 'umount %s' % mountpoint
+            os.system(cmd)
+            os.rmdir(mountpoint)
+            # Give the daemon some time to exit
+            time.sleep(.2)
+
    def wait_for_mount(self, path, timeout=5):
        """Wait until a filesystem is mounted on `path`
        """
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@ -223,7 +223,8 @@ class ArchiverTestCaseBase(BaseTestCase):

    def tearDown(self):
        os.chdir(self._old_wd)
-        shutil.rmtree(self.tmpdir)
+        # note: ignore_errors=True as workaround for issue #862
+        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def cmd(self, *args, **kw):
        exit_code = kw.pop('exit_code', 0)
@ -239,6 +240,13 @@ class ArchiverTestCaseBase(BaseTestCase):
    def create_src_archive(self, name):
        self.cmd('create', self.repository_location + '::' + name, src_dir)

+    def open_archive(self, name):
+        """Load the archive called `name` from the repository at ``self.repository_path``.
+
+        Returns an ``(archive, repository)`` tuple. The ``with repository:``
+        block used for loading the manifest and archive is exited again before
+        returning, so callers enter ``with repository:`` themselves before
+        working with the archive.
+        """
+        repository = Repository(self.repository_path)
+        with repository:
+            manifest, key = Manifest.load(repository)
+            archive = Archive(repository, key, manifest, name)
+        return archive, repository
+
    def create_regular_file(self, name, size=0, contents=None):
        filename = os.path.join(self.input_path, name)
        if not os.path.exists(os.path.dirname(filename)):
@ -1283,52 +1291,96 @@ class ArchiverTestCase(ArchiverTestCaseBase):
        assert 'This command initializes' not in self.cmd('help', 'init', '--usage-only')

    @unittest.skipUnless(has_llfuse, 'llfuse not installed')
-    def test_fuse_mount_repository(self):
-        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
-        os.mkdir(mountpoint)
+    def test_fuse(self):
        self.cmd('init', self.repository_location)
        self.create_test_files()
        self.cmd('create', self.repository_location + '::archive', 'input')
        self.cmd('create', self.repository_location + '::archive2', 'input')
-        try:
-            self.cmd('mount', self.repository_location, mountpoint, fork=True)
-            self.wait_for_mount(mountpoint)
-            if has_lchflags:
-                # remove the file we did not backup, so input and output become equal
-                os.remove(os.path.join('input', 'flagfile'))
+        if has_lchflags:
+            # remove the file we did not backup, so input and output become equal
+            os.remove(os.path.join('input', 'flagfile'))
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        # mount the whole repository, archive contents shall show up in archivename subdirs of mountpoint:
+        with self.fuse_mount(self.repository_location, mountpoint):
            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'))
            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'))
-        finally:
-            if sys.platform.startswith('linux'):
-                os.system('fusermount -u ' + mountpoint)
+        # mount only 1 archive, its contents shall show up directly in mountpoint:
+        with self.fuse_mount(self.repository_location + '::archive', mountpoint):
+            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
+            # regular file
+            in_fn = 'input/file1'
+            out_fn = os.path.join(mountpoint, 'input', 'file1')
+            # stat
+            sti1 = os.stat(in_fn)
+            sto1 = os.stat(out_fn)
+            assert sti1.st_mode == sto1.st_mode
+            assert sti1.st_uid == sto1.st_uid
+            assert sti1.st_gid == sto1.st_gid
+            assert sti1.st_size == sto1.st_size
+            assert sti1.st_atime == sto1.st_atime
+            assert sti1.st_ctime == sto1.st_ctime
+            assert sti1.st_mtime == sto1.st_mtime
+            # note: there is another hardlink to this, see below
+            assert sti1.st_nlink == sto1.st_nlink == 2
+            # read
+            with open(in_fn, 'rb') as in_f, open(out_fn, 'rb') as out_f:
+                assert in_f.read() == out_f.read()
+            # list/read xattrs
+            if xattr.is_enabled(self.input_path):
+                assert xattr.listxattr(out_fn) == ['user.foo', ]
+                assert xattr.getxattr(out_fn, 'user.foo') == b'bar'
            else:
-                os.system('umount ' + mountpoint)
-            os.rmdir(mountpoint)
-            # Give the daemon some time to exit
-            time.sleep(.2)
+                assert xattr.listxattr(out_fn) == []
+                try:
+                    xattr.getxattr(out_fn, 'user.foo')
+                except OSError as e:
+                    assert e.errno == llfuse.ENOATTR
+                else:
+                    assert False, "expected OSError(ENOATTR), but no error was raised"
+            # hardlink (to 'input/file1')
+            in_fn = 'input/hardlink'
+            out_fn = os.path.join(mountpoint, 'input', 'hardlink')
+            sti2 = os.stat(in_fn)
+            sto2 = os.stat(out_fn)
+            assert sti2.st_nlink == sto2.st_nlink == 2
+            assert sto1.st_ino == sto2.st_ino
+            # symlink
+            in_fn = 'input/link1'
+            out_fn = os.path.join(mountpoint, 'input', 'link1')
+            sti = os.stat(in_fn, follow_symlinks=False)
+            sto = os.stat(out_fn, follow_symlinks=False)
+            assert stat.S_ISLNK(sti.st_mode)
+            assert stat.S_ISLNK(sto.st_mode)
+            assert os.readlink(in_fn) == os.readlink(out_fn)
+            # FIFO
+            out_fn = os.path.join(mountpoint, 'input', 'fifo1')
+            sto = os.stat(out_fn)
+            assert stat.S_ISFIFO(sto.st_mode)

    @unittest.skipUnless(has_llfuse, 'llfuse not installed')
-    def test_fuse_mount_archive(self):
-        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
-        os.mkdir(mountpoint)
+    def test_fuse_allow_damaged_files(self):
        self.cmd('init', self.repository_location)
-        self.create_test_files()
-        self.cmd('create', self.repository_location + '::archive', 'input')
-        try:
-            self.cmd('mount', self.repository_location + '::archive', mountpoint, fork=True)
-            self.wait_for_mount(mountpoint)
-            if has_lchflags:
-                # remove the file we did not backup, so input and output become equal
-                os.remove(os.path.join('input', 'flagfile'))
-            self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
-        finally:
-            if sys.platform.startswith('linux'):
-                os.system('fusermount -u ' + mountpoint)
+        self.create_src_archive('archive')
+        # Get rid of a chunk and repair it
+        archive, repository = self.open_archive('archive')
+        with repository:
+            for item in archive.iter_items():
+                if item.path.endswith('testsuite/archiver.py'):
+                    repository.delete(item.chunks[-1].id)
+                    path = item.path  # store full path for later
+                    break
            else:
-                os.system('umount ' + mountpoint)
-            os.rmdir(mountpoint)
-            # Give the daemon some time to exit
-            time.sleep(.2)
+                assert False  # missed the file
+            repository.commit()
+        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+
+        mountpoint = os.path.join(self.tmpdir, 'mountpoint')
+        with self.fuse_mount(self.repository_location + '::archive', mountpoint):
+            with pytest.raises(OSError) as excinfo:
+                open(os.path.join(mountpoint, path))
+            assert excinfo.value.errno == errno.EIO
+        with self.fuse_mount(self.repository_location + '::archive', mountpoint, 'allow_damaged_files'):
+            open(os.path.join(mountpoint, path)).close()

    def verify_aes_counter_uniqueness(self, method):
        seen = set()  # Chunks already seen
@ -1633,6 +1685,14 @@ class ArchiverTestCaseBinary(ArchiverTestCase):
    def test_recreate_changed_source(self):
        pass

+    @unittest.skip('test_basic_functionality seems incompatible with fakeroot and/or the binary.')
+    def test_basic_functionality(self):
+        # Intentionally empty: the skip decorator disables this test for this class
+        # (presumably overriding the test of the same name inherited from ArchiverTestCase).
+        pass
+
+    @unittest.skip('test_overwrite seems incompatible with fakeroot and/or the binary.')
+    def test_overwrite(self):
+        # Intentionally empty: the skip decorator disables this test for this class
+        # (presumably overriding the test of the same name inherited from ArchiverTestCase).
+        pass
+

 class ArchiverCheckTestCase(ArchiverTestCaseBase):

@ -1643,13 +1703,6 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
            self.create_src_archive('archive1')
            self.create_src_archive('archive2')

-    def open_archive(self, name):
-        repository = Repository(self.repository_path)
-        with repository:
-            manifest, key = Manifest.load(repository)
-            archive = Archive(repository, key, manifest, name)
-        return archive, repository
-
    def test_check_usage(self):
        output = self.cmd('check', '-v', '--progress', self.repository_location, exit_code=0)
        self.assert_in('Starting repository check', output)
@ -1672,12 +1725,45 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
        with repository:
            for item in archive.iter_items():
                if item.path.endswith('testsuite/archiver.py'):
-                    repository.delete(item.chunks[-1].id)
+                    valid_chunks = item.chunks
+                    killed_chunk = valid_chunks[-1]
+                    repository.delete(killed_chunk.id)
                    break
+            else:
+                self.assert_true(False)  # should not happen
            repository.commit()
        self.cmd('check', self.repository_location, exit_code=1)
-        self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
+        self.assert_in('New missing file chunk detected', output)
        self.cmd('check', self.repository_location, exit_code=0)
+        # check that the file in the old archives has now a different chunk list without the killed chunk
+        for archive_name in ('archive1', 'archive2'):
+            archive, repository = self.open_archive(archive_name)
+            with repository:
+                for item in archive.iter_items():
+                    if item.path.endswith('testsuite/archiver.py'):
+                        self.assert_not_equal(valid_chunks, item.chunks)
+                        self.assert_not_in(killed_chunk, item.chunks)
+                        break
+                else:
+                    self.assert_true(False)  # should not happen
+        # do a fresh backup (that will include the killed chunk)
+        with patch.object(ChunkBuffer, 'BUFFER_SIZE', 10):
+            self.create_src_archive('archive3')
+        # check should be able to heal the file now:
+        output = self.cmd('check', '-v', '--repair', self.repository_location, exit_code=0)
+        self.assert_in('Healed previously missing file chunk', output)
+        self.assert_in('testsuite/archiver.py: Completely healed previously damaged file!', output)
+        # check that the file in the old archives has the correct chunks again
+        for archive_name in ('archive1', 'archive2'):
+            archive, repository = self.open_archive(archive_name)
+            with repository:
+                for item in archive.iter_items():
+                    if item.path.endswith('testsuite/archiver.py'):
+                        self.assert_equal(valid_chunks, item.chunks)
+                        break
+                else:
+                    self.assert_true(False)  # should not happen

    def test_missing_archive_item_chunk(self):
        archive, repository = self.open_archive('archive1')
@ -1762,11 +1848,7 @@ class RemoteArchiverTestCase(ArchiverTestCase):
    # this was introduced because some tests expect stderr contents to show up
    # in "output" also. Also, the non-forking exec_cmd catches both, too.
    @unittest.skip('deadlock issues')
-    def test_fuse_mount_repository(self):
-        pass
-
-    @unittest.skip('deadlock issues')
-    def test_fuse_mount_archive(self):
+    def test_fuse(self):
        pass

    @unittest.skip('only works locally')