Merge pull request #2568 from enkore/issue/1101.integration.cache

1101.integration.cache
2026-06-09 00:32:37 -04:00 · 2017-05-31 19:34:43 +02:00 · 2017-05-31 19:34:43 +02:00 · 349a4ade7c
commit 349a4ade7c
parent 578b76af3a 9032aa062b
7 changed files with 277 additions and 68 deletions
--- a/src/borg/_hashindex.c
+++ b/src/borg/_hashindex.c
@ -291,6 +291,20 @@ hashindex_read(PyObject *file_py)
        goto fail_decref_header;
    }

+    /*
+     * Hash the header.
+     * If the header is corrupted, this bails out before doing something stupid (like allocating 3.8 TB of memory).
+     */
+    Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"));
+    if(PyErr_Occurred()) {
+        if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
+            /* Be able to work with regular file objects which do not have a hash_part method. */
+            PyErr_Clear();
+        } else {
+            goto fail_decref_header;
+        }
+    }
+
    /* Find length of file */
    length_object = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)0, SEEK_END);
    if(PyErr_Occurred()) {
@ -473,6 +487,19 @@ hashindex_write(HashIndex *index, PyObject *file_py)
        return;
    }

+    /*
+     * Hash the header
+     */
+    Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"));
+    if(PyErr_Occurred()) {
+        if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
+            /* Be able to work with regular file objects which do not have a hash_part method. */
+            PyErr_Clear();
+        } else {
+            return;
+        }
+    }
+
    /* Note: explicitly construct view; BuildValue can convert (pointer, length) to Python objects, but copies them for doing so */
    buckets_view = PyMemoryView_FromMemory((char*)index->buckets, buckets_length, PyBUF_READ);
    if(!buckets_view) {
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@ -22,8 +22,10 @@ from .helpers import safe_ns
 from .helpers import yes, hostname_is_unique
 from .helpers import remove_surrogates
 from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
+from .helpers import set_ec, EXIT_WARNING
 from .item import ArchiveItem, ChunkListEntry
 from .crypto.key import PlaintextKey
+from .crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
 from .locking import Lock
 from .platform import SaveFile
 from .remote import cache_if_remote
@ -237,6 +239,8 @@ class CacheConfig:
        config.set('cache', 'version', '1')
        config.set('cache', 'repository', self.repository.id_str)
        config.set('cache', 'manifest', '')
+        config.add_section('integrity')
+        config.set('integrity', 'manifest', '')
        with SaveFile(self.config_path) as fd:
            config.write(fd)

@ -253,6 +257,20 @@ class CacheConfig:
        self.manifest_id = unhexlify(self._config.get('cache', 'manifest'))
        self.timestamp = self._config.get('cache', 'timestamp', fallback=None)
        self.key_type = self._config.get('cache', 'key_type', fallback=None)
+        try:
+            self.integrity = dict(self._config.items('integrity'))
+            if self._config.get('cache', 'manifest') != self.integrity.pop('manifest'):
+                # The cache config file is updated in place (it is parsed with ConfigParser, the parser's
+                # state is modified and then written back out), not re-created.
+                # Thus, older versions will leave our [integrity] section alone, making the section's data invalid.
+                # Therefore, we also store the manifest ID in this section, so that we
+                # can detect whether an older version interfered by comparing the manifest IDs of this section
+                # and the main [cache] section.
+                self.integrity = {}
+                logger.warning('Cache integrity data not available: old Borg version modified the cache.')
+        except configparser.NoSectionError:
+            logger.debug('Cache integrity: No integrity data found (files, chunks). Cache is from old version.')
+            self.integrity = {}
        previous_location = self._config.get('cache', 'previous_location', fallback=None)
        if previous_location:
            self.previous_location = recanonicalize_relative_location(previous_location, self.repository)
@ -263,6 +281,11 @@ class CacheConfig:
        if manifest:
            self._config.set('cache', 'manifest', manifest.id_str)
            self._config.set('cache', 'timestamp', manifest.timestamp)
+            if not self._config.has_section('integrity'):
+                self._config.add_section('integrity')
+            for file, integrity_data in self.integrity.items():
+                self._config.set('integrity', file, integrity_data)
+            self._config.set('integrity', 'manifest', manifest.id_str)
        if key:
            self._config.set('cache', 'key_type', str(key.TYPE))
        self._config.set('cache', 'previous_location', self.repository._location.canonical_path())
@ -392,14 +415,16 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
        with open(os.path.join(self.path, 'README'), 'w') as fd:
            fd.write(CACHE_README)
        self.cache_config.create()
-        ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
+        ChunkIndex().write(os.path.join(self.path, 'chunks'))
        os.makedirs(os.path.join(self.path, 'chunks.archive.d'))
        with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
            pass  # empty file

    def _do_open(self):
        self.cache_config.load()
-        self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'))
+        with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False,
+                                  integrity_data=self.cache_config.integrity.get('chunks')) as fd:
+            self.chunks = ChunkIndex.read(fd)
        self.files = None

    def open(self):
@ -417,7 +442,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
        self.files = {}
        self._newest_mtime = None
        logger.debug('Reading files cache ...')
-        with open(os.path.join(self.path, 'files'), 'rb') as fd:
+
+        with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=False,
+                                  integrity_data=self.cache_config.integrity.get('files')) as fd:
            u = msgpack.Unpacker(use_list=True)
            while True:
                data = fd.read(64 * 1024)
@ -458,7 +485,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                self._newest_mtime = 2 ** 63 - 1  # nanoseconds, good until y2262
            ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
            pi.output('Saving files cache')
-            with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
+            with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=True) as fd:
                for path_hash, item in self.files.items():
                    # Only keep files seen in this backup that are older than newest mtime seen in this backup -
                    # this is to avoid issues with filesystem snapshots and mtime granularity.
@ -467,10 +494,13 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                    if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \
                       entry.age > 0 and entry.age < ttl:
                        msgpack.pack((path_hash, entry), fd)
+            self.cache_config.integrity['files'] = fd.integrity_data
+        pi.output('Saving chunks cache')
+        with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd:
+            self.chunks.write(fd)
+        self.cache_config.integrity['chunks'] = fd.integrity_data
        pi.output('Saving cache config')
        self.cache_config.save(self.manifest, self.key)
-        pi.output('Saving chunks cache')
-        self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8'))
        os.rename(os.path.join(self.path, 'txn.active'),
                  os.path.join(self.path, 'txn.tmp'))
        shutil.rmtree(os.path.join(self.path, 'txn.tmp'))
@ -510,7 +540,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
        def mkpath(id, suffix=''):
            id_hex = bin_to_hex(id)
            path = os.path.join(archive_path, id_hex + suffix)
-            return path.encode('utf-8')
+            return path

        def cached_archives():
            if self.do_cache:
@ -525,7 +555,14 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""

        def cleanup_outdated(ids):
            for id in ids:
-                os.unlink(mkpath(id))
+                cleanup_cached_archive(id)
+
+        def cleanup_cached_archive(id):
+            os.unlink(mkpath(id))
+            try:
+                os.unlink(mkpath(id) + '.integrity')
+            except FileNotFoundError:
+                pass

        def fetch_and_build_idx(archive_id, repository, key, chunk_idx):
            cdata = repository.get(archive_id)
@ -542,14 +579,16 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                for item in unpacker:
                    if not isinstance(item, dict):
                        logger.error('Error: Did not get expected metadata dict - archive corrupted!')
-                        continue
+                        continue   # XXX: continue?!
                    for chunk_id, size, csize in item.get(b'chunks', []):
                        chunk_idx.add(chunk_id, 1, size, csize)
            if self.do_cache:
                fn = mkpath(archive_id)
                fn_tmp = mkpath(archive_id, suffix='.tmp')
                try:
-                    chunk_idx.write(fn_tmp)
+                    with DetachedIntegrityCheckedFile(path=fn_tmp, write=True,
+                                                      filename=bin_to_hex(archive_id)) as fd:
+                        chunk_idx.write(fd)
                except Exception:
                    os.unlink(fn_tmp)
                else:
@ -564,9 +603,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
            logger.info('Synchronizing chunks cache...')
            cached_ids = cached_archives()
            archive_ids = repo_archives()
-            logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' % (
+            logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.',
                len(archive_ids), len(cached_ids),
-                len(cached_ids - archive_ids), len(archive_ids - cached_ids), ))
+                len(cached_ids - archive_ids), len(archive_ids - cached_ids))
            # deallocates old hashindex, creates empty hashindex:
            chunk_idx.clear()
            cleanup_outdated(cached_ids - archive_ids)
@ -583,10 +622,20 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                    if self.do_cache:
                        if archive_id in cached_ids:
                            archive_chunk_idx_path = mkpath(archive_id)
-                            logger.info("Reading cached archive chunk index for %s ..." % archive_name)
-                            archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path)
-                        else:
-                            logger.info('Fetching and building archive index for %s ...' % archive_name)
+                            logger.info("Reading cached archive chunk index for %s ...", archive_name)
+                            try:
+                                with DetachedIntegrityCheckedFile(path=archive_chunk_idx_path, write=False) as fd:
+                                    archive_chunk_idx = ChunkIndex.read(fd)
+                            except FileIntegrityError as fie:
+                                logger.error('Cached archive chunk index of %s is corrupted: %s', archive_name, fie)
+                                # Delete it and fetch a new index
+                                cleanup_cached_archive(archive_id)
+                                cached_ids.remove(archive_id)
+                                set_ec(EXIT_WARNING)
+                        if archive_id not in cached_ids:
+                            # Do not make this an else branch; the FileIntegrityError exception handler
+                            # above can remove *archive_id* from *cached_ids*.
+                            logger.info('Fetching and building archive index for %s ...', archive_name)
                            archive_chunk_idx = ChunkIndex()
                            fetch_and_build_idx(archive_id, repository, self.key, archive_chunk_idx)
                        logger.info("Merging into master chunks index ...")
@ -599,7 +648,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                            chunk_idx.merge(archive_chunk_idx)
                    else:
                        chunk_idx = chunk_idx or ChunkIndex()
-                        logger.info('Fetching archive index for %s ...' % archive_name)
+                        logger.info('Fetching archive index for %s ...', archive_name)
                        fetch_and_build_idx(archive_id, repository, self.key, chunk_idx)
                if self.progress:
                    pi.finish()
--- a/src/borg/crypto/file_integrity.py
+++ b/src/borg/crypto/file_integrity.py
@ -104,7 +104,7 @@ class FileIntegrityError(IntegrityError):


 class IntegrityCheckedFile(FileLikeWrapper):
-    def __init__(self, path, write, filename=None, override_fd=None):
+    def __init__(self, path, write, filename=None, override_fd=None, integrity_data=None):
        self.path = path
        self.writing = write
        mode = 'wb' if write else 'rb'
@ -114,10 +114,10 @@ class IntegrityCheckedFile(FileLikeWrapper):

        self.hash_filename(filename)

-        if write:
+        if write or not integrity_data:
            self.digests = {}
        else:
-            self.digests = self.read_integrity_file(path, self.hasher)
+            self.digests = self.parse_integrity_data(path, integrity_data, self.hasher)
            # TODO: When we're reading but don't have any digests, i.e. no integrity file existed,
            # TODO: then we could just short-circuit.

@ -126,37 +126,33 @@ class IntegrityCheckedFile(FileLikeWrapper):
        # In Borg the name itself encodes the context (eg. index.N, cache, files),
        # while the path doesn't matter, and moving e.g. a repository or cache directory is supported.
        # Changing the name however imbues a change of context that is not permissible.
+        # While Borg does not use anything except ASCII in these file names, it's important to use
+        # the same encoding everywhere for portability. Using os.fsencode() would be wrong.
        filename = os.path.basename(filename or self.path)
        self.hasher.update(('%10d' % len(filename)).encode())
        self.hasher.update(filename.encode())

-    @staticmethod
-    def integrity_file_path(path):
-        return path + '.integrity'
-
    @classmethod
-    def read_integrity_file(cls, path, hasher):
+    def parse_integrity_data(cls, path: str, data: str, hasher: SHA512FileHashingWrapper):
        try:
-            with open(cls.integrity_file_path(path), 'r') as fd:
-                integrity_file = json.load(fd)
-                # Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
-                algorithm = integrity_file['algorithm']
-                if algorithm != hasher.ALGORITHM:
-                    logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm)
-                    return
-                digests = integrity_file['digests']
-                # Require at least presence of the final digest
-                digests['final']
-                return digests
-        except FileNotFoundError:
-            logger.info('No integrity file found for %s', path)
-        except (OSError, ValueError, TypeError, KeyError) as e:
-            logger.warning('Could not read integrity file for %s: %s', path, e)
+            integrity_data = json.loads(data)
+            # Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
+            algorithm = integrity_data['algorithm']
+            if algorithm != hasher.ALGORITHM:
+                logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm)
+                return
+            digests = integrity_data['digests']
+            # Require at least presence of the final digest
+            digests['final']
+            return digests
+        except (ValueError, TypeError, KeyError) as e:
+            logger.warning('Could not parse integrity data for %s: %s', path, e)
            raise FileIntegrityError(path)

    def hash_part(self, partname, is_final=False):
        if not self.writing and not self.digests:
            return
+        self.hasher.update(('%10d' % len(partname)).encode())
        self.hasher.update(partname.encode())
        self.hasher.hash_length(seek_to_end=is_final)
        digest = self.hasher.hexdigest()
@ -173,10 +169,41 @@ class IntegrityCheckedFile(FileLikeWrapper):
        if exception:
            return
        if self.writing:
-            with open(self.integrity_file_path(self.path), 'w') as fd:
-                json.dump({
-                    'algorithm': self.hasher.ALGORITHM,
-                    'digests': self.digests,
-                }, fd)
+            self.store_integrity_data(json.dumps({
+                'algorithm': self.hasher.ALGORITHM,
+                'digests': self.digests,
+            }))
        elif self.digests:
            logger.debug('Verified integrity of %s', self.path)
+
+    def store_integrity_data(self, data: str):
+        self.integrity_data = data
+
+
+class DetachedIntegrityCheckedFile(IntegrityCheckedFile):
+    def __init__(self, path, write, filename=None, override_fd=None):
+        super().__init__(path, write, filename, override_fd)
+        filename = filename or os.path.basename(path)
+        output_dir = os.path.dirname(path)
+        self.output_integrity_file = self.integrity_file_path(os.path.join(output_dir, filename))
+        if not write:
+            self.digests = self.read_integrity_file(self.path, self.hasher)
+
+    @staticmethod
+    def integrity_file_path(path):
+        return path + '.integrity'
+
+    @classmethod
+    def read_integrity_file(cls, path, hasher):
+        try:
+            with open(cls.integrity_file_path(path), 'r') as fd:
+                return cls.parse_integrity_data(path, fd.read(), hasher)
+        except FileNotFoundError:
+            logger.info('No integrity file found for %s', path)
+        except OSError as e:
+            logger.warning('Could not read integrity file for %s: %s', path, e)
+            raise FileIntegrityError(path)
+
+    def store_integrity_data(self, data: str):
+        with open(self.output_integrity_file, 'w') as fd:
+            fd.write(data)
--- a/src/borg/hashindex.pyx
+++ b/src/borg/hashindex.pyx
@ -67,8 +67,11 @@ cdef class IndexBase:
    def __cinit__(self, capacity=0, path=None, key_size=32):
        self.key_size = key_size
        if path:
-            with open(path, 'rb') as fd:
-                self.index = hashindex_read(fd)
+            if isinstance(path, (str, bytes)):
+                with open(path, 'rb') as fd:
+                    self.index = hashindex_read(fd)
+            else:
+                self.index = hashindex_read(path)
            assert self.index, 'hashindex_read() returned NULL with no exception set'
        else:
            self.index = hashindex_init(capacity, self.key_size, self.value_size)
@ -84,8 +87,11 @@ cdef class IndexBase:
        return cls(path=path)

    def write(self, path):
-        with open(path, 'wb') as fd:
-            hashindex_write(self.index, fd)
+        if isinstance(path, (str, bytes)):
+            with open(path, 'wb') as fd:
+                hashindex_write(self.index, fd)
+        else:
+            hashindex_write(self.index, path)

    def clear(self):
        hashindex_free(self.index)
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@ -1,5 +1,6 @@
 import argparse
 import errno
+import io
 import json
 import logging
 import os
@ -37,6 +38,7 @@ from ..constants import *  # NOQA
 from ..crypto.low_level import bytes_to_long, num_aes_blocks
 from ..crypto.key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
 from ..crypto.keymanager import RepoIdMismatch, NotABorgKeyFile
+from ..crypto.file_integrity import FileIntegrityError
 from ..helpers import Location, get_security_dir
 from ..helpers import Manifest
 from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
@ -2886,6 +2888,82 @@ class RemoteArchiverTestCase(ArchiverTestCase):
                self.assert_true(marker not in res)


+class ArchiverCorruptionTestCase(ArchiverTestCaseBase):
+    def setUp(self):
+        super().setUp()
+        self.create_test_files()
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.cache_path = json.loads(self.cmd('info', self.repository_location, '--json'))['cache']['path']
+
+    def corrupt(self, file):
+        with open(file, 'r+b') as fd:
+            fd.seek(-1, io.SEEK_END)
+            fd.write(b'1')
+
+    def test_cache_chunks(self):
+        self.corrupt(os.path.join(self.cache_path, 'chunks'))
+
+        if self.FORK_DEFAULT:
+            out = self.cmd('info', self.repository_location, exit_code=2)
+            assert 'failed integrity check' in out
+        else:
+            with pytest.raises(FileIntegrityError):
+                self.cmd('info', self.repository_location)
+
+    def test_cache_files(self):
+        self.cmd('create', self.repository_location + '::test', 'input')
+        self.corrupt(os.path.join(self.cache_path, 'files'))
+
+        if self.FORK_DEFAULT:
+            out = self.cmd('create', self.repository_location + '::test1', 'input', exit_code=2)
+            assert 'failed integrity check' in out
+        else:
+            with pytest.raises(FileIntegrityError):
+                self.cmd('create', self.repository_location + '::test1', 'input')
+
+    def test_chunks_archive(self):
+        self.cmd('create', self.repository_location + '::test1', 'input')
+        # Find ID of test1 so we can corrupt it later :)
+        target_id = self.cmd('list', self.repository_location, '--format={id}{LF}').strip()
+        self.cmd('create', self.repository_location + '::test2', 'input')
+
+        # Force cache sync, creating archive chunks of test1 and test2 in chunks.archive.d
+        self.cmd('delete', '--cache-only', self.repository_location)
+        self.cmd('info', self.repository_location, '--json')
+
+        chunks_archive = os.path.join(self.cache_path, 'chunks.archive.d')
+        assert len(os.listdir(chunks_archive)) == 4  # two archives, one chunks cache and one .integrity file each
+
+        self.corrupt(os.path.join(chunks_archive, target_id))
+
+        # Trigger cache sync by changing the manifest ID in the cache config
+        config_path = os.path.join(self.cache_path, 'config')
+        config = ConfigParser(interpolation=None)
+        config.read(config_path)
+        config.set('cache', 'manifest', bin_to_hex(bytes(32)))
+        with open(config_path, 'w') as fd:
+            config.write(fd)
+
+        # Cache sync notices corrupted archive chunks, but automatically recovers.
+        out = self.cmd('create', '-v', self.repository_location + '::test3', 'input', exit_code=1)
+        assert 'Reading cached archive chunk index for test1' in out
+        assert 'Cached archive chunk index of test1 is corrupted' in out
+        assert 'Fetching and building archive index for test1' in out
+
+    def test_old_version_interfered(self):
+        # Modify the main manifest ID without touching the manifest ID in the integrity section.
+        # This happens if a version without integrity checking modifies the cache.
+        config_path = os.path.join(self.cache_path, 'config')
+        config = ConfigParser(interpolation=None)
+        config.read(config_path)
+        config.set('cache', 'manifest', bin_to_hex(bytes(32)))
+        with open(config_path, 'w') as fd:
+            config.write(fd)
+
+        out = self.cmd('info', self.repository_location)
+        assert 'Cache integrity data not available: old Borg version modified the cache.' in out
+
+
 class DiffArchiverTestCase(ArchiverTestCaseBase):
    def test_basic_functionality(self):
        # Initialize test folder
--- a/src/borg/testsuite/file_integrity.py
+++ b/src/borg/testsuite/file_integrity.py
@ -1,21 +1,21 @@

 import pytest

-from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
+from ..crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError


 class TestReadIntegrityFile:
    def test_no_integrity(self, tmpdir):
        protected_file = tmpdir.join('file')
        protected_file.write('1234')
-        assert IntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None
+        assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None

    def test_truncated_integrity(self, tmpdir):
        protected_file = tmpdir.join('file')
        protected_file.write('1234')
        tmpdir.join('file.integrity').write('')
        with pytest.raises(FileIntegrityError):
-            IntegrityCheckedFile.read_integrity_file(str(protected_file), None)
+            DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None)

    def test_unknown_algorithm(self, tmpdir):
        class SomeHasher:
@ -24,7 +24,7 @@ class TestReadIntegrityFile:
        protected_file = tmpdir.join('file')
        protected_file.write('1234')
        tmpdir.join('file.integrity').write('{"algorithm": "HMAC_SERIOUSHASH", "digests": "1234"}')
-        assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None
+        assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None

    @pytest.mark.parametrize('json', (
        '{"ALGORITHM": "HMAC_SERIOUSHASH", "digests": "1234"}',
@ -38,7 +38,7 @@ class TestReadIntegrityFile:
        protected_file.write('1234')
        tmpdir.join('file.integrity').write(json)
        with pytest.raises(FileIntegrityError):
-            IntegrityCheckedFile.read_integrity_file(str(protected_file), None)
+            DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None)

    def test_valid(self, tmpdir):
        class SomeHasher:
@ -47,35 +47,35 @@ class TestReadIntegrityFile:
        protected_file = tmpdir.join('file')
        protected_file.write('1234')
        tmpdir.join('file.integrity').write('{"algorithm": "HMAC_FOO1", "digests": {"final": "1234"}}')
-        assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'}
+        assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'}


-class TestIntegrityCheckedFile:
+class TestDetachedIntegrityCheckedFile:
    @pytest.fixture
    def integrity_protected_file(self, tmpdir):
        path = str(tmpdir.join('file'))
-        with IntegrityCheckedFile(path, write=True) as fd:
+        with DetachedIntegrityCheckedFile(path, write=True) as fd:
            fd.write(b'foo and bar')
        return path

    def test_simple(self, tmpdir, integrity_protected_file):
        assert tmpdir.join('file').check(file=True)
        assert tmpdir.join('file.integrity').check(file=True)
-        with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+        with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
            assert fd.read() == b'foo and bar'

    def test_corrupted_file(self, integrity_protected_file):
        with open(integrity_protected_file, 'ab') as fd:
            fd.write(b' extra data')
        with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                assert fd.read() == b'foo and bar extra data'

    def test_corrupted_file_partial_read(self, integrity_protected_file):
        with open(integrity_protected_file, 'ab') as fd:
            fd.write(b' extra data')
        with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                data = b'foo and bar'
                assert fd.read(len(data)) == data

@ -88,7 +88,7 @@ class TestIntegrityCheckedFile:
        tmpdir.join('file').move(new_path)
        tmpdir.join('file.integrity').move(new_path + '.integrity')
        with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(str(new_path), write=False) as fd:
+            with DetachedIntegrityCheckedFile(str(new_path), write=False) as fd:
                assert fd.read() == b'foo and bar'

    def test_moved_file(self, tmpdir, integrity_protected_file):
@ -96,27 +96,27 @@ class TestIntegrityCheckedFile:
        tmpdir.join('file').move(new_dir.join('file'))
        tmpdir.join('file.integrity').move(new_dir.join('file.integrity'))
        new_path = str(new_dir.join('file'))
-        with IntegrityCheckedFile(new_path, write=False) as fd:
+        with DetachedIntegrityCheckedFile(new_path, write=False) as fd:
            assert fd.read() == b'foo and bar'

    def test_no_integrity(self, tmpdir, integrity_protected_file):
        tmpdir.join('file.integrity').remove()
-        with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+        with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
            assert fd.read() == b'foo and bar'


-class TestIntegrityCheckedFileParts:
+class TestDetachedIntegrityCheckedFileParts:
    @pytest.fixture
    def integrity_protected_file(self, tmpdir):
        path = str(tmpdir.join('file'))
-        with IntegrityCheckedFile(path, write=True) as fd:
+        with DetachedIntegrityCheckedFile(path, write=True) as fd:
            fd.write(b'foo and bar')
            fd.hash_part('foopart')
            fd.write(b' other data')
        return path

    def test_simple(self, integrity_protected_file):
-        with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+        with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
            data1 = b'foo and bar'
            assert fd.read(len(data1)) == data1
            fd.hash_part('foopart')
@ -127,7 +127,7 @@ class TestIntegrityCheckedFileParts:
            # Because some hash_part failed, the final digest will fail as well - again - even if we catch
            # the failing hash_part. This is intentional: (1) it makes the code simpler (2) it's a good fail-safe
            # against overly broad exception handling.
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                data1 = b'foo and bar'
                assert fd.read(len(data1)) == data1
                with pytest.raises(FileIntegrityError):
@ -140,7 +140,7 @@ class TestIntegrityCheckedFileParts:
        with open(integrity_protected_file, 'ab') as fd:
            fd.write(b'some extra stuff that does not belong')
        with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                data1 = b'foo and bar'
                try:
                    assert fd.read(len(data1)) == data1
--- a/src/borg/testsuite/hashindex.py
+++ b/src/borg/testsuite/hashindex.py
@ -6,6 +6,7 @@ import zlib

 from ..hashindex import NSIndex, ChunkIndex
 from .. import hashindex
+from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
 from . import BaseTestCase

 # Note: these tests are part of the self test, do not use or import py.test functionality here.
@ -319,6 +320,27 @@ class HashIndexDataTestCase(BaseTestCase):
        assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7)


+class HashIndexIntegrityTestCase(HashIndexDataTestCase):
+    def write_integrity_checked_index(self, tempdir):
+        idx = self._deserialize_hashindex(self.HASHINDEX)
+        file = os.path.join(tempdir, 'idx')
+        with IntegrityCheckedFile(path=file, write=True) as fd:
+            idx.write(fd)
+        integrity_data = fd.integrity_data
+        assert 'final' in integrity_data
+        assert 'HashHeader' in integrity_data
+        return file, integrity_data
+
+    def test_integrity_checked_file(self):
+        with tempfile.TemporaryDirectory() as tempdir:
+            file, integrity_data = self.write_integrity_checked_index(tempdir)
+            with open(file, 'r+b') as fd:
+                fd.write(b'Foo')
+            with self.assert_raises(FileIntegrityError):
+                with IntegrityCheckedFile(path=file, write=False, integrity_data=integrity_data) as fd:
+                    ChunkIndex.read(fd)
+
+
 class NSIndexTestCase(BaseTestCase):
    def test_nsindex_segment_limit(self):
        idx = NSIndex()