diff --git a/src/borg/_hashindex.c b/src/borg/_hashindex.c index 289457fe5..b41d57b70 100644 --- a/src/borg/_hashindex.c +++ b/src/borg/_hashindex.c @@ -291,6 +291,20 @@ hashindex_read(PyObject *file_py) goto fail_decref_header; } + /* + * Hash the header + * If the header is corrupted this bails before doing something stupid (like allocating 3.8 TB of memory) + */ + Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader")); + if(PyErr_Occurred()) { + if(PyErr_ExceptionMatches(PyExc_AttributeError)) { + /* Be able to work with regular file objects which do not have a hash_part method. */ + PyErr_Clear(); + } else { + goto fail_decref_header; + } + } + /* Find length of file */ length_object = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)0, SEEK_END); if(PyErr_Occurred()) { @@ -473,6 +487,19 @@ hashindex_write(HashIndex *index, PyObject *file_py) return; } + /* + * Hash the header + */ + Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader")); + if(PyErr_Occurred()) { + if(PyErr_ExceptionMatches(PyExc_AttributeError)) { + /* Be able to work with regular file objects which do not have a hash_part method. 
*/ + PyErr_Clear(); + } else { + return; + } + } + /* Note: explicitly construct view; BuildValue can convert (pointer, length) to Python objects, but copies them for doing so */ buckets_view = PyMemoryView_FromMemory((char*)index->buckets, buckets_length, PyBUF_READ); if(!buckets_view) { diff --git a/src/borg/cache.py b/src/borg/cache.py index 882d98631..13045f0e9 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -22,8 +22,10 @@ from .helpers import safe_ns from .helpers import yes, hostname_is_unique from .helpers import remove_surrogates from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage +from .helpers import set_ec, EXIT_WARNING from .item import ArchiveItem, ChunkListEntry from .crypto.key import PlaintextKey +from .crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError from .locking import Lock from .platform import SaveFile from .remote import cache_if_remote @@ -237,6 +239,8 @@ class CacheConfig: config.set('cache', 'version', '1') config.set('cache', 'repository', self.repository.id_str) config.set('cache', 'manifest', '') + config.add_section('integrity') + config.set('integrity', 'manifest', '') with SaveFile(self.config_path) as fd: config.write(fd) @@ -253,6 +257,20 @@ class CacheConfig: self.manifest_id = unhexlify(self._config.get('cache', 'manifest')) self.timestamp = self._config.get('cache', 'timestamp', fallback=None) self.key_type = self._config.get('cache', 'key_type', fallback=None) + try: + self.integrity = dict(self._config.items('integrity')) + if self._config.get('cache', 'manifest') != self.integrity.pop('manifest'): + # The cache config file is updated (parsed with ConfigParser, the state of the ConfigParser + # is modified and then written out), not re-created. + # Thus, older versions will leave our [integrity] section alone, making the section's data invalid. 
+ # Therefore, we also add the manifest ID to this section and + # can discern whether an older version interfered by comparing the manifest IDs of this section + # and the main [cache] section. + self.integrity = {} + logger.warning('Cache integrity data not available: old Borg version modified the cache.') + except configparser.NoSectionError: + logger.debug('Cache integrity: No integrity data found (files, chunks). Cache is from old version.') + self.integrity = {} previous_location = self._config.get('cache', 'previous_location', fallback=None) if previous_location: self.previous_location = recanonicalize_relative_location(previous_location, self.repository) @@ -263,6 +281,11 @@ class CacheConfig: if manifest: self._config.set('cache', 'manifest', manifest.id_str) self._config.set('cache', 'timestamp', manifest.timestamp) + if not self._config.has_section('integrity'): + self._config.add_section('integrity') + for file, integrity_data in self.integrity.items(): + self._config.set('integrity', file, integrity_data) + self._config.set('integrity', 'manifest', manifest.id_str) if key: self._config.set('cache', 'key_type', str(key.TYPE)) self._config.set('cache', 'previous_location', self.repository._location.canonical_path()) @@ -392,14 +415,16 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" with open(os.path.join(self.path, 'README'), 'w') as fd: fd.write(CACHE_README) self.cache_config.create() - ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8')) + ChunkIndex().write(os.path.join(self.path, 'chunks')) os.makedirs(os.path.join(self.path, 'chunks.archive.d')) with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd: pass # empty file def _do_open(self): self.cache_config.load() - self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8')) + with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False, + integrity_data=self.cache_config.integrity.get('chunks')) as fd: + 
self.chunks = ChunkIndex.read(fd) self.files = None def open(self): @@ -417,7 +442,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" self.files = {} self._newest_mtime = None logger.debug('Reading files cache ...') - with open(os.path.join(self.path, 'files'), 'rb') as fd: + + with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=False, + integrity_data=self.cache_config.integrity.get('files')) as fd: u = msgpack.Unpacker(use_list=True) while True: data = fd.read(64 * 1024) @@ -458,7 +485,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" self._newest_mtime = 2 ** 63 - 1 # nanoseconds, good until y2262 ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20)) pi.output('Saving files cache') - with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd: + with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=True) as fd: for path_hash, item in self.files.items(): # Only keep files seen in this backup that are older than newest mtime seen in this backup - # this is to avoid issues with filesystem snapshots and mtime granularity. 
@@ -467,10 +494,13 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \ entry.age > 0 and entry.age < ttl: msgpack.pack((path_hash, entry), fd) + self.cache_config.integrity['files'] = fd.integrity_data + pi.output('Saving chunks cache') + with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd: + self.chunks.write(fd) + self.cache_config.integrity['chunks'] = fd.integrity_data pi.output('Saving cache config') self.cache_config.save(self.manifest, self.key) - pi.output('Saving chunks cache') - self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8')) os.rename(os.path.join(self.path, 'txn.active'), os.path.join(self.path, 'txn.tmp')) shutil.rmtree(os.path.join(self.path, 'txn.tmp')) @@ -510,7 +540,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" def mkpath(id, suffix=''): id_hex = bin_to_hex(id) path = os.path.join(archive_path, id_hex + suffix) - return path.encode('utf-8') + return path def cached_archives(): if self.do_cache: @@ -525,7 +555,14 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" def cleanup_outdated(ids): for id in ids: - os.unlink(mkpath(id)) + cleanup_cached_archive(id) + + def cleanup_cached_archive(id): + os.unlink(mkpath(id)) + try: + os.unlink(mkpath(id) + '.integrity') + except FileNotFoundError: + pass def fetch_and_build_idx(archive_id, repository, key, chunk_idx): cdata = repository.get(archive_id) @@ -542,14 +579,16 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" for item in unpacker: if not isinstance(item, dict): logger.error('Error: Did not get expected metadata dict - archive corrupted!') - continue + continue # XXX: continue?! 
for chunk_id, size, csize in item.get(b'chunks', []): chunk_idx.add(chunk_id, 1, size, csize) if self.do_cache: fn = mkpath(archive_id) fn_tmp = mkpath(archive_id, suffix='.tmp') try: - chunk_idx.write(fn_tmp) + with DetachedIntegrityCheckedFile(path=fn_tmp, write=True, + filename=bin_to_hex(archive_id)) as fd: + chunk_idx.write(fd) except Exception: os.unlink(fn_tmp) else: @@ -564,9 +603,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" logger.info('Synchronizing chunks cache...') cached_ids = cached_archives() archive_ids = repo_archives() - logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' % ( + logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.', len(archive_ids), len(cached_ids), - len(cached_ids - archive_ids), len(archive_ids - cached_ids), )) + len(cached_ids - archive_ids), len(archive_ids - cached_ids)) # deallocates old hashindex, creates empty hashindex: chunk_idx.clear() cleanup_outdated(cached_ids - archive_ids) @@ -583,10 +622,20 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" if self.do_cache: if archive_id in cached_ids: archive_chunk_idx_path = mkpath(archive_id) - logger.info("Reading cached archive chunk index for %s ..." % archive_name) - archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path) - else: - logger.info('Fetching and building archive index for %s ...' 
% archive_name) + logger.info("Reading cached archive chunk index for %s ...", archive_name) + try: + with DetachedIntegrityCheckedFile(path=archive_chunk_idx_path, write=False) as fd: + archive_chunk_idx = ChunkIndex.read(fd) + except FileIntegrityError as fie: + logger.error('Cached archive chunk index of %s is corrupted: %s', archive_name, fie) + # Delete it and fetch a new index + cleanup_cached_archive(archive_id) + cached_ids.remove(archive_id) + set_ec(EXIT_WARNING) + if archive_id not in cached_ids: + # Do not make this an else branch; the FileIntegrityError exception handler + # above can remove *archive_id* from *cached_ids*. + logger.info('Fetching and building archive index for %s ...', archive_name) archive_chunk_idx = ChunkIndex() fetch_and_build_idx(archive_id, repository, self.key, archive_chunk_idx) logger.info("Merging into master chunks index ...") @@ -599,7 +648,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" chunk_idx.merge(archive_chunk_idx) else: chunk_idx = chunk_idx or ChunkIndex() - logger.info('Fetching archive index for %s ...' 
% archive_name) + logger.info('Fetching archive index for %s ...', archive_name) fetch_and_build_idx(archive_id, repository, self.key, chunk_idx) if self.progress: pi.finish() diff --git a/src/borg/crypto/file_integrity.py b/src/borg/crypto/file_integrity.py index 5c1fa4e1c..032b8672d 100644 --- a/src/borg/crypto/file_integrity.py +++ b/src/borg/crypto/file_integrity.py @@ -104,7 +104,7 @@ class FileIntegrityError(IntegrityError): class IntegrityCheckedFile(FileLikeWrapper): - def __init__(self, path, write, filename=None, override_fd=None): + def __init__(self, path, write, filename=None, override_fd=None, integrity_data=None): self.path = path self.writing = write mode = 'wb' if write else 'rb' @@ -114,10 +114,10 @@ class IntegrityCheckedFile(FileLikeWrapper): self.hash_filename(filename) - if write: + if write or not integrity_data: self.digests = {} else: - self.digests = self.read_integrity_file(path, self.hasher) + self.digests = self.parse_integrity_data(path, integrity_data, self.hasher) # TODO: When we're reading but don't have any digests, i.e. no integrity file existed, # TODO: then we could just short-circuit. @@ -126,37 +126,33 @@ class IntegrityCheckedFile(FileLikeWrapper): # In Borg the name itself encodes the context (eg. index.N, cache, files), # while the path doesn't matter, and moving e.g. a repository or cache directory is supported. # Changing the name however imbues a change of context that is not permissible. + # While Borg does not use anything except ASCII in these file names, it's important to use + # the same encoding everywhere for portability. Using os.fsencode() would be wrong. 
filename = os.path.basename(filename or self.path) self.hasher.update(('%10d' % len(filename)).encode()) self.hasher.update(filename.encode()) - @staticmethod - def integrity_file_path(path): - return path + '.integrity' - @classmethod - def read_integrity_file(cls, path, hasher): + def parse_integrity_data(cls, path: str, data: str, hasher: SHA512FileHashingWrapper): try: - with open(cls.integrity_file_path(path), 'r') as fd: - integrity_file = json.load(fd) - # Provisions for agility now, implementation later, but make sure the on-disk joint is oiled. - algorithm = integrity_file['algorithm'] - if algorithm != hasher.ALGORITHM: - logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm) - return - digests = integrity_file['digests'] - # Require at least presence of the final digest - digests['final'] - return digests - except FileNotFoundError: - logger.info('No integrity file found for %s', path) - except (OSError, ValueError, TypeError, KeyError) as e: - logger.warning('Could not read integrity file for %s: %s', path, e) + integrity_data = json.loads(data) + # Provisions for agility now, implementation later, but make sure the on-disk joint is oiled. 
+ algorithm = integrity_data['algorithm'] + if algorithm != hasher.ALGORITHM: + logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm) + return + digests = integrity_data['digests'] + # Require at least presence of the final digest + digests['final'] + return digests + except (ValueError, TypeError, KeyError) as e: + logger.warning('Could not parse integrity data for %s: %s', path, e) raise FileIntegrityError(path) def hash_part(self, partname, is_final=False): if not self.writing and not self.digests: return + self.hasher.update(('%10d' % len(partname)).encode()) self.hasher.update(partname.encode()) self.hasher.hash_length(seek_to_end=is_final) digest = self.hasher.hexdigest() @@ -173,10 +169,41 @@ class IntegrityCheckedFile(FileLikeWrapper): if exception: return if self.writing: - with open(self.integrity_file_path(self.path), 'w') as fd: - json.dump({ - 'algorithm': self.hasher.ALGORITHM, - 'digests': self.digests, - }, fd) + self.store_integrity_data(json.dumps({ + 'algorithm': self.hasher.ALGORITHM, + 'digests': self.digests, + })) elif self.digests: logger.debug('Verified integrity of %s', self.path) + + def store_integrity_data(self, data: str): + self.integrity_data = data + + +class DetachedIntegrityCheckedFile(IntegrityCheckedFile): + def __init__(self, path, write, filename=None, override_fd=None): + super().__init__(path, write, filename, override_fd) + filename = filename or os.path.basename(path) + output_dir = os.path.dirname(path) + self.output_integrity_file = self.integrity_file_path(os.path.join(output_dir, filename)) + if not write: + self.digests = self.read_integrity_file(self.path, self.hasher) + + @staticmethod + def integrity_file_path(path): + return path + '.integrity' + + @classmethod + def read_integrity_file(cls, path, hasher): + try: + with open(cls.integrity_file_path(path), 'r') as fd: + return cls.parse_integrity_data(path, fd.read(), hasher) + except FileNotFoundError: + logger.info('No integrity 
file found for %s', path) + except OSError as e: + logger.warning('Could not read integrity file for %s: %s', path, e) + raise FileIntegrityError(path) + + def store_integrity_data(self, data: str): + with open(self.output_integrity_file, 'w') as fd: + fd.write(data) diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx index fba8c7a38..2409836fe 100644 --- a/src/borg/hashindex.pyx +++ b/src/borg/hashindex.pyx @@ -67,8 +67,11 @@ cdef class IndexBase: def __cinit__(self, capacity=0, path=None, key_size=32): self.key_size = key_size if path: - with open(path, 'rb') as fd: - self.index = hashindex_read(fd) + if isinstance(path, (str, bytes)): + with open(path, 'rb') as fd: + self.index = hashindex_read(fd) + else: + self.index = hashindex_read(path) assert self.index, 'hashindex_read() returned NULL with no exception set' else: self.index = hashindex_init(capacity, self.key_size, self.value_size) @@ -84,8 +87,11 @@ cdef class IndexBase: return cls(path=path) def write(self, path): - with open(path, 'wb') as fd: - hashindex_write(self.index, fd) + if isinstance(path, (str, bytes)): + with open(path, 'wb') as fd: + hashindex_write(self.index, fd) + else: + hashindex_write(self.index, path) def clear(self): hashindex_free(self.index) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 508b1586c..36d32ba82 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -1,5 +1,6 @@ import argparse import errno +import io import json import logging import os @@ -37,6 +38,7 @@ from ..constants import * # NOQA from ..crypto.low_level import bytes_to_long, num_aes_blocks from ..crypto.key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError from ..crypto.keymanager import RepoIdMismatch, NotABorgKeyFile +from ..crypto.file_integrity import FileIntegrityError from ..helpers import Location, get_security_dir from ..helpers import Manifest from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR @@ 
-2886,6 +2888,82 @@ class RemoteArchiverTestCase(ArchiverTestCase): self.assert_true(marker not in res) +class ArchiverCorruptionTestCase(ArchiverTestCaseBase): + def setUp(self): + super().setUp() + self.create_test_files() + self.cmd('init', '--encryption=repokey', self.repository_location) + self.cache_path = json.loads(self.cmd('info', self.repository_location, '--json'))['cache']['path'] + + def corrupt(self, file): + with open(file, 'r+b') as fd: + fd.seek(-1, io.SEEK_END) + fd.write(b'1') + + def test_cache_chunks(self): + self.corrupt(os.path.join(self.cache_path, 'chunks')) + + if self.FORK_DEFAULT: + out = self.cmd('info', self.repository_location, exit_code=2) + assert 'failed integrity check' in out + else: + with pytest.raises(FileIntegrityError): + self.cmd('info', self.repository_location) + + def test_cache_files(self): + self.cmd('create', self.repository_location + '::test', 'input') + self.corrupt(os.path.join(self.cache_path, 'files')) + + if self.FORK_DEFAULT: + out = self.cmd('create', self.repository_location + '::test1', 'input', exit_code=2) + assert 'failed integrity check' in out + else: + with pytest.raises(FileIntegrityError): + self.cmd('create', self.repository_location + '::test1', 'input') + + def test_chunks_archive(self): + self.cmd('create', self.repository_location + '::test1', 'input') + # Find ID of test1 so we can corrupt it later :) + target_id = self.cmd('list', self.repository_location, '--format={id}{LF}').strip() + self.cmd('create', self.repository_location + '::test2', 'input') + + # Force cache sync, creating archive chunks of test1 and test2 in chunks.archive.d + self.cmd('delete', '--cache-only', self.repository_location) + self.cmd('info', self.repository_location, '--json') + + chunks_archive = os.path.join(self.cache_path, 'chunks.archive.d') + assert len(os.listdir(chunks_archive)) == 4 # two archives, one chunks cache and one .integrity file each + + self.corrupt(os.path.join(chunks_archive, target_id)) + + # 
Trigger cache sync by changing the manifest ID in the cache config + config_path = os.path.join(self.cache_path, 'config') + config = ConfigParser(interpolation=None) + config.read(config_path) + config.set('cache', 'manifest', bin_to_hex(bytes(32))) + with open(config_path, 'w') as fd: + config.write(fd) + + # Cache sync notices corrupted archive chunks, but automatically recovers. + out = self.cmd('create', '-v', self.repository_location + '::test3', 'input', exit_code=1) + assert 'Reading cached archive chunk index for test1' in out + assert 'Cached archive chunk index of test1 is corrupted' in out + assert 'Fetching and building archive index for test1' in out + + def test_old_version_interfered(self): + # Modify the main manifest ID without touching the manifest ID in the integrity section. + # This happens if a version without integrity checking modifies the cache. + config_path = os.path.join(self.cache_path, 'config') + config = ConfigParser(interpolation=None) + config.read(config_path) + config.set('cache', 'manifest', bin_to_hex(bytes(32))) + with open(config_path, 'w') as fd: + config.write(fd) + + out = self.cmd('info', self.repository_location) + assert 'Cache integrity data not available: old Borg version modified the cache.' 
in out + + class DiffArchiverTestCase(ArchiverTestCaseBase): def test_basic_functionality(self): # Initialize test folder diff --git a/src/borg/testsuite/file_integrity.py b/src/borg/testsuite/file_integrity.py index a8ef95f74..0dd323d61 100644 --- a/src/borg/testsuite/file_integrity.py +++ b/src/borg/testsuite/file_integrity.py @@ -1,21 +1,21 @@ import pytest -from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError +from ..crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError class TestReadIntegrityFile: def test_no_integrity(self, tmpdir): protected_file = tmpdir.join('file') protected_file.write('1234') - assert IntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None + assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None def test_truncated_integrity(self, tmpdir): protected_file = tmpdir.join('file') protected_file.write('1234') tmpdir.join('file.integrity').write('') with pytest.raises(FileIntegrityError): - IntegrityCheckedFile.read_integrity_file(str(protected_file), None) + DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None) def test_unknown_algorithm(self, tmpdir): class SomeHasher: @@ -24,7 +24,7 @@ class TestReadIntegrityFile: protected_file = tmpdir.join('file') protected_file.write('1234') tmpdir.join('file.integrity').write('{"algorithm": "HMAC_SERIOUSHASH", "digests": "1234"}') - assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None + assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None @pytest.mark.parametrize('json', ( '{"ALGORITHM": "HMAC_SERIOUSHASH", "digests": "1234"}', @@ -38,7 +38,7 @@ class TestReadIntegrityFile: protected_file.write('1234') tmpdir.join('file.integrity').write(json) with pytest.raises(FileIntegrityError): - IntegrityCheckedFile.read_integrity_file(str(protected_file), None) + 
DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None) def test_valid(self, tmpdir): class SomeHasher: @@ -47,35 +47,35 @@ class TestReadIntegrityFile: protected_file = tmpdir.join('file') protected_file.write('1234') tmpdir.join('file.integrity').write('{"algorithm": "HMAC_FOO1", "digests": {"final": "1234"}}') - assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'} + assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'} -class TestIntegrityCheckedFile: +class TestDetachedIntegrityCheckedFile: @pytest.fixture def integrity_protected_file(self, tmpdir): path = str(tmpdir.join('file')) - with IntegrityCheckedFile(path, write=True) as fd: + with DetachedIntegrityCheckedFile(path, write=True) as fd: fd.write(b'foo and bar') return path def test_simple(self, tmpdir, integrity_protected_file): assert tmpdir.join('file').check(file=True) assert tmpdir.join('file.integrity').check(file=True) - with IntegrityCheckedFile(integrity_protected_file, write=False) as fd: + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: assert fd.read() == b'foo and bar' def test_corrupted_file(self, integrity_protected_file): with open(integrity_protected_file, 'ab') as fd: fd.write(b' extra data') with pytest.raises(FileIntegrityError): - with IntegrityCheckedFile(integrity_protected_file, write=False) as fd: + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: assert fd.read() == b'foo and bar extra data' def test_corrupted_file_partial_read(self, integrity_protected_file): with open(integrity_protected_file, 'ab') as fd: fd.write(b' extra data') with pytest.raises(FileIntegrityError): - with IntegrityCheckedFile(integrity_protected_file, write=False) as fd: + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: data = b'foo and bar' assert fd.read(len(data)) == data @@ -88,7 
+88,7 @@ class TestIntegrityCheckedFile: tmpdir.join('file').move(new_path) tmpdir.join('file.integrity').move(new_path + '.integrity') with pytest.raises(FileIntegrityError): - with IntegrityCheckedFile(str(new_path), write=False) as fd: + with DetachedIntegrityCheckedFile(str(new_path), write=False) as fd: assert fd.read() == b'foo and bar' def test_moved_file(self, tmpdir, integrity_protected_file): @@ -96,27 +96,27 @@ class TestIntegrityCheckedFile: tmpdir.join('file').move(new_dir.join('file')) tmpdir.join('file.integrity').move(new_dir.join('file.integrity')) new_path = str(new_dir.join('file')) - with IntegrityCheckedFile(new_path, write=False) as fd: + with DetachedIntegrityCheckedFile(new_path, write=False) as fd: assert fd.read() == b'foo and bar' def test_no_integrity(self, tmpdir, integrity_protected_file): tmpdir.join('file.integrity').remove() - with IntegrityCheckedFile(integrity_protected_file, write=False) as fd: + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: assert fd.read() == b'foo and bar' -class TestIntegrityCheckedFileParts: +class TestDetachedIntegrityCheckedFileParts: @pytest.fixture def integrity_protected_file(self, tmpdir): path = str(tmpdir.join('file')) - with IntegrityCheckedFile(path, write=True) as fd: + with DetachedIntegrityCheckedFile(path, write=True) as fd: fd.write(b'foo and bar') fd.hash_part('foopart') fd.write(b' other data') return path def test_simple(self, integrity_protected_file): - with IntegrityCheckedFile(integrity_protected_file, write=False) as fd: + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: data1 = b'foo and bar' assert fd.read(len(data1)) == data1 fd.hash_part('foopart') @@ -127,7 +127,7 @@ class TestIntegrityCheckedFileParts: # Because some hash_part failed, the final digest will fail as well - again - even if we catch # the failing hash_part. 
This is intentional: (1) it makes the code simpler (2) it's a good fail-safe # against overly broad exception handling. - with IntegrityCheckedFile(integrity_protected_file, write=False) as fd: + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: data1 = b'foo and bar' assert fd.read(len(data1)) == data1 with pytest.raises(FileIntegrityError): @@ -140,7 +140,7 @@ class TestIntegrityCheckedFileParts: with open(integrity_protected_file, 'ab') as fd: fd.write(b'some extra stuff that does not belong') with pytest.raises(FileIntegrityError): - with IntegrityCheckedFile(integrity_protected_file, write=False) as fd: + with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd: data1 = b'foo and bar' try: assert fd.read(len(data1)) == data1 diff --git a/src/borg/testsuite/hashindex.py b/src/borg/testsuite/hashindex.py index 116399071..120c01b44 100644 --- a/src/borg/testsuite/hashindex.py +++ b/src/borg/testsuite/hashindex.py @@ -6,6 +6,7 @@ import zlib from ..hashindex import NSIndex, ChunkIndex from .. import hashindex +from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError from . import BaseTestCase # Note: these tests are part of the self test, do not use or import py.test functionality here. 
@@ -319,6 +320,27 @@ class HashIndexDataTestCase(BaseTestCase): assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7) +class HashIndexIntegrityTestCase(HashIndexDataTestCase): + def write_integrity_checked_index(self, tempdir): + idx = self._deserialize_hashindex(self.HASHINDEX) + file = os.path.join(tempdir, 'idx') + with IntegrityCheckedFile(path=file, write=True) as fd: + idx.write(fd) + integrity_data = fd.integrity_data + assert 'final' in integrity_data + assert 'HashHeader' in integrity_data + return file, integrity_data + + def test_integrity_checked_file(self): + with tempfile.TemporaryDirectory() as tempdir: + file, integrity_data = self.write_integrity_checked_index(tempdir) + with open(file, 'r+b') as fd: + fd.write(b'Foo') + with self.assert_raises(FileIntegrityError): + with IntegrityCheckedFile(path=file, write=False, integrity_data=integrity_data) as fd: + ChunkIndex.read(fd) + + class NSIndexTestCase(BaseTestCase): def test_nsindex_segment_limit(self): idx = NSIndex()