Merge pull request #2568 from enkore/issue/1101.integration.cache

1101.integration.cache
This commit is contained in:
enkore 2017-05-31 19:34:43 +02:00 committed by GitHub
commit 349a4ade7c
7 changed files with 277 additions and 68 deletions

View file

@ -291,6 +291,20 @@ hashindex_read(PyObject *file_py)
goto fail_decref_header;
}
/*
* Hash the header
* If the header is corrupted this bails before doing something stupid (like allocating 3.8 TB of memory)
*/
Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"));
if(PyErr_Occurred()) {
if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
/* Be able to work with regular file objects which do not have a hash_part method. */
PyErr_Clear();
} else {
goto fail_decref_header;
}
}
/* Find length of file */
length_object = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)0, SEEK_END);
if(PyErr_Occurred()) {
@ -473,6 +487,19 @@ hashindex_write(HashIndex *index, PyObject *file_py)
return;
}
/*
* Hash the header
*/
Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"));
if(PyErr_Occurred()) {
if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
/* Be able to work with regular file objects which do not have a hash_part method. */
PyErr_Clear();
} else {
return;
}
}
/* Note: explicitly construct view; BuildValue can convert (pointer, length) to Python objects, but copies them for doing so */
buckets_view = PyMemoryView_FromMemory((char*)index->buckets, buckets_length, PyBUF_READ);
if(!buckets_view) {

View file

@ -22,8 +22,10 @@ from .helpers import safe_ns
from .helpers import yes, hostname_is_unique
from .helpers import remove_surrogates
from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
from .helpers import set_ec, EXIT_WARNING
from .item import ArchiveItem, ChunkListEntry
from .crypto.key import PlaintextKey
from .crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
from .locking import Lock
from .platform import SaveFile
from .remote import cache_if_remote
@ -237,6 +239,8 @@ class CacheConfig:
config.set('cache', 'version', '1')
config.set('cache', 'repository', self.repository.id_str)
config.set('cache', 'manifest', '')
config.add_section('integrity')
config.set('integrity', 'manifest', '')
with SaveFile(self.config_path) as fd:
config.write(fd)
@ -253,6 +257,20 @@ class CacheConfig:
self.manifest_id = unhexlify(self._config.get('cache', 'manifest'))
self.timestamp = self._config.get('cache', 'timestamp', fallback=None)
self.key_type = self._config.get('cache', 'key_type', fallback=None)
try:
self.integrity = dict(self._config.items('integrity'))
if self._config.get('cache', 'manifest') != self.integrity.pop('manifest'):
# The cache config file is updated (parsed with ConfigParser, the state of the ConfigParser
# is modified and then written out.), not re-created.
# Thus, older versions will leave our [integrity] section alone, making the section's data invalid.
# Therefore, we also add the manifest ID to this section and
# can discern whether an older version interfered by comparing the manifest IDs of this section
# and the main [cache] section.
self.integrity = {}
logger.warning('Cache integrity data not available: old Borg version modified the cache.')
except configparser.NoSectionError:
logger.debug('Cache integrity: No integrity data found (files, chunks). Cache is from old version.')
self.integrity = {}
previous_location = self._config.get('cache', 'previous_location', fallback=None)
if previous_location:
self.previous_location = recanonicalize_relative_location(previous_location, self.repository)
@ -263,6 +281,11 @@ class CacheConfig:
if manifest:
self._config.set('cache', 'manifest', manifest.id_str)
self._config.set('cache', 'timestamp', manifest.timestamp)
if not self._config.has_section('integrity'):
self._config.add_section('integrity')
for file, integrity_data in self.integrity.items():
self._config.set('integrity', file, integrity_data)
self._config.set('integrity', 'manifest', manifest.id_str)
if key:
self._config.set('cache', 'key_type', str(key.TYPE))
self._config.set('cache', 'previous_location', self.repository._location.canonical_path())
@ -392,14 +415,16 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
with open(os.path.join(self.path, 'README'), 'w') as fd:
fd.write(CACHE_README)
self.cache_config.create()
ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
ChunkIndex().write(os.path.join(self.path, 'chunks'))
os.makedirs(os.path.join(self.path, 'chunks.archive.d'))
with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
pass # empty file
def _do_open(self):
self.cache_config.load()
self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'))
with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False,
integrity_data=self.cache_config.integrity.get('chunks')) as fd:
self.chunks = ChunkIndex.read(fd)
self.files = None
def open(self):
@ -417,7 +442,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
self.files = {}
self._newest_mtime = None
logger.debug('Reading files cache ...')
with open(os.path.join(self.path, 'files'), 'rb') as fd:
with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=False,
integrity_data=self.cache_config.integrity.get('files')) as fd:
u = msgpack.Unpacker(use_list=True)
while True:
data = fd.read(64 * 1024)
@ -458,7 +485,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
self._newest_mtime = 2 ** 63 - 1 # nanoseconds, good until y2262
ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
pi.output('Saving files cache')
with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=True) as fd:
for path_hash, item in self.files.items():
# Only keep files seen in this backup that are older than newest mtime seen in this backup -
# this is to avoid issues with filesystem snapshots and mtime granularity.
@ -467,10 +494,13 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \
entry.age > 0 and entry.age < ttl:
msgpack.pack((path_hash, entry), fd)
self.cache_config.integrity['files'] = fd.integrity_data
pi.output('Saving chunks cache')
with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd:
self.chunks.write(fd)
self.cache_config.integrity['chunks'] = fd.integrity_data
pi.output('Saving cache config')
self.cache_config.save(self.manifest, self.key)
pi.output('Saving chunks cache')
self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8'))
os.rename(os.path.join(self.path, 'txn.active'),
os.path.join(self.path, 'txn.tmp'))
shutil.rmtree(os.path.join(self.path, 'txn.tmp'))
@ -510,7 +540,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
def mkpath(id, suffix=''):
id_hex = bin_to_hex(id)
path = os.path.join(archive_path, id_hex + suffix)
return path.encode('utf-8')
return path
def cached_archives():
if self.do_cache:
@ -525,7 +555,14 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
def cleanup_outdated(ids):
for id in ids:
os.unlink(mkpath(id))
cleanup_cached_archive(id)
def cleanup_cached_archive(id):
    """Remove the cached chunk index of archive *id* and, if present, its ``.integrity`` file."""
    index_path = mkpath(id)
    os.unlink(index_path)
    try:
        os.unlink(index_path + '.integrity')
    except FileNotFoundError:
        # There is no integrity file for this index, so nothing else to delete.
        pass
def fetch_and_build_idx(archive_id, repository, key, chunk_idx):
cdata = repository.get(archive_id)
@ -542,14 +579,16 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
for item in unpacker:
if not isinstance(item, dict):
logger.error('Error: Did not get expected metadata dict - archive corrupted!')
continue
continue # XXX: continue?!
for chunk_id, size, csize in item.get(b'chunks', []):
chunk_idx.add(chunk_id, 1, size, csize)
if self.do_cache:
fn = mkpath(archive_id)
fn_tmp = mkpath(archive_id, suffix='.tmp')
try:
chunk_idx.write(fn_tmp)
with DetachedIntegrityCheckedFile(path=fn_tmp, write=True,
filename=bin_to_hex(archive_id)) as fd:
chunk_idx.write(fd)
except Exception:
os.unlink(fn_tmp)
else:
@ -564,9 +603,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
logger.info('Synchronizing chunks cache...')
cached_ids = cached_archives()
archive_ids = repo_archives()
logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' % (
logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.',
len(archive_ids), len(cached_ids),
len(cached_ids - archive_ids), len(archive_ids - cached_ids), ))
len(cached_ids - archive_ids), len(archive_ids - cached_ids))
# deallocates old hashindex, creates empty hashindex:
chunk_idx.clear()
cleanup_outdated(cached_ids - archive_ids)
@ -583,10 +622,20 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
if self.do_cache:
if archive_id in cached_ids:
archive_chunk_idx_path = mkpath(archive_id)
logger.info("Reading cached archive chunk index for %s ..." % archive_name)
archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path)
else:
logger.info('Fetching and building archive index for %s ...' % archive_name)
logger.info("Reading cached archive chunk index for %s ...", archive_name)
try:
with DetachedIntegrityCheckedFile(path=archive_chunk_idx_path, write=False) as fd:
archive_chunk_idx = ChunkIndex.read(fd)
except FileIntegrityError as fie:
logger.error('Cached archive chunk index of %s is corrupted: %s', archive_name, fie)
# Delete it and fetch a new index
cleanup_cached_archive(archive_id)
cached_ids.remove(archive_id)
set_ec(EXIT_WARNING)
if archive_id not in cached_ids:
# Do not make this an else branch; the FileIntegrityError exception handler
# above can remove *archive_id* from *cached_ids*.
logger.info('Fetching and building archive index for %s ...', archive_name)
archive_chunk_idx = ChunkIndex()
fetch_and_build_idx(archive_id, repository, self.key, archive_chunk_idx)
logger.info("Merging into master chunks index ...")
@ -599,7 +648,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
chunk_idx.merge(archive_chunk_idx)
else:
chunk_idx = chunk_idx or ChunkIndex()
logger.info('Fetching archive index for %s ...' % archive_name)
logger.info('Fetching archive index for %s ...', archive_name)
fetch_and_build_idx(archive_id, repository, self.key, chunk_idx)
if self.progress:
pi.finish()

View file

@ -104,7 +104,7 @@ class FileIntegrityError(IntegrityError):
class IntegrityCheckedFile(FileLikeWrapper):
def __init__(self, path, write, filename=None, override_fd=None):
def __init__(self, path, write, filename=None, override_fd=None, integrity_data=None):
self.path = path
self.writing = write
mode = 'wb' if write else 'rb'
@ -114,10 +114,10 @@ class IntegrityCheckedFile(FileLikeWrapper):
self.hash_filename(filename)
if write:
if write or not integrity_data:
self.digests = {}
else:
self.digests = self.read_integrity_file(path, self.hasher)
self.digests = self.parse_integrity_data(path, integrity_data, self.hasher)
# TODO: When we're reading but don't have any digests, i.e. no integrity file existed,
# TODO: then we could just short-circuit.
@ -126,37 +126,33 @@ class IntegrityCheckedFile(FileLikeWrapper):
# In Borg the name itself encodes the context (eg. index.N, cache, files),
# while the path doesn't matter, and moving e.g. a repository or cache directory is supported.
# Changing the name, however, implies a change of context that is not permissible.
# While Borg does not use anything except ASCII in these file names, it's important to use
# the same encoding everywhere for portability. Using os.fsencode() would be wrong.
filename = os.path.basename(filename or self.path)
self.hasher.update(('%10d' % len(filename)).encode())
self.hasher.update(filename.encode())
@staticmethod
def integrity_file_path(path):
return path + '.integrity'
@classmethod
def read_integrity_file(cls, path, hasher):
def parse_integrity_data(cls, path: str, data: str, hasher: SHA512FileHashingWrapper):
try:
with open(cls.integrity_file_path(path), 'r') as fd:
integrity_file = json.load(fd)
# Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
algorithm = integrity_file['algorithm']
if algorithm != hasher.ALGORITHM:
logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm)
return
digests = integrity_file['digests']
# Require at least presence of the final digest
digests['final']
return digests
except FileNotFoundError:
logger.info('No integrity file found for %s', path)
except (OSError, ValueError, TypeError, KeyError) as e:
logger.warning('Could not read integrity file for %s: %s', path, e)
integrity_data = json.loads(data)
# Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
algorithm = integrity_data['algorithm']
if algorithm != hasher.ALGORITHM:
logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm)
return
digests = integrity_data['digests']
# Require at least presence of the final digest
digests['final']
return digests
except (ValueError, TypeError, KeyError) as e:
logger.warning('Could not parse integrity data for %s: %s', path, e)
raise FileIntegrityError(path)
def hash_part(self, partname, is_final=False):
if not self.writing and not self.digests:
return
self.hasher.update(('%10d' % len(partname)).encode())
self.hasher.update(partname.encode())
self.hasher.hash_length(seek_to_end=is_final)
digest = self.hasher.hexdigest()
@ -173,10 +169,41 @@ class IntegrityCheckedFile(FileLikeWrapper):
if exception:
return
if self.writing:
with open(self.integrity_file_path(self.path), 'w') as fd:
json.dump({
'algorithm': self.hasher.ALGORITHM,
'digests': self.digests,
}, fd)
self.store_integrity_data(json.dumps({
'algorithm': self.hasher.ALGORITHM,
'digests': self.digests,
}))
elif self.digests:
logger.debug('Verified integrity of %s', self.path)
def store_integrity_data(self, data: str):
    """Keep the serialized integrity *data* on this object as ``integrity_data``.

    Called with the JSON-serialized algorithm and digests once writing has finished;
    callers read the result back from the ``integrity_data`` attribute.
    """
    self.integrity_data = data
class DetachedIntegrityCheckedFile(IntegrityCheckedFile):
    """IntegrityCheckedFile variant that keeps its integrity data in a separate file.

    When writing, the serialized integrity data goes to ``<filename>.integrity`` in the
    directory of *path* (*filename* defaults to the basename of *path*).
    When reading, the digests are loaded from ``<path>.integrity``.
    """

    def __init__(self, path, write, filename=None, override_fd=None):
        super().__init__(path, write, filename, override_fd)
        sidecar_name = filename or os.path.basename(path)
        sidecar_stem = os.path.join(os.path.dirname(path), sidecar_name)
        self.output_integrity_file = self.integrity_file_path(sidecar_stem)
        if write:
            return
        # Reading: replace the digests set up by the base class with those from the integrity file.
        self.digests = self.read_integrity_file(self.path, self.hasher)

    @staticmethod
    def integrity_file_path(path):
        """Return the name of the integrity file belonging to *path*."""
        return path + '.integrity'

    @classmethod
    def read_integrity_file(cls, path, hasher):
        """Load and parse the integrity file belonging to *path*.

        Returns whatever ``parse_integrity_data`` returns for the file's contents,
        or None if the integrity file does not exist.
        Raises FileIntegrityError if the file exists but cannot be read.
        """
        try:
            with open(cls.integrity_file_path(path), 'r') as integrity_fd:
                serialized = integrity_fd.read()
                return cls.parse_integrity_data(path, serialized, hasher)
        except FileNotFoundError:
            # A missing integrity file is not an error; the caller gets None.
            logger.info('No integrity file found for %s', path)
            return None
        except OSError as exc:
            logger.warning('Could not read integrity file for %s: %s', path, exc)
            raise FileIntegrityError(path)

    def store_integrity_data(self, data: str):
        """Write the serialized integrity *data* to the detached integrity file."""
        with open(self.output_integrity_file, 'w') as integrity_fd:
            integrity_fd.write(data)

View file

@ -67,8 +67,11 @@ cdef class IndexBase:
def __cinit__(self, capacity=0, path=None, key_size=32):
self.key_size = key_size
if path:
with open(path, 'rb') as fd:
self.index = hashindex_read(fd)
if isinstance(path, (str, bytes)):
with open(path, 'rb') as fd:
self.index = hashindex_read(fd)
else:
self.index = hashindex_read(path)
assert self.index, 'hashindex_read() returned NULL with no exception set'
else:
self.index = hashindex_init(capacity, self.key_size, self.value_size)
@ -84,8 +87,11 @@ cdef class IndexBase:
return cls(path=path)
def write(self, path):
with open(path, 'wb') as fd:
hashindex_write(self.index, fd)
if isinstance(path, (str, bytes)):
with open(path, 'wb') as fd:
hashindex_write(self.index, fd)
else:
hashindex_write(self.index, path)
def clear(self):
hashindex_free(self.index)

View file

@ -1,5 +1,6 @@
import argparse
import errno
import io
import json
import logging
import os
@ -37,6 +38,7 @@ from ..constants import * # NOQA
from ..crypto.low_level import bytes_to_long, num_aes_blocks
from ..crypto.key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
from ..crypto.keymanager import RepoIdMismatch, NotABorgKeyFile
from ..crypto.file_integrity import FileIntegrityError
from ..helpers import Location, get_security_dir
from ..helpers import Manifest
from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
@ -2886,6 +2888,82 @@ class RemoteArchiverTestCase(ArchiverTestCase):
self.assert_true(marker not in res)
class ArchiverCorruptionTestCase(ArchiverTestCaseBase):
def setUp(self):
    """Create the test files and a repokey-encrypted repository, then remember the cache path."""
    super().setUp()
    self.create_test_files()
    self.cmd('init', '--encryption=repokey', self.repository_location)
    # The cache directory is taken from the JSON output of "info".
    info_json = self.cmd('info', self.repository_location, '--json')
    self.cache_path = json.loads(info_json)['cache']['path']
def corrupt(self, file, amount=1):
    """Corrupt *file* in place by inverting its last *amount* bytes.

    Each affected byte ``c`` is replaced by ``255 - c``, so the file content is
    guaranteed to change. Writing a fixed byte instead would leave the file
    untouched whenever it already ends with that byte, and the corruption tests
    would then see an intact file.

    :param file: path of the file to corrupt; it must hold at least *amount* bytes.
    :param amount: number of trailing bytes to invert (default: 1).
    :raises OSError: if the file cannot be opened or is shorter than *amount* bytes.
    """
    with open(file, 'r+b') as fd:
        fd.seek(-amount, io.SEEK_END)
        corrupted = bytes(255 - c for c in fd.read(amount))
        # Reading moved the file position to the end; go back before overwriting.
        fd.seek(-amount, io.SEEK_END)
        fd.write(corrupted)
def test_cache_chunks(self):
    """A corrupted chunks cache makes "info" fail the integrity check."""
    chunks_path = os.path.join(self.cache_path, 'chunks')
    self.corrupt(chunks_path)
    if not self.FORK_DEFAULT:
        # Not forking: the error surfaces as an exception.
        with pytest.raises(FileIntegrityError):
            self.cmd('info', self.repository_location)
        return
    # Forking: expect exit code 2 and the error message in the output.
    output = self.cmd('info', self.repository_location, exit_code=2)
    assert 'failed integrity check' in output
def test_cache_files(self):
    """A corrupted files cache makes the next "create" fail the integrity check."""
    self.cmd('create', self.repository_location + '::test', 'input')
    files_path = os.path.join(self.cache_path, 'files')
    self.corrupt(files_path)
    if not self.FORK_DEFAULT:
        # Not forking: the error surfaces as an exception.
        with pytest.raises(FileIntegrityError):
            self.cmd('create', self.repository_location + '::test1', 'input')
        return
    # Forking: expect exit code 2 and the error message in the output.
    output = self.cmd('create', self.repository_location + '::test1', 'input', exit_code=2)
    assert 'failed integrity check' in output
def test_chunks_archive(self):
    """A corrupted cached archive chunk index is reported and rebuilt during cache sync."""
    self.cmd('create', self.repository_location + '::test1', 'input')
    # Remember the ID of test1 (the only archive so far); its cached index is corrupted below.
    target_id = self.cmd('list', self.repository_location, '--format={id}{LF}').strip()
    self.cmd('create', self.repository_location + '::test2', 'input')

    # Force cache sync, creating archive chunks of test1 and test2 in chunks.archive.d
    self.cmd('delete', '--cache-only', self.repository_location)
    self.cmd('info', self.repository_location, '--json')

    chunks_archive = os.path.join(self.cache_path, 'chunks.archive.d')
    # two archives, one chunks cache and one .integrity file each
    cached_entries = os.listdir(chunks_archive)
    assert len(cached_entries) == 4

    self.corrupt(os.path.join(chunks_archive, target_id))

    # Trigger cache sync by changing the manifest ID in the cache config
    config_path = os.path.join(self.cache_path, 'config')
    cache_config = ConfigParser(interpolation=None)
    cache_config.read(config_path)
    cache_config.set('cache', 'manifest', bin_to_hex(bytes(32)))
    with open(config_path, 'w') as config_fd:
        cache_config.write(config_fd)

    # Cache sync notices corrupted archive chunks, but automatically recovers.
    output = self.cmd('create', '-v', self.repository_location + '::test3', 'input', exit_code=1)
    assert 'Reading cached archive chunk index for test1' in output
    assert 'Cached archive chunk index of test1 is corrupted' in output
    assert 'Fetching and building archive index for test1' in output
def test_old_version_interfered(self):
    """A manifest ID mismatch between [cache] and [integrity] is reported as old-version interference."""
    # Modify the main manifest ID without touching the manifest ID in the integrity section.
    # This happens if a version without integrity checking modifies the cache.
    config_path = os.path.join(self.cache_path, 'config')
    cache_config = ConfigParser(interpolation=None)
    cache_config.read(config_path)
    cache_config.set('cache', 'manifest', bin_to_hex(bytes(32)))
    with open(config_path, 'w') as config_fd:
        cache_config.write(config_fd)

    output = self.cmd('info', self.repository_location)
    assert 'Cache integrity data not available: old Borg version modified the cache.' in output
class DiffArchiverTestCase(ArchiverTestCaseBase):
def test_basic_functionality(self):
# Initialize test folder

View file

@ -1,21 +1,21 @@
import pytest
from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
from ..crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
class TestReadIntegrityFile:
def test_no_integrity(self, tmpdir):
protected_file = tmpdir.join('file')
protected_file.write('1234')
assert IntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None
assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None
def test_truncated_integrity(self, tmpdir):
protected_file = tmpdir.join('file')
protected_file.write('1234')
tmpdir.join('file.integrity').write('')
with pytest.raises(FileIntegrityError):
IntegrityCheckedFile.read_integrity_file(str(protected_file), None)
DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None)
def test_unknown_algorithm(self, tmpdir):
class SomeHasher:
@ -24,7 +24,7 @@ class TestReadIntegrityFile:
protected_file = tmpdir.join('file')
protected_file.write('1234')
tmpdir.join('file.integrity').write('{"algorithm": "HMAC_SERIOUSHASH", "digests": "1234"}')
assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None
assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None
@pytest.mark.parametrize('json', (
'{"ALGORITHM": "HMAC_SERIOUSHASH", "digests": "1234"}',
@ -38,7 +38,7 @@ class TestReadIntegrityFile:
protected_file.write('1234')
tmpdir.join('file.integrity').write(json)
with pytest.raises(FileIntegrityError):
IntegrityCheckedFile.read_integrity_file(str(protected_file), None)
DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None)
def test_valid(self, tmpdir):
class SomeHasher:
@ -47,35 +47,35 @@ class TestReadIntegrityFile:
protected_file = tmpdir.join('file')
protected_file.write('1234')
tmpdir.join('file.integrity').write('{"algorithm": "HMAC_FOO1", "digests": {"final": "1234"}}')
assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'}
assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'}
class TestIntegrityCheckedFile:
class TestDetachedIntegrityCheckedFile:
@pytest.fixture
def integrity_protected_file(self, tmpdir):
path = str(tmpdir.join('file'))
with IntegrityCheckedFile(path, write=True) as fd:
with DetachedIntegrityCheckedFile(path, write=True) as fd:
fd.write(b'foo and bar')
return path
def test_simple(self, tmpdir, integrity_protected_file):
assert tmpdir.join('file').check(file=True)
assert tmpdir.join('file.integrity').check(file=True)
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
assert fd.read() == b'foo and bar'
def test_corrupted_file(self, integrity_protected_file):
with open(integrity_protected_file, 'ab') as fd:
fd.write(b' extra data')
with pytest.raises(FileIntegrityError):
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
assert fd.read() == b'foo and bar extra data'
def test_corrupted_file_partial_read(self, integrity_protected_file):
with open(integrity_protected_file, 'ab') as fd:
fd.write(b' extra data')
with pytest.raises(FileIntegrityError):
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
data = b'foo and bar'
assert fd.read(len(data)) == data
@ -88,7 +88,7 @@ class TestIntegrityCheckedFile:
tmpdir.join('file').move(new_path)
tmpdir.join('file.integrity').move(new_path + '.integrity')
with pytest.raises(FileIntegrityError):
with IntegrityCheckedFile(str(new_path), write=False) as fd:
with DetachedIntegrityCheckedFile(str(new_path), write=False) as fd:
assert fd.read() == b'foo and bar'
def test_moved_file(self, tmpdir, integrity_protected_file):
@ -96,27 +96,27 @@ class TestIntegrityCheckedFile:
tmpdir.join('file').move(new_dir.join('file'))
tmpdir.join('file.integrity').move(new_dir.join('file.integrity'))
new_path = str(new_dir.join('file'))
with IntegrityCheckedFile(new_path, write=False) as fd:
with DetachedIntegrityCheckedFile(new_path, write=False) as fd:
assert fd.read() == b'foo and bar'
def test_no_integrity(self, tmpdir, integrity_protected_file):
tmpdir.join('file.integrity').remove()
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
assert fd.read() == b'foo and bar'
class TestIntegrityCheckedFileParts:
class TestDetachedIntegrityCheckedFileParts:
@pytest.fixture
def integrity_protected_file(self, tmpdir):
path = str(tmpdir.join('file'))
with IntegrityCheckedFile(path, write=True) as fd:
with DetachedIntegrityCheckedFile(path, write=True) as fd:
fd.write(b'foo and bar')
fd.hash_part('foopart')
fd.write(b' other data')
return path
def test_simple(self, integrity_protected_file):
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
data1 = b'foo and bar'
assert fd.read(len(data1)) == data1
fd.hash_part('foopart')
@ -127,7 +127,7 @@ class TestIntegrityCheckedFileParts:
# Because some hash_part failed, the final digest will fail as well - again - even if we catch
# the failing hash_part. This is intentional: (1) it makes the code simpler (2) it's a good fail-safe
# against overly broad exception handling.
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
data1 = b'foo and bar'
assert fd.read(len(data1)) == data1
with pytest.raises(FileIntegrityError):
@ -140,7 +140,7 @@ class TestIntegrityCheckedFileParts:
with open(integrity_protected_file, 'ab') as fd:
fd.write(b'some extra stuff that does not belong')
with pytest.raises(FileIntegrityError):
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
data1 = b'foo and bar'
try:
assert fd.read(len(data1)) == data1

View file

@ -6,6 +6,7 @@ import zlib
from ..hashindex import NSIndex, ChunkIndex
from .. import hashindex
from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
from . import BaseTestCase
# Note: these tests are part of the self test, do not use or import py.test functionality here.
@ -319,6 +320,27 @@ class HashIndexDataTestCase(BaseTestCase):
assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7)
class HashIndexIntegrityTestCase(HashIndexDataTestCase):
    """Checks that an index written through IntegrityCheckedFile detects later modification."""

    def write_integrity_checked_index(self, tempdir):
        """Write the reference index to ``<tempdir>/idx`` and return ``(path, integrity_data)``."""
        index = self._deserialize_hashindex(self.HASHINDEX)
        index_path = os.path.join(tempdir, 'idx')
        with IntegrityCheckedFile(path=index_path, write=True) as checked_fd:
            index.write(checked_fd)
        # The integrity data is picked up only after the with block has been left.
        integrity_data = checked_fd.integrity_data
        assert 'final' in integrity_data
        assert 'HashHeader' in integrity_data
        return index_path, integrity_data

    def test_integrity_checked_file(self):
        """Overwriting the start of the index file must make reading it fail."""
        with tempfile.TemporaryDirectory() as tempdir:
            index_path, integrity_data = self.write_integrity_checked_index(tempdir)
            # 'r+b' starts writing at offset 0, so this overwrites the first three bytes.
            with open(index_path, 'r+b') as raw_fd:
                raw_fd.write(b'Foo')
            with self.assert_raises(FileIntegrityError), \
                    IntegrityCheckedFile(path=index_path, write=False,
                                         integrity_data=integrity_data) as checked_fd:
                ChunkIndex.read(checked_fd)
class NSIndexTestCase(BaseTestCase):
def test_nsindex_segment_limit(self):
idx = NSIndex()