mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-09 00:32:37 -04:00
Merge pull request #2568 from enkore/issue/1101.integration.cache
1101.integration.cache
This commit is contained in:
commit
349a4ade7c
7 changed files with 277 additions and 68 deletions
|
|
@ -291,6 +291,20 @@ hashindex_read(PyObject *file_py)
|
|||
goto fail_decref_header;
|
||||
}
|
||||
|
||||
/*
|
||||
* Hash the header
|
||||
* If the header is corrupted this bails before doing something stupid (like allocating 3.8 TB of memory)
|
||||
*/
|
||||
Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"));
|
||||
if(PyErr_Occurred()) {
|
||||
if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
|
||||
/* Be able to work with regular file objects which do not have a hash_part method. */
|
||||
PyErr_Clear();
|
||||
} else {
|
||||
goto fail_decref_header;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find length of file */
|
||||
length_object = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)0, SEEK_END);
|
||||
if(PyErr_Occurred()) {
|
||||
|
|
@ -473,6 +487,19 @@ hashindex_write(HashIndex *index, PyObject *file_py)
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Hash the header
|
||||
*/
|
||||
Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"));
|
||||
if(PyErr_Occurred()) {
|
||||
if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
|
||||
/* Be able to work with regular file objects which do not have a hash_part method. */
|
||||
PyErr_Clear();
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Note: explicitly construct view; BuildValue can convert (pointer, length) to Python objects, but copies them for doing so */
|
||||
buckets_view = PyMemoryView_FromMemory((char*)index->buckets, buckets_length, PyBUF_READ);
|
||||
if(!buckets_view) {
|
||||
|
|
|
|||
|
|
@ -22,8 +22,10 @@ from .helpers import safe_ns
|
|||
from .helpers import yes, hostname_is_unique
|
||||
from .helpers import remove_surrogates
|
||||
from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
|
||||
from .helpers import set_ec, EXIT_WARNING
|
||||
from .item import ArchiveItem, ChunkListEntry
|
||||
from .crypto.key import PlaintextKey
|
||||
from .crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
|
||||
from .locking import Lock
|
||||
from .platform import SaveFile
|
||||
from .remote import cache_if_remote
|
||||
|
|
@ -237,6 +239,8 @@ class CacheConfig:
|
|||
config.set('cache', 'version', '1')
|
||||
config.set('cache', 'repository', self.repository.id_str)
|
||||
config.set('cache', 'manifest', '')
|
||||
config.add_section('integrity')
|
||||
config.set('integrity', 'manifest', '')
|
||||
with SaveFile(self.config_path) as fd:
|
||||
config.write(fd)
|
||||
|
||||
|
|
@ -253,6 +257,20 @@ class CacheConfig:
|
|||
self.manifest_id = unhexlify(self._config.get('cache', 'manifest'))
|
||||
self.timestamp = self._config.get('cache', 'timestamp', fallback=None)
|
||||
self.key_type = self._config.get('cache', 'key_type', fallback=None)
|
||||
try:
|
||||
self.integrity = dict(self._config.items('integrity'))
|
||||
if self._config.get('cache', 'manifest') != self.integrity.pop('manifest'):
|
||||
# The cache config file is updated (parsed with ConfigParser, the state of the ConfigParser
|
||||
# is modified and then written out.), not re-created.
|
||||
# Thus, older versions will leave our [integrity] section alone, making the section's data invalid.
|
||||
# Therefore, we also add the manifest ID to this section and
|
||||
# can discern whether an older version interfered by comparing the manifest IDs of this section
|
||||
# and the main [cache] section.
|
||||
self.integrity = {}
|
||||
logger.warning('Cache integrity data not available: old Borg version modified the cache.')
|
||||
except configparser.NoSectionError:
|
||||
logger.debug('Cache integrity: No integrity data found (files, chunks). Cache is from old version.')
|
||||
self.integrity = {}
|
||||
previous_location = self._config.get('cache', 'previous_location', fallback=None)
|
||||
if previous_location:
|
||||
self.previous_location = recanonicalize_relative_location(previous_location, self.repository)
|
||||
|
|
@ -263,6 +281,11 @@ class CacheConfig:
|
|||
if manifest:
|
||||
self._config.set('cache', 'manifest', manifest.id_str)
|
||||
self._config.set('cache', 'timestamp', manifest.timestamp)
|
||||
if not self._config.has_section('integrity'):
|
||||
self._config.add_section('integrity')
|
||||
for file, integrity_data in self.integrity.items():
|
||||
self._config.set('integrity', file, integrity_data)
|
||||
self._config.set('integrity', 'manifest', manifest.id_str)
|
||||
if key:
|
||||
self._config.set('cache', 'key_type', str(key.TYPE))
|
||||
self._config.set('cache', 'previous_location', self.repository._location.canonical_path())
|
||||
|
|
@ -392,14 +415,16 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
with open(os.path.join(self.path, 'README'), 'w') as fd:
|
||||
fd.write(CACHE_README)
|
||||
self.cache_config.create()
|
||||
ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
|
||||
ChunkIndex().write(os.path.join(self.path, 'chunks'))
|
||||
os.makedirs(os.path.join(self.path, 'chunks.archive.d'))
|
||||
with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
|
||||
pass # empty file
|
||||
|
||||
def _do_open(self):
|
||||
self.cache_config.load()
|
||||
self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'))
|
||||
with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False,
|
||||
integrity_data=self.cache_config.integrity.get('chunks')) as fd:
|
||||
self.chunks = ChunkIndex.read(fd)
|
||||
self.files = None
|
||||
|
||||
def open(self):
|
||||
|
|
@ -417,7 +442,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
self.files = {}
|
||||
self._newest_mtime = None
|
||||
logger.debug('Reading files cache ...')
|
||||
with open(os.path.join(self.path, 'files'), 'rb') as fd:
|
||||
|
||||
with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=False,
|
||||
integrity_data=self.cache_config.integrity.get('files')) as fd:
|
||||
u = msgpack.Unpacker(use_list=True)
|
||||
while True:
|
||||
data = fd.read(64 * 1024)
|
||||
|
|
@ -458,7 +485,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
self._newest_mtime = 2 ** 63 - 1 # nanoseconds, good until y2262
|
||||
ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
|
||||
pi.output('Saving files cache')
|
||||
with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
|
||||
with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=True) as fd:
|
||||
for path_hash, item in self.files.items():
|
||||
# Only keep files seen in this backup that are older than newest mtime seen in this backup -
|
||||
# this is to avoid issues with filesystem snapshots and mtime granularity.
|
||||
|
|
@ -467,10 +494,13 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \
|
||||
entry.age > 0 and entry.age < ttl:
|
||||
msgpack.pack((path_hash, entry), fd)
|
||||
self.cache_config.integrity['files'] = fd.integrity_data
|
||||
pi.output('Saving chunks cache')
|
||||
with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd:
|
||||
self.chunks.write(fd)
|
||||
self.cache_config.integrity['chunks'] = fd.integrity_data
|
||||
pi.output('Saving cache config')
|
||||
self.cache_config.save(self.manifest, self.key)
|
||||
pi.output('Saving chunks cache')
|
||||
self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8'))
|
||||
os.rename(os.path.join(self.path, 'txn.active'),
|
||||
os.path.join(self.path, 'txn.tmp'))
|
||||
shutil.rmtree(os.path.join(self.path, 'txn.tmp'))
|
||||
|
|
@ -510,7 +540,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
def mkpath(id, suffix=''):
|
||||
id_hex = bin_to_hex(id)
|
||||
path = os.path.join(archive_path, id_hex + suffix)
|
||||
return path.encode('utf-8')
|
||||
return path
|
||||
|
||||
def cached_archives():
|
||||
if self.do_cache:
|
||||
|
|
@ -525,7 +555,14 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
|
||||
def cleanup_outdated(ids):
|
||||
for id in ids:
|
||||
os.unlink(mkpath(id))
|
||||
cleanup_cached_archive(id)
|
||||
|
||||
def cleanup_cached_archive(id):
    """Delete the cached chunk index of archive *id* and its integrity sidecar.

    The index file itself must exist (a missing one raises); the matching
    ``.integrity`` file is optional and silently skipped when absent.
    """
    index_path = mkpath(id)
    os.unlink(index_path)
    try:
        os.unlink(index_path + '.integrity')
    except FileNotFoundError:
        # No sidecar was stored next to this index - nothing more to remove.
        pass
|
||||
|
||||
def fetch_and_build_idx(archive_id, repository, key, chunk_idx):
|
||||
cdata = repository.get(archive_id)
|
||||
|
|
@ -542,14 +579,16 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
for item in unpacker:
|
||||
if not isinstance(item, dict):
|
||||
logger.error('Error: Did not get expected metadata dict - archive corrupted!')
|
||||
continue
|
||||
continue # XXX: continue?!
|
||||
for chunk_id, size, csize in item.get(b'chunks', []):
|
||||
chunk_idx.add(chunk_id, 1, size, csize)
|
||||
if self.do_cache:
|
||||
fn = mkpath(archive_id)
|
||||
fn_tmp = mkpath(archive_id, suffix='.tmp')
|
||||
try:
|
||||
chunk_idx.write(fn_tmp)
|
||||
with DetachedIntegrityCheckedFile(path=fn_tmp, write=True,
|
||||
filename=bin_to_hex(archive_id)) as fd:
|
||||
chunk_idx.write(fd)
|
||||
except Exception:
|
||||
os.unlink(fn_tmp)
|
||||
else:
|
||||
|
|
@ -564,9 +603,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
logger.info('Synchronizing chunks cache...')
|
||||
cached_ids = cached_archives()
|
||||
archive_ids = repo_archives()
|
||||
logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' % (
|
||||
logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.',
|
||||
len(archive_ids), len(cached_ids),
|
||||
len(cached_ids - archive_ids), len(archive_ids - cached_ids), ))
|
||||
len(cached_ids - archive_ids), len(archive_ids - cached_ids))
|
||||
# deallocates old hashindex, creates empty hashindex:
|
||||
chunk_idx.clear()
|
||||
cleanup_outdated(cached_ids - archive_ids)
|
||||
|
|
@ -583,10 +622,20 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
if self.do_cache:
|
||||
if archive_id in cached_ids:
|
||||
archive_chunk_idx_path = mkpath(archive_id)
|
||||
logger.info("Reading cached archive chunk index for %s ..." % archive_name)
|
||||
archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path)
|
||||
else:
|
||||
logger.info('Fetching and building archive index for %s ...' % archive_name)
|
||||
logger.info("Reading cached archive chunk index for %s ...", archive_name)
|
||||
try:
|
||||
with DetachedIntegrityCheckedFile(path=archive_chunk_idx_path, write=False) as fd:
|
||||
archive_chunk_idx = ChunkIndex.read(fd)
|
||||
except FileIntegrityError as fie:
|
||||
logger.error('Cached archive chunk index of %s is corrupted: %s', archive_name, fie)
|
||||
# Delete it and fetch a new index
|
||||
cleanup_cached_archive(archive_id)
|
||||
cached_ids.remove(archive_id)
|
||||
set_ec(EXIT_WARNING)
|
||||
if archive_id not in cached_ids:
|
||||
# Do not make this an else branch; the FileIntegrityError exception handler
|
||||
# above can remove *archive_id* from *cached_ids*.
|
||||
logger.info('Fetching and building archive index for %s ...', archive_name)
|
||||
archive_chunk_idx = ChunkIndex()
|
||||
fetch_and_build_idx(archive_id, repository, self.key, archive_chunk_idx)
|
||||
logger.info("Merging into master chunks index ...")
|
||||
|
|
@ -599,7 +648,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
|
|||
chunk_idx.merge(archive_chunk_idx)
|
||||
else:
|
||||
chunk_idx = chunk_idx or ChunkIndex()
|
||||
logger.info('Fetching archive index for %s ...' % archive_name)
|
||||
logger.info('Fetching archive index for %s ...', archive_name)
|
||||
fetch_and_build_idx(archive_id, repository, self.key, chunk_idx)
|
||||
if self.progress:
|
||||
pi.finish()
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ class FileIntegrityError(IntegrityError):
|
|||
|
||||
|
||||
class IntegrityCheckedFile(FileLikeWrapper):
|
||||
def __init__(self, path, write, filename=None, override_fd=None):
|
||||
def __init__(self, path, write, filename=None, override_fd=None, integrity_data=None):
|
||||
self.path = path
|
||||
self.writing = write
|
||||
mode = 'wb' if write else 'rb'
|
||||
|
|
@ -114,10 +114,10 @@ class IntegrityCheckedFile(FileLikeWrapper):
|
|||
|
||||
self.hash_filename(filename)
|
||||
|
||||
if write:
|
||||
if write or not integrity_data:
|
||||
self.digests = {}
|
||||
else:
|
||||
self.digests = self.read_integrity_file(path, self.hasher)
|
||||
self.digests = self.parse_integrity_data(path, integrity_data, self.hasher)
|
||||
# TODO: When we're reading but don't have any digests, i.e. no integrity file existed,
|
||||
# TODO: then we could just short-circuit.
|
||||
|
||||
|
|
@ -126,37 +126,33 @@ class IntegrityCheckedFile(FileLikeWrapper):
|
|||
# In Borg the name itself encodes the context (eg. index.N, cache, files),
|
||||
# while the path doesn't matter, and moving e.g. a repository or cache directory is supported.
|
||||
# Changing the name however imbues a change of context that is not permissible.
|
||||
# While Borg does not use anything except ASCII in these file names, it's important to use
|
||||
# the same encoding everywhere for portability. Using os.fsencode() would be wrong.
|
||||
filename = os.path.basename(filename or self.path)
|
||||
self.hasher.update(('%10d' % len(filename)).encode())
|
||||
self.hasher.update(filename.encode())
|
||||
|
||||
@staticmethod
|
||||
def integrity_file_path(path):
|
||||
return path + '.integrity'
|
||||
|
||||
@classmethod
|
||||
def read_integrity_file(cls, path, hasher):
|
||||
def parse_integrity_data(cls, path: str, data: str, hasher: SHA512FileHashingWrapper):
|
||||
try:
|
||||
with open(cls.integrity_file_path(path), 'r') as fd:
|
||||
integrity_file = json.load(fd)
|
||||
# Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
|
||||
algorithm = integrity_file['algorithm']
|
||||
if algorithm != hasher.ALGORITHM:
|
||||
logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm)
|
||||
return
|
||||
digests = integrity_file['digests']
|
||||
# Require at least presence of the final digest
|
||||
digests['final']
|
||||
return digests
|
||||
except FileNotFoundError:
|
||||
logger.info('No integrity file found for %s', path)
|
||||
except (OSError, ValueError, TypeError, KeyError) as e:
|
||||
logger.warning('Could not read integrity file for %s: %s', path, e)
|
||||
integrity_data = json.loads(data)
|
||||
# Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
|
||||
algorithm = integrity_data['algorithm']
|
||||
if algorithm != hasher.ALGORITHM:
|
||||
logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm)
|
||||
return
|
||||
digests = integrity_data['digests']
|
||||
# Require at least presence of the final digest
|
||||
digests['final']
|
||||
return digests
|
||||
except (ValueError, TypeError, KeyError) as e:
|
||||
logger.warning('Could not parse integrity data for %s: %s', path, e)
|
||||
raise FileIntegrityError(path)
|
||||
|
||||
def hash_part(self, partname, is_final=False):
|
||||
if not self.writing and not self.digests:
|
||||
return
|
||||
self.hasher.update(('%10d' % len(partname)).encode())
|
||||
self.hasher.update(partname.encode())
|
||||
self.hasher.hash_length(seek_to_end=is_final)
|
||||
digest = self.hasher.hexdigest()
|
||||
|
|
@ -173,10 +169,41 @@ class IntegrityCheckedFile(FileLikeWrapper):
|
|||
if exception:
|
||||
return
|
||||
if self.writing:
|
||||
with open(self.integrity_file_path(self.path), 'w') as fd:
|
||||
json.dump({
|
||||
'algorithm': self.hasher.ALGORITHM,
|
||||
'digests': self.digests,
|
||||
}, fd)
|
||||
self.store_integrity_data(json.dumps({
|
||||
'algorithm': self.hasher.ALGORITHM,
|
||||
'digests': self.digests,
|
||||
}))
|
||||
elif self.digests:
|
||||
logger.debug('Verified integrity of %s', self.path)
|
||||
|
||||
def store_integrity_data(self, data: str):
|
||||
self.integrity_data = data
|
||||
|
||||
|
||||
class DetachedIntegrityCheckedFile(IntegrityCheckedFile):
    """IntegrityCheckedFile that keeps its integrity data in a ``.integrity`` sidecar file.

    When writing, the integrity data is stored in ``<dirname(path)>/<filename>.integrity``,
    where *filename* defaults to the basename of *path*.
    When reading, the digests are loaded from ``<path>.integrity``.
    """

    def __init__(self, path, write, filename=None, override_fd=None):
        super().__init__(path, write, filename, override_fd)
        filename = filename or os.path.basename(path)
        output_dir = os.path.dirname(path)
        self.output_integrity_file = self.integrity_file_path(os.path.join(output_dir, filename))
        if not write:
            # May be None (no sidecar file / unknown algorithm); the digests are then falsy.
            self.digests = self.read_integrity_file(self.path, self.hasher)

    @staticmethod
    def integrity_file_path(path):
        """Return the path of the integrity sidecar file belonging to *path*."""
        return path + '.integrity'

    @classmethod
    def read_integrity_file(cls, path, hasher):
        """Load and parse the integrity sidecar file of *path*.

        Returns whatever ``parse_integrity_data`` returns for the file contents,
        or None if no sidecar file exists.

        Raises FileIntegrityError if the sidecar file exists but cannot be read
        or decoded as text.
        """
        try:
            with open(cls.integrity_file_path(path), 'r') as fd:
                return cls.parse_integrity_data(path, fd.read(), hasher)
        except FileNotFoundError:
            logger.info('No integrity file found for %s', path)
            return None
        except (OSError, UnicodeError) as e:
            # UnicodeError: the file is opened in text mode, so a sidecar containing
            # undecodable bytes fails in fd.read() with UnicodeDecodeError (a ValueError,
            # not an OSError). Treat that like any other unreadable integrity file
            # instead of letting it escape as an unrelated exception type.
            logger.warning('Could not read integrity file for %s: %s', path, e)
            raise FileIntegrityError(path) from e

    def store_integrity_data(self, data: str):
        """Write the serialized integrity *data* to the sidecar file."""
        with open(self.output_integrity_file, 'w') as fd:
            fd.write(data)
|
||||
|
|
|
|||
|
|
@ -67,8 +67,11 @@ cdef class IndexBase:
|
|||
def __cinit__(self, capacity=0, path=None, key_size=32):
|
||||
self.key_size = key_size
|
||||
if path:
|
||||
with open(path, 'rb') as fd:
|
||||
self.index = hashindex_read(fd)
|
||||
if isinstance(path, (str, bytes)):
|
||||
with open(path, 'rb') as fd:
|
||||
self.index = hashindex_read(fd)
|
||||
else:
|
||||
self.index = hashindex_read(path)
|
||||
assert self.index, 'hashindex_read() returned NULL with no exception set'
|
||||
else:
|
||||
self.index = hashindex_init(capacity, self.key_size, self.value_size)
|
||||
|
|
@ -84,8 +87,11 @@ cdef class IndexBase:
|
|||
return cls(path=path)
|
||||
|
||||
def write(self, path):
|
||||
with open(path, 'wb') as fd:
|
||||
hashindex_write(self.index, fd)
|
||||
if isinstance(path, (str, bytes)):
|
||||
with open(path, 'wb') as fd:
|
||||
hashindex_write(self.index, fd)
|
||||
else:
|
||||
hashindex_write(self.index, path)
|
||||
|
||||
def clear(self):
|
||||
hashindex_free(self.index)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import argparse
|
||||
import errno
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -37,6 +38,7 @@ from ..constants import * # NOQA
|
|||
from ..crypto.low_level import bytes_to_long, num_aes_blocks
|
||||
from ..crypto.key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
|
||||
from ..crypto.keymanager import RepoIdMismatch, NotABorgKeyFile
|
||||
from ..crypto.file_integrity import FileIntegrityError
|
||||
from ..helpers import Location, get_security_dir
|
||||
from ..helpers import Manifest
|
||||
from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
|
||||
|
|
@ -2886,6 +2888,82 @@ class RemoteArchiverTestCase(ArchiverTestCase):
|
|||
self.assert_true(marker not in res)
|
||||
|
||||
|
||||
class ArchiverCorruptionTestCase(ArchiverTestCaseBase):
    """Archiver tests that corrupt cache files and check how the commands react."""

    def setUp(self):
        """Create test files, initialize a repokey repository and remember its cache path."""
        super().setUp()
        self.create_test_files()
        self.cmd('init', '--encryption=repokey', self.repository_location)
        self.cache_path = json.loads(self.cmd('info', self.repository_location, '--json'))['cache']['path']

    def corrupt(self, file, amount=1):
        """Corrupt the last *amount* bytes of *file* in place.

        Every affected byte is replaced by its bitwise complement, so the content
        is guaranteed to change. (Overwriting with a constant byte would be a no-op
        whenever the file already ends in that byte.)
        """
        with open(file, 'r+b') as fd:
            fd.seek(-amount, io.SEEK_END)
            corrupted = bytes(255 - c for c in fd.read(amount))
            fd.seek(-amount, io.SEEK_END)
            fd.write(corrupted)

    def test_cache_chunks(self):
        self.corrupt(os.path.join(self.cache_path, 'chunks'))

        if self.FORK_DEFAULT:
            out = self.cmd('info', self.repository_location, exit_code=2)
            assert 'failed integrity check' in out
        else:
            with pytest.raises(FileIntegrityError):
                self.cmd('info', self.repository_location)

    def test_cache_files(self):
        self.cmd('create', self.repository_location + '::test', 'input')
        self.corrupt(os.path.join(self.cache_path, 'files'))

        if self.FORK_DEFAULT:
            out = self.cmd('create', self.repository_location + '::test1', 'input', exit_code=2)
            assert 'failed integrity check' in out
        else:
            with pytest.raises(FileIntegrityError):
                self.cmd('create', self.repository_location + '::test1', 'input')

    def test_chunks_archive(self):
        self.cmd('create', self.repository_location + '::test1', 'input')
        # Find ID of test1 so we can corrupt it later :)
        target_id = self.cmd('list', self.repository_location, '--format={id}{LF}').strip()
        self.cmd('create', self.repository_location + '::test2', 'input')

        # Force cache sync, creating archive chunks of test1 and test2 in chunks.archive.d
        self.cmd('delete', '--cache-only', self.repository_location)
        self.cmd('info', self.repository_location, '--json')

        chunks_archive = os.path.join(self.cache_path, 'chunks.archive.d')
        assert len(os.listdir(chunks_archive)) == 4  # two archives, one chunks cache and one .integrity file each

        self.corrupt(os.path.join(chunks_archive, target_id))

        # Trigger cache sync by changing the manifest ID in the cache config
        config_path = os.path.join(self.cache_path, 'config')
        config = ConfigParser(interpolation=None)
        config.read(config_path)
        config.set('cache', 'manifest', bin_to_hex(bytes(32)))
        with open(config_path, 'w') as fd:
            config.write(fd)

        # Cache sync notices corrupted archive chunks, but automatically recovers.
        out = self.cmd('create', '-v', self.repository_location + '::test3', 'input', exit_code=1)
        assert 'Reading cached archive chunk index for test1' in out
        assert 'Cached archive chunk index of test1 is corrupted' in out
        assert 'Fetching and building archive index for test1' in out

    def test_old_version_interfered(self):
        # Modify the main manifest ID without touching the manifest ID in the integrity section.
        # This happens if a version without integrity checking modifies the cache.
        config_path = os.path.join(self.cache_path, 'config')
        config = ConfigParser(interpolation=None)
        config.read(config_path)
        config.set('cache', 'manifest', bin_to_hex(bytes(32)))
        with open(config_path, 'w') as fd:
            config.write(fd)

        out = self.cmd('info', self.repository_location)
        assert 'Cache integrity data not available: old Borg version modified the cache.' in out
|
||||
|
||||
|
||||
class DiffArchiverTestCase(ArchiverTestCaseBase):
|
||||
def test_basic_functionality(self):
|
||||
# Initialize test folder
|
||||
|
|
|
|||
|
|
@ -1,21 +1,21 @@
|
|||
|
||||
import pytest
|
||||
|
||||
from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
|
||||
from ..crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
|
||||
|
||||
|
||||
class TestReadIntegrityFile:
|
||||
def test_no_integrity(self, tmpdir):
|
||||
protected_file = tmpdir.join('file')
|
||||
protected_file.write('1234')
|
||||
assert IntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None
|
||||
assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None
|
||||
|
||||
def test_truncated_integrity(self, tmpdir):
|
||||
protected_file = tmpdir.join('file')
|
||||
protected_file.write('1234')
|
||||
tmpdir.join('file.integrity').write('')
|
||||
with pytest.raises(FileIntegrityError):
|
||||
IntegrityCheckedFile.read_integrity_file(str(protected_file), None)
|
||||
DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None)
|
||||
|
||||
def test_unknown_algorithm(self, tmpdir):
|
||||
class SomeHasher:
|
||||
|
|
@ -24,7 +24,7 @@ class TestReadIntegrityFile:
|
|||
protected_file = tmpdir.join('file')
|
||||
protected_file.write('1234')
|
||||
tmpdir.join('file.integrity').write('{"algorithm": "HMAC_SERIOUSHASH", "digests": "1234"}')
|
||||
assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None
|
||||
assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None
|
||||
|
||||
@pytest.mark.parametrize('json', (
|
||||
'{"ALGORITHM": "HMAC_SERIOUSHASH", "digests": "1234"}',
|
||||
|
|
@ -38,7 +38,7 @@ class TestReadIntegrityFile:
|
|||
protected_file.write('1234')
|
||||
tmpdir.join('file.integrity').write(json)
|
||||
with pytest.raises(FileIntegrityError):
|
||||
IntegrityCheckedFile.read_integrity_file(str(protected_file), None)
|
||||
DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None)
|
||||
|
||||
def test_valid(self, tmpdir):
|
||||
class SomeHasher:
|
||||
|
|
@ -47,35 +47,35 @@ class TestReadIntegrityFile:
|
|||
protected_file = tmpdir.join('file')
|
||||
protected_file.write('1234')
|
||||
tmpdir.join('file.integrity').write('{"algorithm": "HMAC_FOO1", "digests": {"final": "1234"}}')
|
||||
assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'}
|
||||
assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'}
|
||||
|
||||
|
||||
class TestIntegrityCheckedFile:
|
||||
class TestDetachedIntegrityCheckedFile:
|
||||
@pytest.fixture
|
||||
def integrity_protected_file(self, tmpdir):
|
||||
path = str(tmpdir.join('file'))
|
||||
with IntegrityCheckedFile(path, write=True) as fd:
|
||||
with DetachedIntegrityCheckedFile(path, write=True) as fd:
|
||||
fd.write(b'foo and bar')
|
||||
return path
|
||||
|
||||
def test_simple(self, tmpdir, integrity_protected_file):
|
||||
assert tmpdir.join('file').check(file=True)
|
||||
assert tmpdir.join('file.integrity').check(file=True)
|
||||
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
assert fd.read() == b'foo and bar'
|
||||
|
||||
def test_corrupted_file(self, integrity_protected_file):
|
||||
with open(integrity_protected_file, 'ab') as fd:
|
||||
fd.write(b' extra data')
|
||||
with pytest.raises(FileIntegrityError):
|
||||
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
assert fd.read() == b'foo and bar extra data'
|
||||
|
||||
def test_corrupted_file_partial_read(self, integrity_protected_file):
|
||||
with open(integrity_protected_file, 'ab') as fd:
|
||||
fd.write(b' extra data')
|
||||
with pytest.raises(FileIntegrityError):
|
||||
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
data = b'foo and bar'
|
||||
assert fd.read(len(data)) == data
|
||||
|
||||
|
|
@ -88,7 +88,7 @@ class TestIntegrityCheckedFile:
|
|||
tmpdir.join('file').move(new_path)
|
||||
tmpdir.join('file.integrity').move(new_path + '.integrity')
|
||||
with pytest.raises(FileIntegrityError):
|
||||
with IntegrityCheckedFile(str(new_path), write=False) as fd:
|
||||
with DetachedIntegrityCheckedFile(str(new_path), write=False) as fd:
|
||||
assert fd.read() == b'foo and bar'
|
||||
|
||||
def test_moved_file(self, tmpdir, integrity_protected_file):
|
||||
|
|
@ -96,27 +96,27 @@ class TestIntegrityCheckedFile:
|
|||
tmpdir.join('file').move(new_dir.join('file'))
|
||||
tmpdir.join('file.integrity').move(new_dir.join('file.integrity'))
|
||||
new_path = str(new_dir.join('file'))
|
||||
with IntegrityCheckedFile(new_path, write=False) as fd:
|
||||
with DetachedIntegrityCheckedFile(new_path, write=False) as fd:
|
||||
assert fd.read() == b'foo and bar'
|
||||
|
||||
def test_no_integrity(self, tmpdir, integrity_protected_file):
|
||||
tmpdir.join('file.integrity').remove()
|
||||
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
assert fd.read() == b'foo and bar'
|
||||
|
||||
|
||||
class TestIntegrityCheckedFileParts:
|
||||
class TestDetachedIntegrityCheckedFileParts:
|
||||
@pytest.fixture
|
||||
def integrity_protected_file(self, tmpdir):
|
||||
path = str(tmpdir.join('file'))
|
||||
with IntegrityCheckedFile(path, write=True) as fd:
|
||||
with DetachedIntegrityCheckedFile(path, write=True) as fd:
|
||||
fd.write(b'foo and bar')
|
||||
fd.hash_part('foopart')
|
||||
fd.write(b' other data')
|
||||
return path
|
||||
|
||||
def test_simple(self, integrity_protected_file):
|
||||
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
data1 = b'foo and bar'
|
||||
assert fd.read(len(data1)) == data1
|
||||
fd.hash_part('foopart')
|
||||
|
|
@ -127,7 +127,7 @@ class TestIntegrityCheckedFileParts:
|
|||
# Because some hash_part failed, the final digest will fail as well - again - even if we catch
|
||||
# the failing hash_part. This is intentional: (1) it makes the code simpler (2) it's a good fail-safe
|
||||
# against overly broad exception handling.
|
||||
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
data1 = b'foo and bar'
|
||||
assert fd.read(len(data1)) == data1
|
||||
with pytest.raises(FileIntegrityError):
|
||||
|
|
@ -140,7 +140,7 @@ class TestIntegrityCheckedFileParts:
|
|||
with open(integrity_protected_file, 'ab') as fd:
|
||||
fd.write(b'some extra stuff that does not belong')
|
||||
with pytest.raises(FileIntegrityError):
|
||||
with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
|
||||
data1 = b'foo and bar'
|
||||
try:
|
||||
assert fd.read(len(data1)) == data1
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import zlib
|
|||
|
||||
from ..hashindex import NSIndex, ChunkIndex
|
||||
from .. import hashindex
|
||||
from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
|
||||
from . import BaseTestCase
|
||||
|
||||
# Note: these tests are part of the self test, do not use or import py.test functionality here.
|
||||
|
|
@ -319,6 +320,27 @@ class HashIndexDataTestCase(BaseTestCase):
|
|||
assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7)
|
||||
|
||||
|
||||
class HashIndexIntegrityTestCase(HashIndexDataTestCase):
    """Hash index serialization through an IntegrityCheckedFile."""

    def write_integrity_checked_index(self, tempdir):
        """Write the reference index into *tempdir*; return (path, integrity data)."""
        index_path = os.path.join(tempdir, 'idx')
        index = self._deserialize_hashindex(self.HASHINDEX)
        with IntegrityCheckedFile(path=index_path, write=True) as checked_fd:
            index.write(checked_fd)
        # The integrity data is read only after the file has been closed.
        recorded = checked_fd.integrity_data
        assert 'final' in recorded
        assert 'HashHeader' in recorded
        return index_path, recorded

    def test_integrity_checked_file(self):
        """Overwriting the start of the index file must fail the integrity check on read."""
        with tempfile.TemporaryDirectory() as tempdir:
            index_path, recorded = self.write_integrity_checked_index(tempdir)
            # Clobber the first bytes of the stored index.
            with open(index_path, 'r+b') as raw_fd:
                raw_fd.write(b'Foo')
            with self.assert_raises(FileIntegrityError):
                with IntegrityCheckedFile(path=index_path, write=False, integrity_data=recorded) as checked_fd:
                    ChunkIndex.read(checked_fd)
|
||||
|
||||
|
||||
class NSIndexTestCase(BaseTestCase):
|
||||
def test_nsindex_segment_limit(self):
|
||||
idx = NSIndex()
|
||||
|
|
|
|||
Loading…
Reference in a new issue