From 9a856533ba16638edf027724e9c26fcbf2f3b630 Mon Sep 17 00:00:00 2001 From: Marian Beermann Date: Sun, 2 Jul 2017 21:45:34 +0200 Subject: [PATCH] fuse: versions view, linear numbering by archive time --- src/borg/_hashindex.c | 8 ++++++ src/borg/crypto/low_level.pyx | 21 +++++++++++++++- src/borg/fuse.py | 29 ++++++++++++++-------- src/borg/hashindex.pyx | 45 +++++++++++++++++++++++++++++++--- src/borg/helpers.py | 4 +-- src/borg/testsuite/archiver.py | 6 ++--- 6 files changed, 93 insertions(+), 20 deletions(-) diff --git a/src/borg/_hashindex.c b/src/borg/_hashindex.c index e62e8875d..763d0a250 100644 --- a/src/borg/_hashindex.c +++ b/src/borg/_hashindex.c @@ -695,3 +695,11 @@ hashindex_size(HashIndex *index) { return sizeof(HashHeader) + index->num_buckets * index->bucket_size; } + +/* + * Used by the FuseVersionsIndex. + */ +typedef struct { + uint32_t version; + char hash[16]; +} __attribute__((__packed__)) FuseVersionsElement; diff --git a/src/borg/crypto/low_level.pyx b/src/borg/crypto/low_level.pyx index 4351a4d27..a68cd820f 100644 --- a/src/borg/crypto/low_level.pyx +++ b/src/borg/crypto/low_level.pyx @@ -8,7 +8,7 @@ from libc.stdlib cimport malloc, free from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release from cpython.bytes cimport PyBytes_FromStringAndSize -API_VERSION = '1.1_01' +API_VERSION = '1.1_02' cdef extern from "../algorithms/blake2-libselect.h": @@ -252,6 +252,25 @@ def blake2b_256(key, data): return PyBytes_FromStringAndSize( &md[0], 32) +def blake2b_128(data): + cdef blake2b_state state + cdef unsigned char md[16] + cdef unsigned char *data_ptr = data + + if blake2b_init(&state, 16) == -1: + raise Exception('blake2b_init() failed') + + rc = blake2b_update(&state, data_ptr, len(data)) + if rc == -1: + raise Exception('blake2b_update() failed') + + rc = blake2b_final(&state, &md[0], 16) + if rc == -1: + raise Exception('blake2b_final() failed') + + return PyBytes_FromStringAndSize( &md[0], 16) + + def hkdf_hmac_sha512(ikm, salt, info, output_length): """ Compute HKDF-HMAC-SHA512 with input key material *ikm*, *salt* and *info* to produce *output_length* bytes. diff --git a/src/borg/fuse.py b/src/borg/fuse.py index c8d4be829..344247c21 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -9,7 +9,6 @@ import time from collections import defaultdict from signal import SIGINT from distutils.version import LooseVersion -from zlib import adler32 import llfuse import msgpack @@ -17,7 +16,9 @@ import msgpack from .logger import create_logger logger = create_logger() +from .crypto.low_level import blake2b_128 from .archive import Archive +from .hashindex import FuseVersionsIndex from .helpers import daemonize, hardlinkable, signal_handler, format_file_size from .item import Item from .lrucache import LRUCache @@ -240,13 +241,14 @@ class FuseOperations(llfuse.Operations): if self.args.location.archive: self.process_archive(self.args.location.archive) else: + self.versions_index = FuseVersionsIndex() archive_names = (x.name for x in self.manifest.archives.list_considering(self.args)) for archive_name in archive_names: if self.versions: # process archives immediately self.process_archive(archive_name) else: - # lazy load archives, create archive placeholder inode + # lazily load archives, create archive placeholder inode archive_inode = self._create_dir(parent=1) self.contents[1][os.fsencode(archive_name)] = archive_inode self.pending_archives[archive_inode] = archive_name @@ -339,12 +341,19 @@ class FuseOperations(llfuse.Operations): logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name) def process_leaf(self, name, item, parent, prefix, is_dir, item_inode): - def file_version(item): + def file_version(item, path): if 'chunks' in item: - ident = 0 - for chunkid, _, _ in item.chunks: - ident = adler32(chunkid, ident) - return ident + file_id = blake2b_128(path) + current_version, previous_id = self.versions_index.get(file_id, (0, None)) + + chunk_ids = [chunk_id for chunk_id, _, _ in item.chunks] + contents_id = blake2b_128(b''.join(chunk_ids)) + + if contents_id != previous_id: + current_version += 1 + self.versions_index[file_id] = current_version, contents_id + + return current_version def make_versioned_name(name, version, add_dir=False): if add_dir: @@ -353,16 +362,16 @@ class FuseOperations(llfuse.Operations): name += b'/' + path_fname[-1] # keep original extension at end to avoid confusing tools name, ext = os.path.splitext(name) - version_enc = os.fsencode('.%08x' % version) + version_enc = os.fsencode('.%05d' % version) return name + version_enc + ext if self.versions and not is_dir: parent = self.process_inner(name, parent) - version = file_version(item) + path = os.fsencode(item.path) + version = file_version(item, path) if version is not None: # regular file, with contents - maybe a hardlink master name = make_versioned_name(name, version) - path = os.fsencode(item.path) self.file_versions[path] = version path = item.path diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx index 0d271ad65..f14eeea92 100644 --- a/src/borg/hashindex.pyx +++ b/src/borg/hashindex.pyx @@ -6,17 +6,22 @@ import os cimport cython from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t from libc.errno cimport errno +from libc.string cimport memcpy from cpython.exc cimport PyErr_SetFromErrnoWithFilename from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release -from cpython.bytes cimport PyBytes_FromStringAndSize +from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_CheckExact, PyBytes_GET_SIZE, PyBytes_AS_STRING -API_VERSION = '1.1_06' +API_VERSION = '1.1_07' cdef extern from "_hashindex.c": ctypedef struct HashIndex: pass + ctypedef struct FuseVersionsElement: + uint32_t version + char hash[16] + HashIndex *hashindex_read(object file_py, int permit_compact) except * HashIndex *hashindex_init(int capacity, int key_size, int value_size) void hashindex_free(HashIndex *index) @@ -74,11 +79,13 @@ cdef class IndexBase: cdef HashIndex *index cdef int key_size + _key_size = 32 + MAX_LOAD_FACTOR = HASH_MAX_LOAD MAX_VALUE = _MAX_VALUE - def __cinit__(self, capacity=0, path=None, key_size=32, permit_compact=False): - self.key_size = key_size + def __cinit__(self, capacity=0, path=None, permit_compact=False): + self.key_size = self._key_size if path: if isinstance(path, (str, bytes)): with open(path, 'rb') as fd: @@ -153,6 +160,36 @@ cdef class IndexBase: return hashindex_compact(self.index) +cdef class FuseVersionsIndex(IndexBase): + # 4 byte version + 16 byte file contents hash + value_size = 20 + _key_size = 16 + + def __getitem__(self, key): + cdef FuseVersionsElement *data + assert len(key) == self.key_size + data = hashindex_get(self.index, key) + if data == NULL: + raise KeyError(key) + return _le32toh(data.version), PyBytes_FromStringAndSize(data.hash, 16) + + def __setitem__(self, key, value): + cdef FuseVersionsElement data + assert len(key) == self.key_size + data.version = value[0] + assert data.version <= _MAX_VALUE, "maximum number of versions reached" + if not PyBytes_CheckExact(value[1]) or PyBytes_GET_SIZE(value[1]) != 16: + raise TypeError("Expected bytes of length 16 for second value") + memcpy(data.hash, PyBytes_AS_STRING(value[1]), 16) + data.version = _htole32(data.version) + if not hashindex_set(self.index, key, &data): + raise Exception('hashindex_set failed') + + def __contains__(self, key): + assert len(key) == self.key_size + return hashindex_get(self.index, key) != NULL + + cdef class NSIndex(IndexBase): value_size = 8 diff --git a/src/borg/helpers.py b/src/borg/helpers.py index 873dc0cf7..dee5cb641 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -131,13 +131,13 @@ class MandatoryFeatureUnsupported(Error): def check_extension_modules(): from . import platform, compress, item - if hashindex.API_VERSION != '1.1_06': + if hashindex.API_VERSION != '1.1_07': raise ExtensionModuleError if chunker.API_VERSION != '1.1_01': raise ExtensionModuleError if compress.API_VERSION != '1.1_03': raise ExtensionModuleError - if borg.crypto.low_level.API_VERSION != '1.1_01': + if borg.crypto.low_level.API_VERSION != '1.1_02': raise ExtensionModuleError if platform.API_VERSION != platform.OS_API_VERSION != '1.1_01': raise ExtensionModuleError diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 52f069596..e71a3b60c 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -2101,11 +2101,11 @@ class ArchiverTestCase(ArchiverTestCaseBase): with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions'): path = os.path.join(mountpoint, 'input', 'test') # filename shows up as directory ... files = os.listdir(path) - assert all(f.startswith('test.') for f in files) # ... with files test.xxxxxxxx in there + assert all(f.startswith('test.') for f in files) # ... with files test.xxxxx in there assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files} if are_hardlinks_supported(): - st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00000000')) - st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00000000')) + st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001')) + st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001')) assert st1.st_ino == st2.st_ino @unittest.skipUnless(has_llfuse, 'llfuse not installed')