From b294ceba678e4c3b5bd1fe31aa9de2034db85964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Borgstr=C3=B6m?= Date: Sat, 30 Jul 2011 21:13:48 +0200 Subject: [PATCH] Switch to pure AES256 encryption and improved metadata storage --- darc/__init__.py | 10 --- darc/archive.py | 201 ++++++++++++++++++++++++--------------------- darc/archiver.py | 79 ++++++++---------- darc/cache.py | 71 +++++++++------- darc/hashindex.pyx | 56 +++++++++++++ darc/helpers.py | 11 --- darc/key.py | 160 ++++++++++++++++++++++++++++++++++++ darc/keychain.py | 189 ------------------------------------------ darc/oaep.py | 71 ---------------- darc/store.py | 2 +- 10 files changed, 401 insertions(+), 449 deletions(-) create mode 100644 darc/key.py delete mode 100644 darc/keychain.py delete mode 100644 darc/oaep.py diff --git a/darc/__init__.py b/darc/__init__.py index 9d705b9bd..712d79598 100644 --- a/darc/__init__.py +++ b/darc/__init__.py @@ -2,13 +2,3 @@ NS_CHUNK = 0 NS_ARCHIVE_METADATA = 1 -NS_ARCHIVE_CHUNKS = 2 -NS_ARCHIVE_ITEMS = 3 - -PACKET_ENCRYPT_READ = 2 ** 7 -PACKET_ENCRYPT_CREATE = 2 ** 6 -PACKET_CHUNK = 1 | PACKET_ENCRYPT_READ -PACKET_ARCHIVE_METADATA = 2 | PACKET_ENCRYPT_READ -PACKET_ARCHIVE_ITEMS = 3 | PACKET_ENCRYPT_READ -PACKET_ARCHIVE_CHUNKS = 1 | PACKET_ENCRYPT_CREATE - diff --git a/darc/archive.py b/darc/archive.py index 05f66397b..322675879 100644 --- a/darc/archive.py +++ b/darc/archive.py @@ -6,11 +6,10 @@ import os import socket import stat import sys -from itertools import izip +from os.path import dirname from xattr import xattr, XATTR_NOFOLLOW -from . import NS_ARCHIVE_METADATA, NS_ARCHIVE_ITEMS, NS_ARCHIVE_CHUNKS, NS_CHUNK, \ - PACKET_ARCHIVE_METADATA, PACKET_ARCHIVE_ITEMS, PACKET_ARCHIVE_CHUNKS, PACKET_CHUNK +from . import NS_ARCHIVE_METADATA, NS_CHUNK from ._speedups import chunkify from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError @@ -26,22 +25,24 @@ class Archive(object): class DoesNotExist(Exception): pass - def __init__(self, store, keychain, name=None): - self.keychain = keychain + def __init__(self, store, key, name=None, cache=None): + self.key = key self.store = store - self.items = [] + self.cache = cache + self.items = '' + self.items_refs = [] + self.items_prefix = '' self.items_ids = [] self.hard_links = {} if name: - self.load(self.keychain.id_hash(name)) + self.load(self.key.archive_hash(name)) def load(self, id): self.id = id try: - kind, data, self.hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_METADATA, self.id)) + data, self.hash = self.key.decrypt(self.store.get(NS_ARCHIVE_METADATA, self.id)) except self.store.DoesNotExist: raise self.DoesNotExist - assert kind == PACKET_ARCHIVE_METADATA self.metadata = msgpack.unpackb(data) assert self.metadata['version'] == 1 @@ -51,80 +52,90 @@ class Archive(object): t, f = self.metadata['time'].split('.', 1) return datetime.strptime(t, '%Y-%m-%dT%H:%M:%S') + timedelta(seconds=float('.' + f)) - def get_chunks(self): - for id in self.metadata['chunks_ids']: - magic, data, hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, id)) - assert magic == PACKET_ARCHIVE_CHUNKS - assert hash == id - chunks = msgpack.unpackb(data) - for chunk in chunks: - yield chunk - def get_items(self): - for id in self.metadata['items_ids']: - magic, data, items_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_ITEMS, id)) - assert magic == PACKET_ARCHIVE_ITEMS - assert items_hash == id - items = msgpack.unpackb(data) - for item in items: + unpacker = msgpack.Unpacker() + for id, size, csize in self.metadata['items']: + data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id)) + assert self.key.id_hash(data) == id + unpacker.feed(data) + for item in unpacker: yield item - def add_item(self, item): - self.items.append(item) - if len(self.items) > 100000: + def add_item(self, item, refs=None): + data = msgpack.packb(item) + prefix = dirname(item['path']) + if self.items_prefix and self.items_prefix != prefix: self.flush_items() + if refs: + self.items_refs += refs + self.items += data + self.items_prefix = prefix def flush_items(self): - data, hash = self.keychain.encrypt(PACKET_ARCHIVE_ITEMS, msgpack.packb(self.items)) - self.store.put(NS_ARCHIVE_ITEMS, hash, data) - self.items_ids.append(hash) - self.items = [] - - def save_chunks(self, cache): - chunks = [] - ids = [] - def flush(chunks): - data, hash = self.keychain.encrypt(PACKET_ARCHIVE_CHUNKS, msgpack.packb(chunks)) - self.store.put(NS_ARCHIVE_CHUNKS, hash, data) - ids.append(hash) - for id, (count, size) in cache.chunks.iteritems(): - if count > 1000000: - chunks.append((id, size)) - if len(chunks) > 100000: - flush(chunks) - chunks = [] - flush(chunks) - return ids + if not self.items: + return + id = self.key.id_hash(self.items) + if self.cache.seen_chunk(id): + self.items_ids.append(self.cache.chunk_incref(id)) + for id in self.items_refs: + self.cache.chunk_decref(id) + else: + self.items_ids.append(self.cache.add_chunk(id, self.items)) + self.items = '' + self.items_refs = [] + self.items_prefix = '' def save(self, name, cache): - self.id = self.keychain.id_hash(name) - chunks_ids = self.save_chunks(cache) + self.id = self.key.archive_hash(name) self.flush_items() metadata = { 'version': 1, 'name': name, - 'chunks_ids': chunks_ids, - 'items_ids': self.items_ids, + 'items': self.items_ids, 'cmdline': sys.argv, 'hostname': socket.gethostname(), 'username': getuser(), 'time': datetime.utcnow().isoformat(), } - data, self.hash = self.keychain.encrypt(PACKET_ARCHIVE_METADATA, msgpack.packb(metadata)) + data, self.hash = self.key.encrypt(msgpack.packb(metadata)) self.store.put(NS_ARCHIVE_METADATA, self.id, data) self.store.commit() cache.commit() - def stats(self, cache): - osize = csize = usize = 0 + def get_chunks(self): for item in self.get_items(): - if stat.S_ISREG(item['mode']) and not 'source' in item: - osize += item['size'] - for id, size in self.get_chunks(): - csize += size - if cache.seen_chunk(id) == 1: - usize += size - return osize, csize, usize + try: + for chunk in item['chunks']: + yield chunk + except KeyError: + pass + + def stats(self, cache): + # This function is a bit evil since it abuses the cache to calculate + # the stats. The cache transaction must be rolled back afterwards + unpacker = msgpack.Unpacker() + cache.begin_txn() + osize = zsize = usize = 0 + for id, size, csize in self.metadata['items']: + osize += size + zsize += csize + unique = self.cache.seen_chunk(id) == 1 + if unique: + usize += csize + data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id)) + assert self.key.id_hash(data) == id + unpacker.feed(data) + for item in unpacker: + try: + for id, size, csize in item['chunks']: + osize += size + zsize += csize + if unique and self.cache.seen_chunk(id) == 1: + usize += csize + except KeyError: + pass + cache.rollback() + return osize, zsize, usize def extract_item(self, item, dest=None, start_cb=None): dest = dest or os.getcwdu() @@ -163,14 +174,13 @@ class Archive(object): if i==0: start_cb(item) assert not error - magic, data, hash = self.keychain.decrypt(chunk) - assert magic == PACKET_CHUNK - if self.keychain.id_hash(data) != id: + data, hash = self.key.decrypt(chunk) + if self.key.id_hash(data) != id: raise IntegrityError('chunk hash did not match') fd.write(data) if last: - self.restore_attrs(path, item) fd.close() + self.restore_attrs(path, item) fd = open(path, 'wb') n = len(item['chunks']) @@ -179,7 +189,7 @@ class Archive(object): self.restore_attrs(path, item) fd.close() else: - for i, id in enumerate(item['chunks']): + for i, (id, size, csize) in enumerate(item['chunks']): self.store.get(NS_CHUNK, id, callback=extract_cb, callback_data=(id, i, i==n-1)) else: @@ -206,16 +216,15 @@ class Archive(object): pass if not symlink: # FIXME: We should really call futimes here (c extension required) - os.utime(path, (item['atime'], item['mtime'])) + os.utime(path, (item['mtime'], item['mtime'])) def verify_file(self, item, start, result): def verify_chunk(chunk, error, (id, i, last)): if i == 0: start(item) assert not error - magic, data, hash = self.keychain.decrypt(chunk) - assert magic == PACKET_CHUNK - if self.keychain.id_hash(data) != id: + data, hash = self.key.decrypt(chunk) + if self.key.id_hash(data) != id: result(item, False) elif last: result(item, True) @@ -224,17 +233,24 @@ class Archive(object): start(item) result(item, True) else: - for i, id in enumerate(item['chunks']): + for i, (id, size, csize) in enumerate(item['chunks']): self.store.get(NS_CHUNK, id, callback=verify_chunk, callback_data=(id, i, i==n-1)) def delete(self, cache): - for id, size in self.get_chunks(): - cache.chunk_decref(id) + unpacker = msgpack.Unpacker() + for id, size, csize in self.metadata['items']: + if self.cache.seen_chunk(id) == 1: + data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id)) + assert self.key.id_hash(data) == id + unpacker.feed(data) + for item in unpacker: + try: + for chunk_id, size, csize in item['chunks']: + self.cache.chunk_decref(chunk_id) + except KeyError: + pass + self.cache.chunk_decref(id) self.store.delete(NS_ARCHIVE_METADATA, self.id) - for id in self.metadata['chunks_ids']: - self.store.delete(NS_ARCHIVE_CHUNKS, id) - for id in self.metadata['items_ids']: - self.store.delete(NS_ARCHIVE_ITEMS, id) self.store.commit() cache.commit() @@ -243,7 +259,7 @@ class Archive(object): 'mode': st.st_mode, 'uid': st.st_uid, 'user': uid2user(st.st_uid), 'gid': st.st_gid, 'group': gid2group(st.st_gid), - 'atime': st.st_atime, 'mtime': st.st_mtime, + 'mtime': st.st_mtime, } try: xa = xattr(path, XATTR_NOFOLLOW) @@ -287,34 +303,33 @@ class Archive(object): return else: self.hard_links[st.st_ino, st.st_dev] = safe_path - path_hash = self.keychain.id_hash(path.encode('utf-8')) - ids, size = cache.file_known_and_unchanged(path_hash, st) + path_hash = self.key.id_hash(path.encode('utf-8')) + ids = cache.file_known_and_unchanged(path_hash, st) + chunks = None if ids is not None: # Make sure all ids are available for id in ids: if not cache.seen_chunk(id): - ids = None break else: - for id in ids: - cache.chunk_incref(id) + chunks = [cache.chunk_incref(id) for id in ids] # Only chunkify the file if needed - if ids is None: + if chunks is None: with open(path, 'rb') as fd: - size = 0 - ids = [] + chunks = [] for chunk in chunkify(fd, CHUNK_SIZE, WINDOW_SIZE, - self.keychain.get_chunkify_seed()): - ids.append(cache.add_chunk(self.keychain.id_hash(chunk), chunk)) - size += len(chunk) + self.key.chunk_seed): + chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk)) + ids = [id for id, _, _ in chunks] cache.memorize_file(path_hash, st, ids) - item = {'path': safe_path, 'chunks': ids, 'size': size} + item = {'path': safe_path, 'chunks': chunks} item.update(self.stat_attrs(st, path)) - self.add_item(item) + self.add_item(item, ids) @staticmethod - def list_archives(store, keychain): + def list_archives(store, key): for id in list(store.list(NS_ARCHIVE_METADATA)): - archive = Archive(store, keychain) + archive = Archive(store, key) archive.load(id) yield archive + diff --git a/darc/archiver.py b/darc/archiver.py index 16f418413..d88bc9f6a 100644 --- a/darc/archiver.py +++ b/darc/archiver.py @@ -8,7 +8,7 @@ import sys from .archive import Archive from .store import Store from .cache import Cache -from .keychain import Keychain +from .key import Key from .helpers import location_validator, format_file_size, format_time,\ format_file_mode, IncludePattern, ExcludePattern, exclude_path, to_localtime from .remote import StoreServer, RemoteStore @@ -44,18 +44,22 @@ class Archiver(object): def do_serve(self, args): return StoreServer().serve() + def do_init(self, args): + store = self.open_store(args.store, create=True) + key = Key.create(store) + def do_create(self, args): - store = self.open_store(args.archive, create=True) - keychain = Keychain(args.keychain) + store = self.open_store(args.archive) + key = Key(store) try: - Archive(store, keychain, args.archive.archive) + Archive(store, key, args.archive.archive) except Archive.DoesNotExist: pass else: self.print_error('Archive already exists') return self.exit_code - archive = Archive(store, keychain) - cache = Cache(store, keychain) + cache = Cache(store, key) + archive = Archive(store, key, cache=cache) # Add darc cache dir to inode_skip list skip_inodes = set() try: @@ -112,8 +116,8 @@ class Archiver(object): def start_cb(item): self.print_verbose(item['path'].decode('utf-8')) store = self.open_store(args.archive) - keychain = Keychain(args.keychain) - archive = Archive(store, keychain, args.archive.archive) + key = Key(store) + archive = Archive(store, key, args.archive.archive) dirs = [] for item in archive.get_items(): if exclude_path(item['path'], args.patterns): @@ -131,22 +135,24 @@ class Archiver(object): def do_delete(self, args): store = self.open_store(args.archive) - keychain = Keychain(args.keychain) - archive = Archive(store, keychain, args.archive.archive) - cache = Cache(store, keychain) + key = Key(store) + cache = Cache(store, key) + archive = Archive(store, key, args.archive.archive, cache=cache) archive.delete(cache) return self.exit_code def do_list(self, args): store = self.open_store(args.src) - keychain = Keychain(args.keychain) + key = Key(store) if args.src.archive: tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 010: '-', 012: 'l', 014: 's'} - archive = Archive(store, keychain, args.src.archive) + archive = Archive(store, key, args.src.archive) for item in archive.get_items(): type = tmap.get(item['mode'] / 4096, '?') mode = format_file_mode(item['mode']) - size = item.get('size', 0) + size = 0 + if type == '-': + size = sum(size for _, size, _ in item['chunks']) mtime = format_time(datetime.fromtimestamp(item['mtime'])) if 'source' in item: if type == 'l': @@ -160,14 +166,14 @@ class Archiver(object): item['group'], size, mtime, item['path'], extra) else: - for archive in sorted(Archive.list_archives(store, keychain), key=attrgetter('ts')): + for archive in sorted(Archive.list_archives(store, key), key=attrgetter('ts')): print '%-20s %s' % (archive.metadata['name'], to_localtime(archive.ts).strftime('%c')) return self.exit_code def do_verify(self, args): store = self.open_store(args.archive) - keychain = Keychain(args.keychain) - archive = Archive(store, keychain, args.archive.archive) + key = Key(store) + archive = Archive(store, key, args.archive.archive) def start_cb(item): self.print_verbose('%s ...', item['path'].decode('utf-8'), newline=False) def result_cb(item, success): @@ -187,9 +193,9 @@ class Archiver(object): def do_info(self, args): store = self.open_store(args.archive) - keychain = Keychain(args.keychain) - archive = Archive(store, keychain, args.archive.archive) - cache = Cache(store, keychain) + key = Key(store) + cache = Cache(store, key) + archive = Archive(store, key, args.archive.archive, cache=cache) osize, csize, usize = archive.stats(cache) print 'Name:', archive.metadata['name'] print 'Hostname:', archive.metadata['hostname'] @@ -201,45 +207,28 @@ class Archiver(object): print 'Unique data:', format_file_size(usize) return self.exit_code - def do_init_keychain(self, args): - return Keychain.generate(args.keychain) - - def do_export_restricted(self, args): - keychain = Keychain(args.keychain) - keychain.restrict(args.output) - return self.exit_code - - def do_keychain_chpass(self, args): - return Keychain(args.keychain).chpass() - def run(self, args=None): dot_path = os.path.join(os.path.expanduser('~'), '.darc') if not os.path.exists(dot_path): os.mkdir(dot_path) - default_keychain = os.path.join(os.path.expanduser('~'), - '.darc', 'keychain') + os.mkdir(os.path.join(dot_path, 'keys')) + os.mkdir(os.path.join(dot_path, 'cache')) parser = argparse.ArgumentParser(description='DARC - Deduplicating Archiver') - parser.add_argument('-k', '--keychain', dest='keychain', type=str, - default=default_keychain, - help='Keychain to use') parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', default=False, help='Verbose output') - subparsers = parser.add_subparsers(title='Available subcommands') - subparser = subparsers.add_parser('init-keychain') - subparser.set_defaults(func=self.do_init_keychain) - subparser = subparsers.add_parser('export-restricted') - subparser.add_argument('output', metavar='OUTPUT', type=str, - help='Keychain to create') - subparser.set_defaults(func=self.do_export_restricted) - subparser = subparsers.add_parser('change-password') - subparser.set_defaults(func=self.do_keychain_chpass) subparser = subparsers.add_parser('serve') subparser.set_defaults(func=self.do_serve) + subparser = subparsers.add_parser('init') + subparser.set_defaults(func=self.do_init) + subparser.add_argument('store', metavar='ARCHIVE', + type=location_validator(archive=False), + help='Store to create') + subparser = subparsers.add_parser('create') subparser.set_defaults(func=self.do_create) subparser.add_argument('-i', '--include', dest='patterns', diff --git a/darc/cache.py b/darc/cache.py index cd28dbb3b..284eb32a0 100644 --- a/darc/cache.py +++ b/darc/cache.py @@ -5,19 +5,19 @@ import msgpack import os import shutil -from . import NS_ARCHIVE_CHUNKS, NS_CHUNK, PACKET_ARCHIVE_CHUNKS, PACKET_CHUNK +from . import NS_CHUNK, NS_ARCHIVE_METADATA from .helpers import error_callback -from .hashindex import NSIndex +from .hashindex import ChunkIndex class Cache(object): """Client Side cache """ - def __init__(self, store, keychain): + def __init__(self, store, key): self.txn_active = False self.store = store - self.keychain = keychain + self.key = key self.path = os.path.join(Cache.cache_dir_path(), self.store.id.encode('hex')) if not os.path.exists(self.path): self.create() @@ -25,6 +25,7 @@ class Cache(object): assert self.id == store.id if self.tid != store.tid: self.sync() + self.commit() @staticmethod def cache_dir_path(): @@ -44,7 +45,7 @@ class Cache(object): config.set('cache', 'tid', '0') with open(os.path.join(self.path, 'config'), 'wb') as fd: config.write(fd) - NSIndex.create(os.path.join(self.path, 'chunks')) + ChunkIndex.create(os.path.join(self.path, 'chunks')) with open(os.path.join(self.path, 'files'), 'wb') as fd: pass # empty file @@ -60,7 +61,7 @@ class Cache(object): raise Exception('%s Does not look like a darc cache') self.id = self.config.get('cache', 'store_id').decode('hex') self.tid = self.config.getint('cache', 'tid') - self.chunks = NSIndex(os.path.join(self.path, 'chunks')) + self.chunks = ChunkIndex(os.path.join(self.path, 'chunks')) self.files = None def _read_files(self): @@ -96,9 +97,6 @@ class Cache(object): with open(os.path.join(self.path, 'files'), 'wb') as fd: for item in self.files.iteritems(): msgpack.pack(item, fd) - for id, (count, size) in self.chunks.iteritems(): - if count > 1000000: - self.chunks[id] = count - 1000000, size self.config.set('cache', 'tid', self.store.tid) with open(os.path.join(self.path, 'config'), 'w') as fd: self.config.write(fd) @@ -129,48 +127,63 @@ class Cache(object): self.begin_txn() print 'Initializing cache...' self.chunks.clear() - for id in self.store.list(NS_ARCHIVE_CHUNKS): - magic, data, hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, id)) - assert magic == PACKET_ARCHIVE_CHUNKS - chunks = msgpack.unpackb(data) - for id, size in chunks: + unpacker = msgpack.Unpacker() + for id in self.store.list(NS_ARCHIVE_METADATA): + data, hash = self.key.decrypt(self.store.get(NS_ARCHIVE_METADATA, id)) + archive = msgpack.unpackb(data) + print 'Analyzing archive:', archive['name'] + for id, size, csize in archive['items']: + data, hash = self.key.decrypt(self.store.get(NS_CHUNK, id)) + assert self.key.id_hash(data) == id try: - count, size = self.chunks[id] - self.chunks[id] = count + 1, size + count, size, csize = self.chunks[id] + self.chunks[id] = count + 1, size, csize except KeyError: - self.chunks[id] = 1, size + self.chunks[id] = 1, size, csize + unpacker.feed(data) + for item in unpacker: + try: + for id, size, csize in item['chunks']: + try: + count, size, csize = self.chunks[id] + self.chunks[id] = count + 1, size, csize + except KeyError: + self.chunks[id] = 1, size, csize + pass + except KeyError: + pass def add_chunk(self, id, data): if not self.txn_active: self.begin_txn() if self.seen_chunk(id): return self.chunk_incref(id) - data, hash = self.keychain.encrypt(PACKET_CHUNK, data) + size = len(data) + data, hash = self.key.encrypt(data) csize = len(data) self.store.put(NS_CHUNK, id, data, callback=error_callback) - self.chunks[id] = (1000001, csize) - return id + self.chunks[id] = (1, size, csize) + return id, size, csize def seen_chunk(self, id): - return self.chunks.get(id, (0, 0))[0] + return self.chunks.get(id, (0, 0, 0))[0] def chunk_incref(self, id): if not self.txn_active: self.begin_txn() - count, size = self.chunks[id] - if count < 1000000: - self.chunks[id] = (count + 1000001, size) - return id + count, size, csize = self.chunks[id] + self.chunks[id] = (count + 1, size, csize) + return id, size, csize def chunk_decref(self, id): if not self.txn_active: self.begin_txn() - count, size = self.chunks[id] + count, size, csize = self.chunks[id] if count == 1: del self.chunks[id] self.store.delete(NS_CHUNK, id, callback=error_callback) else: - self.chunks[id] = (count - 1, size) + self.chunks[id] = (count - 1, size, csize) def file_known_and_unchanged(self, path_hash, st): if self.files is None: @@ -180,9 +193,9 @@ class Cache(object): and entry[2] == st.st_size and entry[1] == st.st_ino): # reset entry age self.files[path_hash] = (0,) + entry[1:] - return entry[4], entry[2] + return entry[4] else: - return None, 0 + return None def memorize_file(self, path_hash, st, ids): # Entry: Age, inode, size, mtime, chunk ids diff --git a/darc/hashindex.pyx b/darc/hashindex.pyx index 00e4b7aae..d5614d516 100644 --- a/darc/hashindex.pyx +++ b/darc/hashindex.pyx @@ -113,6 +113,62 @@ cdef class NSKeyIterator: return self.key[:32], (value[0], value[1]) +cdef class ChunkIndex(IndexBase): + + @classmethod + def create(cls, path, capacity=16): + index = hashindex_create(path, capacity, 32, 12) + hashindex_close(index) + return cls(path) + + def __getitem__(self, key): + assert len(key) == 32 + data = hashindex_get(self.index, key) + if not data: + raise KeyError + return data[0], data[1], data[2] + + def __delitem__(self, key): + assert len(key) == 32 + hashindex_delete(self.index, key) + + def __setitem__(self, key, value): + assert len(key) == 32 + cdef int[3] data + data[0] = value[0] + data[1] = value[1] + data[2] = value[2] + hashindex_set(self.index, key, data) + + def __contains__(self, key): + assert len(key) == 32 + data = hashindex_get(self.index, key) + return data != NULL + + def iteritems(self, marker=None, limit=0): + iter = ChunkKeyIterator() + iter.index = self.index + return iter + + +cdef class ChunkKeyIterator: + cdef HashIndex *index + cdef char *key + + def __cinit__(self): + self.key = NULL + + def __iter__(self): + return self + + def __next__(self): + self.key = hashindex_next_key(self.index, self.key) + if not self.key: + raise StopIteration + cdef int *value = (self.key + 32) + return self.key[:32], (value[0], value[1], value[2]) + + cdef class BandIndex(IndexBase): @classmethod diff --git a/darc/helpers.py b/darc/helpers.py index f7008ac3e..fa299f5f4 100644 --- a/darc/helpers.py +++ b/darc/helpers.py @@ -70,17 +70,6 @@ def decode_long(bytes): return v + (b << base) -def zero_pad(data, length): - """Make sure data is `length` bytes long by prepending zero bytes - - >>> zero_pad('foo', 5) - '\\x00\\x00foo' - >>> zero_pad('foo', 3) - 'foo' - """ - return '\0' * (length - len(data)) + data - - def exclude_path(path, patterns): """Used by create and extract sub-commands to determine if an item should be processed or not diff --git a/darc/key.py b/darc/key.py new file mode 100644 index 000000000..cbc803882 --- /dev/null +++ b/darc/key.py @@ -0,0 +1,160 @@ +from __future__ import with_statement +from getpass import getpass +import hashlib +import os +import msgpack +import zlib + +from pbkdf2 import pbkdf2 +from Crypto.Cipher import AES +from Crypto.Hash import SHA256, HMAC +from Crypto.Util import Counter +from Crypto.Util.number import bytes_to_long, long_to_bytes +from Crypto.Random import get_random_bytes + +from .helpers import IntegrityError + + +class Key(object): + FILE_ID = 'DARC KEY' + + def __init__(self, store=None): + if store: + self.open(store) + + def open(self, store): + path = os.path.join(os.path.expanduser('~'), + '.darc', 'keys', store.id.encode('hex')) + with open(path, 'rb') as fd: + lines = fd.readlines() + if not lines[0].startswith(self.FILE_ID) != self.FILE_ID: + raise ValueError('Not a DARC key file') + self.store_id = lines[0][len(self.FILE_ID):].strip().decode('hex') + cdata = (''.join(lines[1:])).decode('base64') + self.password = '' + data = self.decrypt_key_file(cdata, '') + while not data: + self.password = getpass('Key password: ') + if not self.password: + raise Exception('Key decryption failed') + data = self.decrypt_key_file(cdata, self.password) + if not data: + print 'Incorrect password' + key = msgpack.unpackb(data) + assert key['version'] == 1 + self.store_id = key['store_id'] + self.enc_key = key['enc_key'] + self.enc_hmac_key = key['enc_hmac_key'] + self.id_key = key['id_key'] + self.archive_key = key['archive_key'] + self.chunk_seed = key['chunk_seed'] + self.counter = Counter.new(128, initial_value=bytes_to_long(os.urandom(16)), allow_wraparound=True) + + def encrypt_key_file(self, data, password): + salt = get_random_bytes(32) + iterations = 2000 + key = pbkdf2(password, salt, 32, iterations, hashlib.sha256) + hash = HMAC.new(key, data, SHA256).digest() + cdata = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).encrypt(data) + d = { + 'version': 1, + 'salt': salt, + 'iterations': iterations, + 'algorithm': 'SHA256', + 'hash': hash, + 'data': cdata, + } + return msgpack.packb(d) + + def decrypt_key_file(self, data, password): + d = msgpack.unpackb(data) + assert d['version'] == 1 + assert d['algorithm'] == 'SHA256' + key = pbkdf2(password, d['salt'], 32, d['iterations'], hashlib.sha256) + data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d['data']) + if HMAC.new(key, data, SHA256).digest() != d['hash']: + return None + return data + + def save(self, path, password): + key = { + 'version': 1, + 'store_id': self.store_id, + 'enc_key': self.enc_key, + 'enc_hmac_key': self.enc_hmac_key, + 'id_key': self.enc_key, + 'archive_key': self.enc_key, + 'chunk_seed': self.chunk_seed, + } + data = self.encrypt_key_file(msgpack.packb(key), password) + with open(path, 'wb') as fd: + fd.write('%s %s\n' % (self.FILE_ID, self.store_id.encode('hex'))) + fd.write(data.encode('base64')) + print 'Key chain "%s" created' % path + + def chpass(self): + password, password2 = 1, 2 + while password != password2: + password = getpass('New password: ') + password2 = getpass('New password again: ') + if password != password2: + print 'Passwords do not match' + self.save(self.path, password) + return 0 + + @staticmethod + def create(store): + path = os.path.join(os.path.expanduser('~'), + '.darc', 'keys', store.id.encode('hex')) + if os.path.exists(path): + print '%s already exists' % path + return 1 + password, password2 = 1, 2 + while password != password2: + password = getpass('Keychain password: ') + password2 = getpass('Keychain password again: ') + if password != password2: + print 'Passwords do not match' + key = Key() + key.store_id = store.id + # Chunk AES256 encryption key + key.enc_key = get_random_bytes(32) + # Chunk encryption HMAC key + key.enc_hmac_key = get_random_bytes(32) + # Chunk id HMAC key + key.id_key = get_random_bytes(32) + # Archive name HMAC key + key.archive_key = get_random_bytes(32) + # Chunkifier seed + key.chunk_seed = bytes_to_long(get_random_bytes(4)) & 0x7fffffff + key.save(path, password) + return 0 + + def id_hash(self, data): + """Return HMAC hash using the "id" HMAC key + """ + return HMAC.new(self.id_key, data, SHA256).digest() + + def archive_hash(self, data): + """Return HMAC hash using the "archive" HMAC key + """ + return HMAC.new(self.archive_key, data, SHA256).digest() + + def encrypt(self, data): + data = zlib.compress(data) + nonce = long_to_bytes(self.counter.next_value(), 16) + data = ''.join((nonce, AES.new(self.enc_key, AES.MODE_CTR, '', + counter=self.counter).encrypt(data))) + hash = HMAC.new(self.enc_hmac_key, data, SHA256).digest() + return ''.join(('\0', hash, data)), hash + + def decrypt(self, data): + assert data[0] == '\0' + hash = data[1:33] + if HMAC.new(self.enc_hmac_key, data[33:], SHA256).digest() != hash: + raise IntegrityError('Encryption integrity error') + nonce = bytes_to_long(data[33:49]) + counter = Counter.new(128, initial_value=nonce, allow_wraparound=True) + data = AES.new(self.enc_key, AES.MODE_CTR, counter=counter).decrypt(data[49:]) + return zlib.decompress(data), hash + diff --git a/darc/keychain.py b/darc/keychain.py deleted file mode 100644 index c103226e7..000000000 --- a/darc/keychain.py +++ /dev/null @@ -1,189 +0,0 @@ -from __future__ import with_statement -from getpass import getpass -import hashlib -import os -import msgpack -import zlib - -from pbkdf2 import pbkdf2 -from Crypto.Cipher import AES -from Crypto.Hash import SHA256, HMAC -from Crypto.PublicKey import RSA -from Crypto.Util import Counter -from Crypto.Util.number import bytes_to_long, long_to_bytes - -from . import PACKET_ENCRYPT_READ, PACKET_ENCRYPT_CREATE -from .helpers import IntegrityError, zero_pad -from .oaep import OAEP - - -class Keychain(object): - FILE_ID = 'DARC KEYCHAIN' - - CREATE = '\1' - READ = '\2' - - def __init__(self, path=None): - self._key_cache = {} - self.read_key = os.urandom(32) - self.create_key = os.urandom(32) - self.counter = Counter.new(64, prefix='\0' * 8) - self.aes_id = self.rsa_read = self.rsa_create = None - self.path = path - if path: - self.open(path) - - def get_chunkify_seed(self): - return bytes_to_long(self.aes_id[:4]) & 0x7fffffff - - def open(self, path): - print 'Opening keychain "%s"' % path - with open(path, 'rb') as fd: - if fd.read(len(self.FILE_ID)) != self.FILE_ID: - raise ValueError('Not a keychain') - cdata = fd.read() - self.password = '' - data = self.decrypt_keychain(cdata, '') - while not data: - self.password = getpass('Keychain password: ') - if not self.password: - raise Exception('Keychain decryption failed') - data = self.decrypt_keychain(cdata, self.password) - if not data: - print 'Incorrect password' - chain = msgpack.unpackb(data) - assert chain['version'] == 1 - self.aes_id = chain['aes_id'] - self.rsa_read = RSA.importKey(chain['rsa_read']) - self.rsa_create = RSA.importKey(chain['rsa_create']) - self.read_encrypted = OAEP(256, hash=SHA256).encode(self.read_key, os.urandom(32)) - self.read_encrypted = zero_pad(self.rsa_read.encrypt(self.read_encrypted, '')[0], 256) - self.create_encrypted = OAEP(256, hash=SHA256).encode(self.create_key, os.urandom(32)) - self.create_encrypted = zero_pad(self.rsa_create.encrypt(self.create_encrypted, '')[0], 256) - - def encrypt_keychain(self, data, password): - salt = os.urandom(32) - iterations = 2000 - key = pbkdf2(password, salt, 32, iterations, hashlib.sha256) - hash = HMAC.new(key, data, SHA256).digest() - cdata = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).encrypt(data) - d = { - 'version': 1, - 'salt': salt, - 'iterations': iterations, - 'algorithm': 'SHA256', - 'hash': hash, - 'data': cdata, - } - return msgpack.packb(d) - - def decrypt_keychain(self, data, password): - d = msgpack.unpackb(data) - assert d['version'] == 1 - assert d['algorithm'] == 'SHA256' - key = pbkdf2(password, d['salt'], 32, d['iterations'], hashlib.sha256) - data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d['data']) - if HMAC.new(key, data, SHA256).digest() != d['hash']: - return None - return data - - def save(self, path, password): - chain = { - 'version': 1, - 'aes_id': self.aes_id, - 'rsa_read': self.rsa_read.exportKey('PEM'), - 'rsa_create': self.rsa_create.exportKey('PEM'), - } - data = self.encrypt_keychain(msgpack.packb(chain), password) - with open(path, 'wb') as fd: - fd.write(self.FILE_ID) - fd.write(data) - print 'Key chain "%s" saved' % path - - def restrict(self, path): - if os.path.exists(path): - print '%s already exists' % path - return 1 - self.rsa_read = self.rsa_read.publickey() - self.save(path, self.password) - return 0 - - def chpass(self): - password, password2 = 1, 2 - while password != password2: - password = getpass('New password: ') - password2 = getpass('New password again: ') - if password != password2: - print 'Passwords do not match' - self.save(self.path, password) - return 0 - - @staticmethod - def generate(path): - if os.path.exists(path): - print '%s already exists' % path - return 1 - password, password2 = 1, 2 - while password != password2: - password = getpass('Keychain password: ') - password2 = getpass('Keychain password again: ') - if password != password2: - print 'Passwords do not match' - chain = Keychain() - print 'Generating keychain' - chain.aes_id = os.urandom(32) - chain.rsa_read = RSA.generate(2048) - chain.rsa_create = RSA.generate(2048) - chain.save(path, password) - return 0 - - def id_hash(self, data): - """Return HMAC hash using the "id" AES key - """ - return HMAC.new(self.aes_id, data, SHA256).digest() - - def encrypt(self, magic, data): - """Helper function used by `encrypt_read` and `encrypt_create` - """ - data = zlib.compress(data) - nonce = long_to_bytes(self.counter.next_value(), 8) - if magic & PACKET_ENCRYPT_READ: - data = ''.join((nonce, self.read_encrypted, - AES.new(self.read_key, AES.MODE_CTR, '', - counter=self.counter).encrypt(data))) - elif magic & PACKET_ENCRYPT_CREATE: - data = ''.join((nonce, self.create_encrypted, - AES.new(self.create_key, AES.MODE_CTR, '', - counter=self.counter).encrypt(data))) - hash = self.id_hash(data) - return ''.join((chr(magic), hash, data)), hash - - def _decrypt_key(self, data, rsa_key): - """Helper function used by `decrypt` - """ - try: - return self._key_cache[data] - except KeyError: - self._key_cache[data] = OAEP(256, hash=SHA256).decode(rsa_key.decrypt(data)) - return self._key_cache[data] - - def decrypt(self, data): - """Decrypt `data` previously encrypted by `encrypt_create` or `encrypt_read` - """ - magic = ord(data[0]) - hash = data[1:33] - if self.id_hash(data[33:]) != hash: - raise IntegrityError('Encryption integrity error') - nonce = bytes_to_long(data[33:41]) - counter = Counter.new(64, prefix='\0' * 8, initial_value=nonce) - if magic & PACKET_ENCRYPT_READ: - key = self._decrypt_key(data[41:297], self.rsa_read) - elif magic & PACKET_ENCRYPT_CREATE: - key = self._decrypt_key(data[41:297], self.rsa_create) - else: - raise Exception('Unknown pack magic %d found' % magic) - data = AES.new(key, AES.MODE_CTR, counter=counter).decrypt(data[297:]) - return magic, zlib.decompress(data), hash - - - diff --git a/darc/oaep.py b/darc/oaep.py deleted file mode 100644 index 069e3de33..000000000 --- a/darc/oaep.py +++ /dev/null @@ -1,71 +0,0 @@ -from Crypto.Util.number import long_to_bytes -from Crypto.Hash import SHA - -from .helpers import IntegrityError - -def _xor_bytes(a, b): - return ''.join(chr(ord(x[0]) ^ ord(x[1])) for x in zip(a, b)) - - -def MGF1(seed, mask_len, hash=SHA): - """MGF1 is a Mask Generation Function based on hash function - """ - T = ''.join(hash.new(seed + long_to_bytes(c, 4)).digest() - for c in range(1 + mask_len / hash.digest_size)) - return T[:mask_len] - - -class OAEP(object): - """Optimal Asymmetric Encryption Padding - """ - def __init__(self, k, hash=SHA, MGF=MGF1): - self.k = k - self.hash = hash - self.MGF = MGF - - def encode(self, msg, seed, label=''): - # FIXME: length checks - if len(msg) > self.k - 2 * self.hash.digest_size - 2: - raise ValueError('message too long') - label_hash = self.hash.new(label).digest() - padding = '\0' * (self.k - len(msg) - 2 * self.hash.digest_size - 2) - datablock = '%s%s\1%s' % (label_hash, padding, msg) - datablock_mask = self.MGF(seed, self.k - self.hash.digest_size - 1, self.hash) - masked_db = _xor_bytes(datablock, datablock_mask) - seed_mask = self.MGF(masked_db, self.hash.digest_size, self.hash) - masked_seed = _xor_bytes(seed, seed_mask) - return '\0%s%s' % (masked_seed, masked_db) - - def decode(self, ciphertext, label=''): - if len(ciphertext) < self.k: - ciphertext = ('\0' * (self.k - len(ciphertext))) + ciphertext - label_hash = self.hash.new(label).digest() - masked_seed = ciphertext[1:self.hash.digest_size + 1] - masked_db = ciphertext[-(self.k - self.hash.digest_size - 1):] - seed_mask = self.MGF(masked_db, self.hash.digest_size, self.hash) - seed = _xor_bytes(masked_seed, seed_mask) - datablock_mask = self.MGF(seed, self.k - self.hash.digest_size - 1, self.hash) - datablock = _xor_bytes(masked_db, datablock_mask) - label_hash2 = datablock[:self.hash.digest_size] - data = datablock[self.hash.digest_size:].lstrip('\0') - if (ciphertext[0] != '\0' or - label_hash != label_hash2 or - data[0] != '\1'): - raise IntegrityError('decryption error') - return data[1:] - - -def test(): - from Crypto.Hash import SHA256 - import os - import random - oaep = OAEP(256, SHA256) - for x in range(1000): - M = os.urandom(random.randint(0, 100)) - EM = oaep.encode(M, os.urandom(32)) - assert len(EM) == oaep.k - assert oaep.decode(EM) == M - -if __name__ == '__main__': - test() - diff --git a/darc/store.py b/darc/store.py index dcc9ae92d..296c512e8 100644 --- a/darc/store.py +++ b/darc/store.py @@ -32,7 +32,7 @@ class Store(object): def __init__(self, path, create=False): self.txn_active = False - if not os.path.exists(path) and create: + if create: self.create(path) self.open(path)