Port to Python 3.2+

Jonas Borgström 2013-06-03 13:45:48 +02:00
parent bda6bc47d7
commit 1fdc5eabc6
12 changed files with 356 additions and 340 deletions

View file

@@ -122,14 +122,27 @@ chunker_fill(Chunker *c)
if(!data) {
return 0;
}
int n = PyString_Size(data);
memcpy(c->data + c->position + c->remaining, PyString_AsString(data), n);
int n = PyBytes_Size(data);
memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
c->remaining += n;
c->bytes_read += n;
Py_DECREF(data);
return 1;
}
PyObject *
PyBuffer_FromMemory(void *data, Py_ssize_t len)
{
Py_buffer buffer;
PyObject *mv;
PyBuffer_FillInfo(&buffer, NULL, data, len, 1, PyBUF_CONTIG_RO);
mv = PyMemoryView_FromBuffer(&buffer);
PyBuffer_Release(&buffer);
return mv;
}
static PyObject *
chunker_process(Chunker *c)
{
@@ -186,4 +199,4 @@ chunker_process(Chunker *c)
c->bytes_yielded += n;
return PyBuffer_FromMemory(c->data + old_last, n);
}
}
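Note on the shim above: Python 3 dropped the old buffer object and its PyBuffer_FromMemory() C-API call, so the commit re-creates it on top of memoryview via PyBuffer_FillInfo()/PyMemoryView_FromBuffer(). A minimal Python-level sketch of what callers now receive (the buffer contents here are purely illustrative):

# chunker_process() now yields a read-only, zero-copy view into the
# chunker's internal buffer instead of a str/buffer object.
data = bytearray(b'0123456789abcdef')    # stands in for c->data
view = memoryview(data)[4:12]            # a window into it, no copy made
assert bytes(view) == b'456789ab'        # bytes() takes the owned copy
# This is why the Python side now does bytes(s) where it used str(s).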

View file

@@ -1,27 +1,26 @@
from __future__ import with_statement
from datetime import datetime, timedelta
from getpass import getuser
from itertools import izip_longest
from itertools import zip_longest
import msgpack
import os
import socket
import stat
import sys
import time
from cStringIO import StringIO
from xattr import xattr, XATTR_NOFOLLOW
from io import BytesIO
import xattr
from .chunker import chunkify
from .helpers import uid2user, user2uid, gid2group, group2gid, \
encode_filename, Statistics
Statistics, decode_dict
ITEMS_BUFFER = 1024 * 1024
CHUNK_MIN = 1024
WINDOW_SIZE = 0xfff
CHUNK_MASK = 0xffff
have_lchmod = hasattr(os, 'lchmod')
linux = sys.platform == 'linux2'
utime_supports_fd = os.utime in getattr(os, 'supports_fd', {})
has_lchmod = hasattr(os, 'lchmod')
class ItemIter(object):
@@ -39,20 +38,20 @@ class ItemIter(object):
def __iter__(self):
return self
def next(self):
def __next__(self):
if self.stack:
item = self.stack.pop(0)
else:
self._peek = None
item = self.get_next()
self.peeks = max(0, self.peeks - len(item.get('chunks', [])))
self.peeks = max(0, self.peeks - len(item.get(b'chunks', [])))
return item
def get_next(self):
next = self.unpacker.next()
while self.filter and not self.filter(next):
next = self.unpacker.next()
return next
n = next(self.unpacker)
while self.filter and not self.filter(n):
n = next(self.unpacker)
return n
def peek(self):
while True:
@@ -61,12 +60,12 @@ class ItemIter(object):
raise StopIteration
self._peek = self.get_next()
self.stack.append(self._peek)
if 'chunks' in self._peek:
self._peek_iter = iter(self._peek['chunks'])
if b'chunks' in self._peek:
self._peek_iter = iter(self._peek[b'chunks'])
else:
self._peek_iter = None
try:
item = self._peek_iter.next()
item = next(self._peek_iter)
self.peeks += 1
return item
except StopIteration:
@@ -83,15 +82,12 @@ class Archive(object):
def __init__(self, store, key, manifest, name, cache=None, create=False,
checkpoint_interval=300, numeric_owner=False):
if sys.platform == 'darwin':
self.cwd = os.getcwdu()
else:
self.cwd = os.getcwd()
self.cwd = os.getcwd()
self.key = key
self.store = store
self.cache = cache
self.manifest = manifest
self.items = StringIO()
self.items = BytesIO()
self.items_ids = []
self.hard_links = {}
self.stats = Statistics()
@@ -112,20 +108,22 @@ class Archive(object):
if name not in self.manifest.archives:
raise self.DoesNotExist(name)
info = self.manifest.archives[name]
self.load(info['id'])
self.load(info[b'id'])
def load(self, id):
self.id = id
data = self.key.decrypt(self.id, self.store.get(self.id))
self.metadata = msgpack.unpackb(data)
if self.metadata['version'] != 1:
if self.metadata[b'version'] != 1:
raise Exception('Unknown archive metadata version')
self.name = self.metadata['name']
decode_dict(self.metadata, (b'name', b'hostname', b'username', b'time'))
self.metadata[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in self.metadata[b'cmdline']]
self.name = self.metadata[b'name']
@property
def ts(self):
"""Timestamp of archive creation in UTC"""
t, f = self.metadata['time'].split('.', 1)
t, f = self.metadata[b'time'].split('.', 1)
return datetime.strptime(t, '%Y-%m-%dT%H:%M:%S') + timedelta(seconds=float('.' + f))
def __repr__(self):
@@ -136,18 +134,19 @@ class Archive(object):
i = 0
n = 20
while True:
items = self.metadata['items'][i:i + n]
items = self.metadata[b'items'][i:i + n]
i += n
if not items:
break
for id, chunk in [(id, chunk) for id, chunk in izip_longest(items, self.store.get_many(items))]:
for id, chunk in [(id, chunk) for id, chunk in zip_longest(items, self.store.get_many(items))]:
unpacker.feed(self.key.decrypt(id, chunk))
iter = ItemIter(unpacker, filter)
for item in iter:
decode_dict(item, (b'path', b'source', b'user', b'group'))
yield item, iter.peek
def add_item(self, item):
self.items.write(msgpack.packb(item))
self.items.write(msgpack.packb(item, unicode_errors='surrogateescape'))
now = time.time()
if now - self.last_checkpoint > self.checkpoint_interval:
self.last_checkpoint = now
@@ -159,7 +158,7 @@ class Archive(object):
if self.items.tell() == 0:
return
self.items.seek(0)
chunks = list(str(s) for s in chunkify(self.items, WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed))
chunks = list(bytes(s) for s in chunkify(self.items, WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed))
self.items.seek(0)
self.items.truncate()
for chunk in chunks[:-1]:
@@ -190,7 +189,7 @@ class Archive(object):
'username': getuser(),
'time': datetime.utcnow().isoformat(),
}
data = msgpack.packb(metadata)
data = msgpack.packb(metadata, unicode_errors='surrogateescape')
self.id = self.key.id_hash(data)
self.cache.add_chunk(self.id, data, self.stats)
self.manifest.archives[name] = {'id': self.id, 'time': metadata['time']}
@@ -209,12 +208,12 @@ class Archive(object):
cache.begin_txn()
stats = Statistics()
add(self.id)
for id, chunk in izip_longest(self.metadata['items'], self.store.get_many(self.metadata['items'])):
for id, chunk in zip_longest(self.metadata[b'items'], self.store.get_many(self.metadata[b'items'])):
add(id)
unpacker.feed(self.key.decrypt(id, chunk))
for item in unpacker:
try:
for id, size, csize in item['chunks']:
for id, size, csize in item[b'chunks']:
add(id)
stats.nfiles += 1
except KeyError:
@@ -224,8 +223,8 @@ class Archive(object):
def extract_item(self, item, dest=None, restore_attrs=True, peek=None):
dest = dest or self.cwd
assert item['path'][0] not in ('/', '\\', ':')
path = os.path.join(dest, encode_filename(item['path']))
assert item[b'path'][:1] not in ('/', '\\', ':')
path = os.path.join(dest, item[b'path'])
# Attempt to remove existing files, ignore errors on failure
try:
st = os.lstat(path)
@ -235,7 +234,7 @@ class Archive(object):
os.unlink(path)
except OSError:
pass
mode = item['mode']
mode = item[b'mode']
if stat.S_ISDIR(mode):
if not os.path.exists(path):
os.makedirs(path)
@@ -245,18 +244,18 @@ class Archive(object):
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
# Hard link?
if 'source' in item:
source = os.path.join(dest, item['source'])
if b'source' in item:
source = os.path.join(dest, item[b'source'])
if os.path.exists(path):
os.unlink(path)
os.link(source, path)
else:
with open(path, 'wbx') as fd:
ids = [id for id, size, csize in item['chunks']]
for id, chunk in izip_longest(ids, self.store.get_many(ids, peek)):
with open(path, 'wb') as fd:
ids = [id for id, size, csize in item[b'chunks']]
for id, chunk in zip_longest(ids, self.store.get_many(ids, peek)):
data = self.key.decrypt(id, chunk)
fd.write(data)
self.restore_attrs(path, item)
self.restore_attrs(path, item, fd=fd.fileno())
elif stat.S_ISFIFO(mode):
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
@@ -265,53 +264,61 @@ class Archive(object):
elif stat.S_ISLNK(mode):
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
source = item['source']
source = item[b'source']
if os.path.exists(path):
os.unlink(path)
os.symlink(source, path)
self.restore_attrs(path, item, symlink=True)
elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
os.mknod(path, item['mode'], item['rdev'])
os.mknod(path, item[b'mode'], item[b'rdev'])
self.restore_attrs(path, item)
else:
raise Exception('Unknown archive item type %r' % item['mode'])
raise Exception('Unknown archive item type %r' % item[b'mode'])
def restore_attrs(self, path, item, symlink=False):
xattrs = item.get('xattrs')
def restore_attrs(self, path, item, symlink=False, fd=None):
xattrs = item.get(b'xattrs')
if xattrs:
xa = xattr(path, XATTR_NOFOLLOW)
for k, v in xattrs.items():
try:
xa.set(k, v)
except (IOError, KeyError):
xattr.set(fd or path, k, v)
except (EnvironmentError):
pass
uid = gid = None
if not self.numeric_owner:
uid = user2uid(item['user'])
gid = group2gid(item['group'])
uid = uid or item['uid']
gid = gid or item['gid']
uid = user2uid(item[b'user'])
gid = group2gid(item[b'group'])
uid = uid or item[b'uid']
gid = gid or item[b'gid']
# This code is a bit of a mess due to os specific differences
try:
os.lchown(path, uid, gid)
if fd:
os.fchown(fd, uid, gid)
else:
os.lchown(path, uid, gid)
except OSError:
pass
if have_lchmod:
os.lchmod(path, item['mode'])
if fd:
os.fchmod(fd, item[b'mode'])
elif not symlink:
os.chmod(path, item['mode'])
if not symlink:
# FIXME: We should really call futimes here (c extension required)
os.utime(path, (item['mtime'], item['mtime']))
os.chmod(path, item[b'mode'])
elif has_lchmod: # Not available on Linux
os.lchmod(path, item[b'mode'])
if fd and utime_supports_fd: # Python >= 3.3
os.utime(fd, (item[b'mtime'], item[b'mtime']))
elif utime_supports_fd: # Python >= 3.3
os.utime(path, (item[b'mtime'], item[b'mtime']), follow_symlinks=False)
elif not symlink:
os.utime(path, (item[b'mtime'], item[b'mtime']))
def verify_file(self, item, start, result, peek=None):
if not item['chunks']:
if not item[b'chunks']:
start(item)
result(item, True)
else:
start(item)
ids = [id for id, size, csize in item['chunks']]
ids = [id for id, size, csize in item[b'chunks']]
try:
for id, chunk in izip_longest(ids, self.store.get_many(ids, peek)):
for id, chunk in zip_longest(ids, self.store.get_many(ids, peek)):
self.key.decrypt(id, chunk)
except Exception:
result(item, False)
@@ -320,11 +327,11 @@ class Archive(object):
def delete(self, cache):
unpacker = msgpack.Unpacker(use_list=False)
for id in self.metadata['items']:
for id in self.metadata[b'items']:
unpacker.feed(self.key.decrypt(id, self.store.get(id)))
for item in unpacker:
try:
for chunk_id, size, csize in item['chunks']:
for chunk_id, size, csize in item[b'chunks']:
self.cache.chunk_decref(chunk_id)
except KeyError:
pass
@@ -337,40 +344,34 @@ class Archive(object):
def stat_attrs(self, st, path):
item = {
'mode': st.st_mode,
'uid': st.st_uid, 'user': uid2user(st.st_uid),
'gid': st.st_gid, 'group': gid2group(st.st_gid),
'mtime': st.st_mtime,
b'mode': st.st_mode,
b'uid': st.st_uid, b'user': uid2user(st.st_uid),
b'gid': st.st_gid, b'group': gid2group(st.st_gid),
b'mtime': st.st_mtime,
}
if self.numeric_owner:
item['user'] = item['group'] = None
item[b'user'] = item[b'group'] = None
try:
xa = xattr(path, XATTR_NOFOLLOW)
xattrs = {}
for key in xa:
# Only store the user namespace on Linux
if linux and not key.startswith('user'):
continue
xattrs[key] = xa[key]
xattrs = xattr.get_all(path, True)
if xattrs:
item['xattrs'] = xattrs
except IOError:
item[b'xattrs'] = dict(xattrs)
except EnvironmentError:
pass
return item
def process_item(self, path, st):
item = {'path': path.lstrip('/\\:')}
item = {b'path': path.lstrip('/\\:')}
item.update(self.stat_attrs(st, path))
self.add_item(item)
def process_dev(self, path, st):
item = {'path': path.lstrip('/\\:'), 'rdev': st.st_rdev}
item = {b'path': path.lstrip('/\\:'), b'rdev': st.st_rdev}
item.update(self.stat_attrs(st, path))
self.add_item(item)
def process_symlink(self, path, st):
source = os.readlink(path)
item = {'path': path.lstrip('/\\:'), 'source': source}
item = {b'path': path.lstrip('/\\:'), b'source': source}
item.update(self.stat_attrs(st, path))
self.add_item(item)
@@ -381,12 +382,12 @@ class Archive(object):
source = self.hard_links.get((st.st_ino, st.st_dev))
if (st.st_ino, st.st_dev) in self.hard_links:
item = self.stat_attrs(st, path)
item.update({'path': safe_path, 'source': source})
item.update({b'path': safe_path, b'source': source})
self.add_item(item)
return
else:
self.hard_links[st.st_ino, st.st_dev] = safe_path
path_hash = self.key.id_hash(path)
path_hash = self.key.id_hash(path.encode('utf-8', 'surrogateescape'))
ids = cache.file_known_and_unchanged(path_hash, st)
chunks = None
if ids is not None:
@ -404,7 +405,7 @@ class Archive(object):
chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
ids = [id for id, _, _ in chunks]
cache.memorize_file(path_hash, st, ids)
item = {'path': safe_path, 'chunks': chunks}
item = {b'path': safe_path, b'chunks': chunks}
item.update(self.stat_attrs(st, path))
self.stats.nfiles += 1
self.add_item(item)
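Most of the 'key' -> b'key' changes in this file follow from msgpack's Python 3 behavior: raw strings unpack as bytes when no encoding is given, so every item lookup becomes a bytes-key lookup, and decode_dict()/surrogateescape handle the str boundary. A small sketch, assuming the msgpack-python API of that era:

import msgpack

# Items round-trip through msgpack with bytes keys and bytes values.
packed = msgpack.packb({b'path': b'/etc/motd', b'mode': 0o100644})
item = msgpack.unpackb(packed)
assert b'path' in item and item[b'mode'] == 0o100644   # keys come back as bytes
# What decode_dict() in helpers.py does for selected keys:
path = item[b'path'].decode('utf-8', 'surrogateescape')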

View file

@@ -1,4 +1,5 @@
import argparse
from binascii import hexlify
from datetime import datetime
from operator import attrgetter
import os
@@ -11,7 +12,7 @@ from .cache import Cache
from .key import key_creator
from .helpers import location_validator, format_time, \
format_file_mode, IncludePattern, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
get_cache_dir, format_timedelta, prune_split, Manifest, Location
get_cache_dir, format_timedelta, prune_split, Manifest, Location, remove_surrogates
from .remote import StoreServer, RemoteStore
@@ -31,21 +32,21 @@ class Archiver(object):
def print_error(self, msg, *args):
msg = args and msg % args or msg
self.exit_code = 1
print >> sys.stderr, 'darc: ' + msg
print('darc: ' + msg, file=sys.stderr)
def print_verbose(self, msg, *args, **kw):
if self.verbose:
msg = args and msg % args or msg
if kw.get('newline', True):
print msg
print(msg)
else:
print msg,
print(msg, end=' ')
def do_serve(self, args):
return StoreServer().serve()
def do_init(self, args):
print 'Initializing store "%s"' % args.store.orig
print('Initializing store "%s"' % args.store.orig)
store = self.open_store(args.store, create=True)
key = key_creator(store, args)
manifest = Manifest()
@@ -87,7 +88,7 @@ class Archiver(object):
if args.dontcross:
try:
restrict_dev = os.lstat(path).st_dev
except OSError, e:
except OSError as e:
self.print_error('%s: %s', path, e)
continue
else:
@@ -97,14 +98,14 @@ class Archiver(object):
if args.stats:
t = datetime.now()
diff = t - t0
print '-' * 40
print 'Archive name: %s' % args.archive.archive
print 'Archive fingerprint: %s' % archive.id.encode('hex')
print 'Start time: %s' % t0.strftime('%c')
print 'End time: %s' % t.strftime('%c')
print 'Duration: %s' % format_timedelta(diff)
print('-' * 40)
print('Archive name: %s' % args.archive.archive)
print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
print('Start time: %s' % t0.strftime('%c'))
print('End time: %s' % t.strftime('%c'))
print('Duration: %s' % format_timedelta(diff))
archive.stats.print_()
print '-' * 40
print('-' * 40)
return self.exit_code
def _process(self, archive, cache, patterns, skip_inodes, path, restrict_dev):
@@ -112,7 +113,7 @@ class Archiver(object):
return
try:
st = os.lstat(path)
except OSError, e:
except OSError as e:
self.print_error('%s: %s', path, e)
return
if (st.st_ino, st.st_dev) in skip_inodes:
@@ -123,17 +124,17 @@ class Archiver(object):
# Ignore unix sockets
if stat.S_ISSOCK(st.st_mode):
return
self.print_verbose(path)
self.print_verbose(remove_surrogates(path))
if stat.S_ISREG(st.st_mode):
try:
archive.process_file(path, st, cache)
except IOError, e:
except IOError as e:
self.print_error('%s: %s', path, e)
elif stat.S_ISDIR(st.st_mode):
archive.process_item(path, st)
try:
entries = os.listdir(path)
except OSError, e:
except OSError as e:
self.print_error('%s: %s', path, e)
else:
for filename in sorted(entries):
@@ -154,18 +155,18 @@ class Archiver(object):
archive = Archive(store, key, manifest, args.archive.archive,
numeric_owner=args.numeric_owner)
dirs = []
for item, peek in archive.iter_items(lambda item: not exclude_path(item['path'], args.patterns)):
while dirs and not item['path'].startswith(dirs[-1]['path']):
for item, peek in archive.iter_items(lambda item: not exclude_path(item[b'path'], args.patterns)):
while dirs and not item[b'path'].startswith(dirs[-1][b'path']):
archive.extract_item(dirs.pop(-1), args.dest)
self.print_verbose(item['path'])
self.print_verbose(remove_surrogates(item[b'path']))
try:
if stat.S_ISDIR(item['mode']):
if stat.S_ISDIR(item[b'mode']):
dirs.append(item)
archive.extract_item(item, args.dest, restore_attrs=False)
else:
archive.extract_item(item, args.dest, peek=peek)
except IOError, e:
self.print_error('%s: %s', item['path'], e)
except IOError as e:
self.print_error('%s: %s', remove_surrogates(item[b'path']), e)
while dirs:
archive.extract_item(dirs.pop(-1), args.dest)
@@ -183,32 +184,32 @@ class Archiver(object):
store = self.open_store(args.src)
manifest, key = Manifest.load(store)
if args.src.archive:
tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 010: '-', 012: 'l', 014: 's'}
tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'}
archive = Archive(store, key, manifest, args.src.archive)
for item, _ in archive.iter_items():
type = tmap.get(item['mode'] / 4096, '?')
mode = format_file_mode(item['mode'])
type = tmap.get(item[b'mode'] // 4096, '?')
mode = format_file_mode(item[b'mode'])
size = 0
if type == '-':
try:
size = sum(size for _, size, _ in item['chunks'])
size = sum(size for _, size, _ in item[b'chunks'])
except KeyError:
pass
mtime = format_time(datetime.fromtimestamp(item['mtime']))
if 'source' in item:
mtime = format_time(datetime.fromtimestamp(item[b'mtime']))
if b'source' in item:
if type == 'l':
extra = ' -> %s' % item['source']
extra = ' -> %s' % item[b'source']
else:
type = 'h'
extra = ' link to %s' % item['source']
extra = ' link to %s' % item[b'source']
else:
extra = ''
print '%s%s %-6s %-6s %8d %s %s%s' % (type, mode, item['user'] or item['uid'],
item['group'] or item['gid'], size, mtime,
item['path'], extra)
print('%s%s %-6s %-6s %8d %s %s%s' % (type, mode, item[b'user'] or item[b'uid'],
item[b'group'] or item[b'gid'], size, mtime,
remove_surrogates(item[b'path']), extra))
else:
for archive in sorted(Archive.list_archives(store, key, manifest), key=attrgetter('ts')):
print '%-20s %s' % (archive.metadata['name'], to_localtime(archive.ts).strftime('%c'))
print('%-20s %s' % (archive.metadata[b'name'], to_localtime(archive.ts).strftime('%c')))
return self.exit_code
def do_verify(self, args):
@@ -217,16 +218,16 @@ class Archiver(object):
archive = Archive(store, key, manifest, args.archive.archive)
def start_cb(item):
self.print_verbose('%s ...', item['path'], newline=False)
self.print_verbose('%s ...', remove_surrogates(item[b'path']), newline=False)
def result_cb(item, success):
if success:
self.print_verbose('OK')
else:
self.print_verbose('ERROR')
self.print_error('%s: verification failed' % item['path'])
for item, peek in archive.iter_items(lambda item: not exclude_path(item['path'], args.patterns)):
if stat.S_ISREG(item['mode']) and 'chunks' in item:
self.print_error('%s: verification failed' % remove_surrogates(item[b'path']))
for item, peek in archive.iter_items(lambda item: not exclude_path(item[b'path'], args.patterns)):
if stat.S_ISREG(item[b'mode']) and b'chunks' in item:
archive.verify_file(item, start_cb, result_cb, peek=peek)
return self.exit_code
@@ -236,12 +237,12 @@ class Archiver(object):
cache = Cache(store, key, manifest)
archive = Archive(store, key, manifest, args.archive.archive, cache=cache)
stats = archive.calc_stats(cache)
print 'Name:', archive.name
print 'Fingerprint: %s' % archive.id.encode('hex')
print 'Hostname:', archive.metadata['hostname']
print 'Username:', archive.metadata['username']
print 'Time:', to_localtime(archive.ts).strftime('%c')
print 'Command line:', ' '.join(archive.metadata['cmdline'])
print('Name:', archive.name)
print('Fingerprint: %s' % hexlify(archive.id).decode('ascii'))
print('Hostname:', archive.metadata[b'hostname'])
print('Username:', archive.metadata[b'username'])
print('Time: %s' % to_localtime(archive.ts).strftime('%c'))
print('Command line:', remove_surrogates(' '.join(archive.metadata[b'cmdline'])))
stats.print_()
return self.exit_code
@@ -419,10 +420,10 @@ def main():
except Store.AlreadyExists:
archiver.print_error('Error: Store already exists')
exit_code = 1
except Archive.AlreadyExists, e:
except Archive.AlreadyExists as e:
archiver.print_error('Error: Archive "%s" already exists', e)
exit_code = 1
except Archive.DoesNotExist, e:
except Archive.DoesNotExist as e:
archiver.print_error('Error: Archive "%s" does not exist', e)
exit_code = 1
except KeyboardInterrupt:
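The tmap change above is more than an octal-syntax fix: the table indexes the file-type nibble of st_mode, and item[b'mode'] // 4096 (Python 3's floor division, equal to mode >> 12) selects it. A quick consistency check against the stat module:

import stat

tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 0o10: '-', 0o12: 'l', 0o14: 's'}
for flag, letter in ((stat.S_IFIFO, 'p'), (stat.S_IFCHR, 'c'),
                     (stat.S_IFDIR, 'd'), (stat.S_IFBLK, 'b'),
                     (stat.S_IFREG, '-'), (stat.S_IFLNK, 'l'),
                     (stat.S_IFSOCK, 's')):
    assert tmap[flag // 4096] == letter   # // replaces Python 2's integer /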

View file

@@ -1,12 +1,12 @@
from __future__ import with_statement
from ConfigParser import RawConfigParser
from configparser import RawConfigParser
import fcntl
from itertools import izip_longest
from itertools import zip_longest
import msgpack
import os
from binascii import hexlify, unhexlify
import shutil
from .helpers import get_cache_dir
from .helpers import get_cache_dir, decode_dict
from .hashindex import ChunkIndex
@@ -19,7 +19,7 @@ class Cache(object):
self.store = store
self.key = key
self.manifest = manifest
self.path = os.path.join(get_cache_dir(), store.id.encode('hex'))
self.path = os.path.join(get_cache_dir(), hexlify(store.id).decode('ascii'))
if not os.path.exists(self.path):
self.create()
self.open()
@@ -31,17 +31,17 @@ class Cache(object):
"""Create a new empty store at `path`
"""
os.makedirs(self.path)
with open(os.path.join(self.path, 'README'), 'wb') as fd:
with open(os.path.join(self.path, 'README'), 'w') as fd:
fd.write('This is a DARC cache')
config = RawConfigParser()
config.add_section('cache')
config.set('cache', 'version', '1')
config.set('cache', 'store', self.store.id.encode('hex'))
config.set('cache', 'store', hexlify(self.store.id).decode('ascii'))
config.set('cache', 'manifest', '')
with open(os.path.join(self.path, 'config'), 'wb') as fd:
with open(os.path.join(self.path, 'config'), 'w') as fd:
config.write(fd)
ChunkIndex.create(os.path.join(self.path, 'chunks'))
with open(os.path.join(self.path, 'files'), 'wb') as fd:
ChunkIndex.create(os.path.join(self.path, 'chunks').encode('utf-8'))
with open(os.path.join(self.path, 'files'), 'w') as fd:
pass # empty file
def open(self):
@@ -55,8 +55,8 @@ class Cache(object):
if self.config.getint('cache', 'version') != 1:
raise Exception('%s Does not look like a darc cache')
self.id = self.config.get('cache', 'store')
self.manifest_id = self.config.get('cache', 'manifest').decode('hex')
self.chunks = ChunkIndex(os.path.join(self.path, 'chunks'))
self.manifest_id = unhexlify(self.config.get('cache', 'manifest').encode('ascii')) # .encode needed for Python 3.[0-2]
self.chunks = ChunkIndex(os.path.join(self.path, 'chunks').encode('utf-8'))
self.files = None
def _read_files(self):
@@ -91,12 +91,12 @@ class Cache(object):
return
if self.files is not None:
with open(os.path.join(self.path, 'files'), 'wb') as fd:
for item in self.files.iteritems():
for item in self.files.items():
# Discard cached files with the newest mtime to avoid
# issues with filesystem snapshots and mtime precision
if item[1][0] < 10 and item[1][3] < self._newest_mtime:
msgpack.pack(item, fd)
self.config.set('cache', 'manifest', self.manifest.id.encode('hex'))
self.config.set('cache', 'manifest', hexlify(self.manifest.id).decode('ascii'))
with open(os.path.join(self.path, 'config'), 'w') as fd:
self.config.write(fd)
self.chunks.flush()
@@ -130,23 +130,24 @@ class Cache(object):
except KeyError:
self.chunks[id] = 1, size, csize
self.begin_txn()
print 'Initializing cache...'
print('Initializing cache...')
self.chunks.clear()
unpacker = msgpack.Unpacker()
for name, info in self.manifest.archives.items():
id = info['id']
id = info[b'id']
cdata = self.store.get(id)
data = self.key.decrypt(id, cdata)
add(id, len(data), len(cdata))
archive = msgpack.unpackb(data)
print 'Analyzing archive:', archive['name']
for id, chunk in izip_longest(archive['items'], self.store.get_many(archive['items'])):
decode_dict(archive, (b'name', b'hostname', b'username', b'time')) # fixme: argv
print('Analyzing archive:', archive[b'name'])
for id, chunk in zip_longest(archive[b'items'], self.store.get_many(archive[b'items'])):
data = self.key.decrypt(id, chunk)
add(id, len(data), len(chunk))
unpacker.feed(data)
for item in unpacker:
try:
for id, size, csize in item['chunks']:
for id, size, csize in item[b'chunks']:
add(id, size, csize)
except KeyError:
pass
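bytes.encode('hex') / str.decode('hex') no longer exist in Python 3, so the cache now crosses the bytes/str boundary explicitly with binascii: ids stay bytes in memory and on the wire, while hex str is used for configparser values and filesystem paths. A sketch of the round trip used above:

from binascii import hexlify, unhexlify

store_id = b'\xde\xad\xbe\xef' * 8              # 32-byte id, kept as bytes
text = hexlify(store_id).decode('ascii')        # str for config files and paths
assert unhexlify(text.encode('ascii')) == store_id
# The .encode('ascii') mirrors the "needed for Python 3.[0-2]" notes above,
# where unhexlify did not yet accept str input.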

View file

@@ -27,7 +27,8 @@ cdef class IndexBase:
raise Exception('Failed to open %s' % path)
def __dealloc__(self):
hashindex_close(self.index)
if self.index:
hashindex_close(self.index)
def clear(self):
hashindex_clear(self.index)
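The new guard matters because __dealloc__ also runs when construction failed (the 'Failed to open' path above), at which point self.index may still be NULL. A rough Python analogue of the same defensive pattern:

class IndexBase:
    def __init__(self, path):
        self.index = None              # set early so cleanup is always safe
        self.index = open(path, 'rb')  # may raise, leaving index as None
    def close(self):
        if self.index:                 # skip cleanup if __init__ failed
            self.index.close()
            self.index = None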

View file

@@ -1,4 +1,3 @@
from __future__ import with_statement
import argparse
from datetime import datetime, timedelta
from fnmatch import fnmatchcase
@@ -16,7 +15,7 @@ import urllib
class Manifest(object):
MANIFEST_ID = '\0' * 32
MANIFEST_ID = b'\0' * 32
def __init__(self):
self.archives = {}
@@ -32,10 +31,10 @@ class Manifest(object):
data = key.decrypt(None, cdata)
manifest.id = key.id_hash(data)
m = msgpack.unpackb(data)
if not m.get('version') == 1:
if not m.get(b'version') == 1:
raise ValueError('Invalid manifest version')
manifest.archives = m['archives']
manifest.config = m['config']
manifest.archives = dict((k.decode('utf-8'), v) for k,v in m[b'archives'].items())
manifest.config = m[b'config']
return manifest, key
def write(self):
@@ -75,21 +74,10 @@ class Statistics(object):
self.usize += csize
def print_(self):
print 'Number of files: %d' % self.nfiles
print 'Original size: %d (%s)' % (self.osize, format_file_size(self.osize))
print 'Compressed size: %s (%s)' % (self.csize, format_file_size(self.csize))
print 'Unique data: %d (%s)' % (self.usize, format_file_size(self.usize))
# OSX filenames are UTF-8 Only so any non-utf8 filenames are url encoded
if sys.platform == 'darwin':
def encode_filename(name):
try:
return name.decode('utf-8')
except UnicodeDecodeError:
return urllib.quote(name)
else:
encode_filename = str
print('Number of files: %d' % self.nfiles)
print('Original size: %d (%s)' % (self.osize, format_file_size(self.osize)))
print('Compressed size: %s (%s)' % (self.csize, format_file_size(self.csize)))
print('Unique data: %d (%s)' % (self.usize, format_file_size(self.usize)))
def get_keys_dir():
@@ -212,7 +200,7 @@ def format_file_mode(mod):
def x(v):
return ''.join(v & m and s or '-'
for m, s in ((4, 'r'), (2, 'w'), (1, 'x')))
return '%s%s%s' % (x(mod / 64), x(mod / 8), x(mod))
return '%s%s%s' % (x(mod // 64), x(mod // 8), x(mod))
def format_file_size(v):
@@ -377,3 +365,14 @@ def write_msgpack(filename, d):
fd.flush()
os.fsync(fd)
os.rename(filename + '.tmp', filename)
def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
for key in keys:
if isinstance(d.get(key), bytes):
d[key] = d[key].decode(encoding, errors)
return d
def remove_surrogates(s, errors='replace'):
return s.encode('utf-8', errors).decode('utf-8')
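The two helpers added above lean on PEP 383 (surrogateescape): undecodable filename bytes survive a utf-8 decode as lone surrogates and can be encoded back losslessly, while remove_surrogates() produces a printable, lossy copy for output. A short demonstration:

def remove_surrogates(s, errors='replace'):   # as defined above
    return s.encode('utf-8', errors).decode('utf-8')

raw = b'caf\xe9'                                  # latin-1 bytes on disk
name = raw.decode('utf-8', 'surrogateescape')     # 'caf\udce9', byte preserved
assert name.encode('utf-8', 'surrogateescape') == raw   # lossless round trip
assert remove_surrogates(name) == 'caf?'          # lossy but safe to print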

View file

@@ -1,4 +1,4 @@
from __future__ import with_statement
from binascii import hexlify, a2b_base64, b2a_base64
from getpass import getpass
import os
import msgpack
@@ -16,11 +16,11 @@ from Crypto.Protocol.KDF import PBKDF2
from .helpers import IntegrityError, get_keys_dir, Location
PREFIX = '\0' * 8
PREFIX = b'\0' * 8
KEYFILE = '\0'
PASSPHRASE = '\1'
PLAINTEXT = '\2'
KEYFILE = b'\0'
PASSPHRASE = b'\1'
PLAINTEXT = b'\2'
def key_creator(store, args):
@@ -33,11 +33,11 @@ def key_factory(store, manifest_data):
def key_factory(store, manifest_data):
if manifest_data[0] == KEYFILE:
if manifest_data[:1] == KEYFILE:
return KeyfileKey.detect(store, manifest_data)
elif manifest_data[0] == PASSPHRASE:
elif manifest_data[:1] == PASSPHRASE:
return PassphraseKey.detect(store, manifest_data)
elif manifest_data[0] == PLAINTEXT:
elif manifest_data[:1] == PLAINTEXT:
return PlaintextKey.detect(store, manifest_data)
else:
raise Exception('Unknown Key type %d' % ord(manifest_data[0]))
@@ -67,7 +67,7 @@ class PlaintextKey(KeyBase):
@classmethod
def create(cls, store, args):
print 'Encryption NOT enabled.\nUse the --key-file or --passphrase options to enable encryption.'
print('Encryption NOT enabled.\nUse the --key-file or --passphrase options to enable encryption.')
return cls()
@classmethod
@@ -78,12 +78,12 @@ class PlaintextKey(KeyBase):
return SHA256.new(data).digest()
def encrypt(self, data):
return ''.join([self.TYPE, zlib.compress(data)])
return b''.join([self.TYPE, zlib.compress(data)])
def decrypt(self, id, data):
if data[0] != self.TYPE:
if data[:1] != self.TYPE:
raise IntegrityError('Invalid encryption envelope')
data = zlib.decompress(buffer(data, 1))
data = zlib.decompress(memoryview(data)[1:])
if id and SHA256.new(data).digest() != id:
raise IntegrityError('Chunk id verification failed')
return data
@@ -99,26 +99,26 @@ class AESKeyBase(KeyBase):
def encrypt(self, data):
data = zlib.compress(data)
nonce = long_to_bytes(self.counter.next_value(), 8)
data = ''.join((nonce, AES.new(self.enc_key, AES.MODE_CTR, '',
data = b''.join((nonce, AES.new(self.enc_key, AES.MODE_CTR, b'',
counter=self.counter).encrypt(data)))
hash = HMAC.new(self.enc_hmac_key, data, SHA256).digest()
return ''.join((self.TYPE, hash, data))
return b''.join((self.TYPE, hash, data))
def decrypt(self, id, data):
if data[0] != self.TYPE:
if data[:1] != self.TYPE:
raise IntegrityError('Invalid encryption envelope')
hash = buffer(data, 1, 32)
if buffer(HMAC.new(self.enc_hmac_key, buffer(data, 33), SHA256).digest()) != hash:
hash = memoryview(data)[1:33]
if memoryview(HMAC.new(self.enc_hmac_key, memoryview(data)[33:], SHA256).digest()) != hash:
raise IntegrityError('Encryption envelope checksum mismatch')
nonce = bytes_to_long(buffer(data, 33, 8))
nonce = bytes_to_long(memoryview(data)[33:41])
counter = Counter.new(64, initial_value=nonce, prefix=PREFIX)
data = zlib.decompress(AES.new(self.enc_key, AES.MODE_CTR, counter=counter).decrypt(buffer(data, 41)))
data = zlib.decompress(AES.new(self.enc_key, AES.MODE_CTR, counter=counter).decrypt(memoryview(data)[41:]))
if id and HMAC.new(self.id_key, data, SHA256).digest() != id:
raise IntegrityError('Chunk id verification failed')
return data
def extract_iv(self, payload):
if payload[0] != self.TYPE:
if payload[:1] != self.TYPE:
raise IntegrityError('Invalid encryption envelope')
nonce = bytes_to_long(payload[33:41])
return nonce
@@ -149,14 +149,14 @@ class PassphraseKey(AESKeyBase):
while passphrase != passphrase2:
passphrase = getpass('Enter passphrase: ')
if not passphrase:
print 'Passphrase must not be blank'
print('Passphrase must not be blank')
continue
passphrase2 = getpass('Enter same passphrase again: ')
if passphrase != passphrase2:
print 'Passphrases do not match'
print('Passphrases do not match')
key.init(store, passphrase)
if passphrase:
print 'Remember your passphrase. Your data will be inaccessible without it.'
print('Remember your passphrase. Your data will be inaccessible without it.')
return key
@classmethod
@@ -198,40 +198,40 @@ class KeyfileKey(AESKeyBase):
@classmethod
def find_key_file(cls, store):
id = store.id.encode('hex')
id = hexlify(store.id).decode('ascii')
keys_dir = get_keys_dir()
for name in os.listdir(keys_dir):
filename = os.path.join(keys_dir, name)
with open(filename, 'rb') as fd:
with open(filename, 'r') as fd:
line = fd.readline().strip()
if line and line.startswith(cls.FILE_ID) and line[9:] == id:
return filename
raise Exception('Key file for store with ID %s not found' % id)
def load(self, filename, passphrase):
with open(filename, 'rb') as fd:
cdata = (''.join(fd.readlines()[1:])).decode('base64')
with open(filename, 'r') as fd:
cdata = a2b_base64(''.join(fd.readlines()[1:]).encode('ascii')) # .encode needed for Python 3.[0-2]
data = self.decrypt_key_file(cdata, passphrase)
if data:
key = msgpack.unpackb(data)
if key['version'] != 1:
if key[b'version'] != 1:
raise IntegrityError('Invalid key file header')
self.store_id = key['store_id']
self.enc_key = key['enc_key']
self.enc_hmac_key = key['enc_hmac_key']
self.id_key = key['id_key']
self.chunk_seed = key['chunk_seed']
self.store_id = key[b'store_id']
self.enc_key = key[b'enc_key']
self.enc_hmac_key = key[b'enc_hmac_key']
self.id_key = key[b'id_key']
self.chunk_seed = key[b'chunk_seed']
self.counter = Counter.new(64, initial_value=1, prefix=PREFIX)
self.path = filename
return True
def decrypt_key_file(self, data, passphrase):
d = msgpack.unpackb(data)
assert d['version'] == 1
assert d['algorithm'] == 'SHA256'
key = PBKDF2(passphrase, d['salt'], 32, d['iterations'], SHA256_PDF)
data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d['data'])
if HMAC.new(key, data, SHA256).digest() != d['hash']:
assert d[b'version'] == 1
assert d[b'algorithm'] == b'SHA256'
key = PBKDF2(passphrase, d[b'salt'], 32, d[b'iterations'], SHA256_PDF)
data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d[b'data'])
if HMAC.new(key, data, SHA256).digest() != d[b'hash']:
return None
return data
@@ -261,9 +261,9 @@ class KeyfileKey(AESKeyBase):
'chunk_seed': self.chunk_seed,
}
data = self.encrypt_key_file(msgpack.packb(key), passphrase)
with open(path, 'wb') as fd:
fd.write('%s %s\n' % (self.FILE_ID, self.store_id.encode('hex')))
fd.write(data.encode('base64'))
with open(path, 'w') as fd:
fd.write('%s %s\n' % (self.FILE_ID, hexlify(self.store_id).decode('ascii')))
fd.write(b2a_base64(data).decode('ascii'))
self.path = path
def change_passphrase(self):
@@ -272,9 +272,9 @@ class KeyfileKey(AESKeyBase):
passphrase = getpass('New passphrase: ')
passphrase2 = getpass('Enter same passphrase again: ')
if passphrase != passphrase2:
print 'Passphrases do not match'
print('Passphrases do not match')
self.save(self.path, passphrase)
print 'Key file "%s" updated' % self.path
print('Key file "%s" updated' % self.path)
@classmethod
def create(cls, store, args):
@@ -293,13 +293,13 @@ class KeyfileKey(AESKeyBase):
passphrase = getpass('Enter passphrase (empty for no passphrase):')
passphrase2 = getpass('Enter same passphrase again: ')
if passphrase != passphrase2:
print 'Passphrases do not match'
print('Passphrases do not match')
key = cls()
key.store_id = store.id
key.init_from_random_data(get_random_bytes(100))
key.save(path, passphrase)
print 'Key file "%s" created.' % key.path
print 'Keep this file safe. Your data will be inaccessible without it.'
print('Key file "%s" created.' % key.path)
print('Keep this file safe. Your data will be inaccessible without it.')
return key
@@ -317,7 +317,7 @@ class KeyTestCase(unittest.TestCase):
orig = '/some/place'
_location = _Location()
id = '\0' * 32
id = b'\0' * 32
def setUp(self):
self.tmpdir = tempfile.mkdtemp()
@@ -328,8 +328,8 @@ class KeyTestCase(unittest.TestCase):
def test_plaintext(self):
key = PlaintextKey.create(None, None)
data = 'foo'
self.assertEqual(key.id_hash(data).encode('hex'), '2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae')
data = b'foo'
self.assertEqual(hexlify(key.id_hash(data)), b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae')
self.assertEqual(data, key.decrypt(key.id_hash(data), key.encrypt(data)))
def test_keyfile(self):
@@ -338,25 +338,25 @@ class KeyTestCase(unittest.TestCase):
os.environ['DARC_PASSPHRASE'] = 'test'
key = KeyfileKey.create(self.MockStore(), MockArgs())
self.assertEqual(bytes_to_long(key.counter()), 1)
manifest = key.encrypt('')
manifest = key.encrypt(b'')
iv = key.extract_iv(manifest)
key2 = KeyfileKey.detect(self.MockStore(), manifest)
self.assertEqual(bytes_to_long(key2.counter()), iv + 1000)
# Key data sanity check
self.assertEqual(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3)
self.assertEqual(key2.chunk_seed == 0, False)
data = 'foo'
data = b'foo'
self.assertEqual(data, key2.decrypt(key.id_hash(data), key.encrypt(data)))
def test_passphrase(self):
os.environ['DARC_PASSPHRASE'] = 'test'
key = PassphraseKey.create(self.MockStore(), None)
self.assertEqual(bytes_to_long(key.counter()), 1)
self.assertEqual(key.id_key.encode('hex'), 'f28e915da78a972786da47fee6c4bd2960a421b9bdbdb35a7942eb82552e9a72')
self.assertEqual(key.enc_hmac_key.encode('hex'), '169c6082f209e524ea97e2c75318936f6e93c101b9345942a95491e9ae1738ca')
self.assertEqual(key.enc_key.encode('hex'), 'c05dd423843d4dd32a52e4dc07bb11acabe215917fc5cf3a3df6c92b47af79ba')
self.assertEqual(hexlify(key.id_key), b'f28e915da78a972786da47fee6c4bd2960a421b9bdbdb35a7942eb82552e9a72')
self.assertEqual(hexlify(key.enc_hmac_key), b'169c6082f209e524ea97e2c75318936f6e93c101b9345942a95491e9ae1738ca')
self.assertEqual(hexlify(key.enc_key), b'c05dd423843d4dd32a52e4dc07bb11acabe215917fc5cf3a3df6c92b47af79ba')
self.assertEqual(key.chunk_seed, -324662077)
manifest = key.encrypt('')
manifest = key.encrypt(b'')
iv = key.extract_iv(manifest)
key2 = PassphraseKey.detect(self.MockStore(), manifest)
self.assertEqual(bytes_to_long(key2.counter()), iv + 1000)
@@ -364,8 +364,8 @@ class KeyTestCase(unittest.TestCase):
self.assertEqual(key.enc_hmac_key, key2.enc_hmac_key)
self.assertEqual(key.enc_key, key2.enc_key)
self.assertEqual(key.chunk_seed, key2.chunk_seed)
data = 'foo'
self.assertEqual(key.id_hash(data).encode('hex'), '016c27cd40dc8e84f196f3b43a9424e8472897e09f6935d0d3a82fb41664bad7')
data = b'foo'
self.assertEqual(hexlify(key.id_hash(data)), b'016c27cd40dc8e84f196f3b43a9424e8472897e09f6935d0d3a82fb41664bad7')
self.assertEqual(data, key2.decrypt(key2.id_hash(data), key.encrypt(data)))
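Several changes in this file (manifest_data[0] -> manifest_data[:1], data[0] -> data[:1]) work around a classic porting trap: indexing bytes yields an int on Python 3, so comparing a single indexed byte against one-byte constants like KEYFILE = b'\0' is always False. Slicing keeps both sides bytes:

data = b'\x00rest-of-envelope'
KEYFILE = b'\0'
assert data[0] == 0           # indexing gives an int on Python 3
assert data[0] != KEYFILE     # int vs bytes: never equal
assert data[:1] == KEYFILE    # a one-byte slice compares as intended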

View file

@@ -1,4 +1,3 @@
from UserDict import DictMixin
from heapq import heappush, heapify, heapreplace, heappop
import unittest
@@ -56,7 +55,7 @@ class LRUCacheTestCase(unittest.TestCase):
c[x] = i
self.assertEqual(len(c), 2)
self.assertEqual(set(c), set(['b', 'c']))
self.assertEqual(set(c.iteritems()), set([('b', 1), ('c', 2)]))
self.assertEqual(set(c.items()), set([('b', 1), ('c', 2)]))
self.assertEqual(False, 'a' in c)
self.assertEqual(True, 'b' in c)
self.assertRaises(KeyError, lambda: c['a'])

View file

@@ -1,4 +1,3 @@
from __future__ import with_statement
import fcntl
import msgpack
import os
@@ -35,16 +34,17 @@ class StoreServer(object):
return
unpacker.feed(data)
for type, msgid, method, args in unpacker:
method = method.decode('ascii')
try:
try:
f = getattr(self, method)
except AttributeError:
f = getattr(self.store, method)
res = f(*args)
except Exception, e:
sys.stdout.write(msgpack.packb((1, msgid, e.__class__.__name__, None)))
except Exception as e:
sys.stdout.buffer.write(msgpack.packb((1, msgid, e.__class__.__name__, None)))
else:
sys.stdout.write(msgpack.packb((1, msgid, None, res)))
sys.stdout.buffer.write(msgpack.packb((1, msgid, None, res)))
sys.stdout.flush()
if es:
return
@@ -53,6 +53,7 @@ class StoreServer(object):
return 1
def open(self, path, create=False):
path = os.fsdecode(path)
if path.startswith('/~'):
path = path[1:]
self.store = Store(os.path.expanduser(path), create)
@@ -69,7 +70,7 @@ class RemoteStore(object):
def __init__(self, location, create=False):
self.p = None
self.cache = LRUCache(256)
self.to_send = ''
self.to_send = b''
self.extra = {}
self.pending = {}
self.unpacker = msgpack.Unpacker(use_list=False)
@@ -89,10 +90,10 @@ class RemoteStore(object):
raise Exception('Server insisted on using unsupported protocol version %d' % version)
try:
self.id = self.call('open', (location.path, create))
except self.RPCError, e:
if e.name == 'DoesNotExist':
except self.RPCError as e:
if e.name == b'DoesNotExist':
raise Store.DoesNotExist
elif e.name == 'AlreadyExists':
elif e.name == b'AlreadyExists':
raise Store.AlreadyExists
def __del__(self):
@@ -127,7 +128,7 @@ class RemoteStore(object):
if to_send:
n = os.write(self.stdin_fd, to_send)
assert n > 0
to_send = buffer(to_send, n)
to_send = memoryview(to_send)[n:]
else:
w_fds = []
@@ -167,7 +168,7 @@ class RemoteStore(object):
msgid, resp, error = self.cache[args]
m = max(m, msgid)
self.extra.setdefault(m, []).append((args, resp, error))
return ''.join(data)
return b''.join(data)
def gen_cache_requests(self, cmd, peek):
data = []
@@ -183,7 +184,7 @@ class RemoteStore(object):
self.pending[msgid] = args
self.cache[args] = msgid, None, None
data.append(msgpack.packb((1, msgid, cmd, args)))
return ''.join(data)
return b''.join(data)
def call_multi(self, cmd, argsv, wait=True, peek=None):
w_fds = [self.stdin_fd]
@@ -212,7 +213,8 @@ class RemoteStore(object):
if self.to_send:
n = os.write(self.stdin_fd, self.to_send)
assert n > 0
self.to_send = buffer(self.to_send, n)
# self.to_send = memoryview(self.to_send)[n:]
self.to_send = self.to_send[n:]
else:
w_fds = []
if not wait:
@@ -231,8 +233,8 @@ class RemoteStore(object):
try:
for res in self.call_multi('get', [(id, )]):
return res
except self.RPCError, e:
if e.name == 'DoesNotExist':
except self.RPCError as e:
if e.name == b'DoesNotExist':
raise Store.DoesNotExist
raise
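The write loops above trim the unsent tail after a short os.write(). Python 2's buffer() did that without copying; memoryview is the Python 3 equivalent (the commented-out line keeps a plain, copying slice where to_send is later compared and concatenated). A sketch of the zero-copy variant:

import os

def send_all(fd, payload):
    view = memoryview(payload)     # zero-copy window over the buffer
    while view:                    # an empty memoryview is falsy
        n = os.write(fd, view)
        assert n > 0
        view = view[n:]            # drop the bytes already written

r, w = os.pipe()
send_all(w, b'hello')
assert os.read(r, 5) == b'hello'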

View file

@@ -1,5 +1,5 @@
from __future__ import with_statement
from ConfigParser import RawConfigParser
from configparser import RawConfigParser
from binascii import hexlify, unhexlify
import fcntl
import os
import re
@@ -40,6 +40,7 @@ class Store(object):
"""Requested key does not exist"""
def __init__(self, path, create=False):
self.io = None
if create:
self.create(path)
self.open(path)
@@ -51,7 +52,7 @@ class Store(object):
raise self.AlreadyExists(path)
if not os.path.exists(path):
os.mkdir(path)
with open(os.path.join(path, 'README'), 'wb') as fd:
with open(os.path.join(path, 'README'), 'w') as fd:
fd.write('This is a DARC store')
os.mkdir(os.path.join(path, 'data'))
config = RawConfigParser()
@@ -59,7 +60,7 @@ class Store(object):
config.set('store', 'version', '1')
config.set('store', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
config.set('store', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
config.set('store', 'id', os.urandom(32).encode('hex'))
config.set('store', 'id', hexlify(os.urandom(32)).decode('ascii'))
with open(os.path.join(path, 'config'), 'w') as fd:
config.write(fd)
@@ -76,10 +77,11 @@ class Store(object):
raise Exception('%s Does not look like a darc store')
self.max_segment_size = self.config.getint('store', 'max_segment_size')
self.segments_per_dir = self.config.getint('store', 'segments_per_dir')
self.id = self.config.get('store', 'id').decode('hex')
self.id = unhexlify(self.config.get('store', 'id').strip().encode('ascii')) # .encode needed for Python 3.[0-2]
self.rollback()
def close(self):
self.rollback()
self.lock_fd.close()
def commit(self, rollback=True):
@@ -97,26 +99,26 @@ class Store(object):
def open_index(self, head, read_only=False):
if head is None:
self.index = NSIndex.create(os.path.join(self.path, 'index.tmp'))
self.index = NSIndex.create(os.path.join(self.path, 'index.tmp').encode('utf-8'))
self.segments = {}
self.compact = set()
else:
if read_only:
self.index = NSIndex(os.path.join(self.path, 'index.%d') % head)
self.index = NSIndex((os.path.join(self.path, 'index.%d') % head).encode('utf-8'))
else:
shutil.copy(os.path.join(self.path, 'index.%d' % head),
os.path.join(self.path, 'index.tmp'))
self.index = NSIndex(os.path.join(self.path, 'index.tmp'))
self.index = NSIndex(os.path.join(self.path, 'index.tmp').encode('utf-8'))
hints = read_msgpack(os.path.join(self.path, 'hints.%d' % head))
if hints['version'] != 1:
if hints[b'version'] != 1:
raise ValueError('Unknown hints file version: %d' % hints['version'])
self.segments = hints['segments']
self.compact = set(hints['compact'])
self.segments = hints[b'segments']
self.compact = set(hints[b'compact'])
def write_index(self):
hints = {'version': 1,
'segments': self.segments,
'compact': list(self.compact)}
hints = {b'version': 1,
b'segments': self.segments,
b'compact': list(self.compact)}
write_msgpack(os.path.join(self.path, 'hints.%d' % self.io.head), hints)
self.index.flush()
os.rename(os.path.join(self.path, 'index.tmp'),
@@ -192,6 +194,8 @@ class Store(object):
"""
"""
self._active_txn = False
if self.io:
self.io.close()
self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir)
if self.io.head is not None and not os.path.exists(os.path.join(self.path, 'index.%d' % self.io.head)):
self.recover(self.path)
@@ -273,15 +277,15 @@ class LoggedIO(object):
self.cleanup()
def close(self):
for segment in self.fds.keys():
for segment in list(self.fds.keys()):
self.fds.pop(segment).close()
self.close_segment()
self.fds = None # Just to make sure we're disabled
def _segment_names(self, reverse=False):
for dirpath, dirs, filenames in os.walk(os.path.join(self.path, 'data')):
dirs.sort(lambda a, b: cmp(int(a), int(b)), reverse=reverse)
filenames.sort(lambda a, b: cmp(int(a), int(b)), reverse=reverse)
dirs.sort(key=int, reverse=reverse)
filenames.sort(key=int, reverse=reverse)
for filename in filenames:
yield int(filename), os.path.join(dirpath, filename)
@@ -304,18 +308,18 @@ class LoggedIO(object):
return fd.read(self.header_fmt.size) == self.COMMIT
def segment_filename(self, segment):
return os.path.join(self.path, 'data', str(segment / self.segments_per_dir), str(segment))
return os.path.join(self.path, 'data', str(segment // self.segments_per_dir), str(segment))
def get_write_fd(self, no_new=False):
if not no_new and self.offset and self.offset > self.limit:
self.close_segment()
if not self._write_fd:
if self.segment % self.segments_per_dir == 0:
dirname = os.path.join(self.path, 'data', str(self.segment / self.segments_per_dir))
dirname = os.path.join(self.path, 'data', str(self.segment // self.segments_per_dir))
if not os.path.exists(dirname):
os.mkdir(dirname)
self._write_fd = open(self.segment_filename(self.segment), 'ab')
self._write_fd.write('DSEGMENT')
self._write_fd.write(b'DSEGMENT')
self.offset = 8
return self._write_fd
@@ -336,7 +340,7 @@
def iter_objects(self, segment, lookup=None, include_data=False):
fd = self.get_fd(segment)
fd.seek(0)
if fd.read(8) != 'DSEGMENT':
if fd.read(8) != b'DSEGMENT':
raise IntegrityError('Invalid segment header')
offset = 8
header = fd.read(self.header_fmt.size)
@@ -345,7 +349,7 @@
if size > MAX_OBJECT_SIZE:
raise IntegrityError('Invalid segment object size')
rest = fd.read(size - self.header_fmt.size)
if crc32(rest, crc32(buffer(header, 4))) & 0xffffffff != crc:
if crc32(rest, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
raise IntegrityError('Segment checksum mismatch')
if tag not in (TAG_PUT, TAG_DELETE, TAG_COMMIT):
raise IntegrityError('Invalid segment entry header')
@@ -370,7 +374,7 @@ class LoggedIO(object):
if size > MAX_OBJECT_SIZE:
raise IntegrityError('Invalid segment object size')
data = fd.read(size - self.put_header_fmt.size)
if crc32(data, crc32(buffer(header, 4))) & 0xffffffff != crc:
if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
raise IntegrityError('Segment checksum mismatch')
if tag != TAG_PUT or id != key:
raise IntegrityError('Invalid segment entry header')
@@ -382,7 +386,7 @@ class LoggedIO(object):
offset = self.offset
header = self.header_no_crc_fmt.pack(size, TAG_PUT)
crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff)
fd.write(''.join((crc, header, id, data)))
fd.write(b''.join((crc, header, id, data)))
self.offset += size
return self.segment, offset
@@ -390,7 +394,7 @@
fd = self.get_write_fd()
header = self.header_no_crc_fmt.pack(self.put_header_fmt.size, TAG_DELETE)
crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff)
fd.write(''.join((crc, header, id)))
fd.write(b''.join((crc, header, id)))
self.offset += self.put_header_fmt.size
return self.segment
@@ -398,7 +402,7 @@
fd = self.get_write_fd(no_new=True)
header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT)
crc = self.crc_fmt.pack(crc32(header) & 0xffffffff)
fd.write(''.join((crc, header)))
fd.write(b''.join((crc, header)))
self.head = self.segment
self.close_segment()
@@ -421,13 +425,14 @@ class StoreTestCase(unittest.TestCase):
self.store = self.open(create=True)
def tearDown(self):
self.store.close()
shutil.rmtree(self.tmppath)
def test1(self):
for x in range(100):
self.store.put('%-32d' % x, 'SOMEDATA')
key50 = '%-32d' % 50
self.assertEqual(self.store.get(key50), 'SOMEDATA')
self.store.put(('%-32d' % x).encode('ascii'), b'SOMEDATA')
key50 = ('%-32d' % 50).encode('ascii')
self.assertEqual(self.store.get(key50), b'SOMEDATA')
self.store.delete(key50)
self.assertRaises(Store.DoesNotExist, lambda: self.store.get(key50))
self.store.commit()
@@ -437,55 +442,56 @@ class StoreTestCase(unittest.TestCase):
for x in range(100):
if x == 50:
continue
self.assertEqual(store2.get('%-32d' % x), 'SOMEDATA')
self.assertEqual(store2.get(('%-32d' % x).encode('ascii')), b'SOMEDATA')
store2.close()
def test2(self):
"""Test multiple sequential transactions
"""
self.store.put('00000000000000000000000000000000', 'foo')
self.store.put('00000000000000000000000000000001', 'foo')
self.store.put(b'00000000000000000000000000000000', b'foo')
self.store.put(b'00000000000000000000000000000001', b'foo')
self.store.commit()
self.store.delete('00000000000000000000000000000000')
self.store.put('00000000000000000000000000000001', 'bar')
self.store.delete(b'00000000000000000000000000000000')
self.store.put(b'00000000000000000000000000000001', b'bar')
self.store.commit()
self.assertEqual(self.store.get('00000000000000000000000000000001'), 'bar')
self.assertEqual(self.store.get(b'00000000000000000000000000000001'), b'bar')
def test_consistency(self):
"""Test cache consistency
"""
self.store.put('00000000000000000000000000000000', 'foo')
self.assertEqual(self.store.get('00000000000000000000000000000000'), 'foo')
self.store.put('00000000000000000000000000000000', 'foo2')
self.assertEqual(self.store.get('00000000000000000000000000000000'), 'foo2')
self.store.put('00000000000000000000000000000000', 'bar')
self.assertEqual(self.store.get('00000000000000000000000000000000'), 'bar')
self.store.delete('00000000000000000000000000000000')
self.assertRaises(Store.DoesNotExist, lambda: self.store.get('00000000000000000000000000000000'))
self.store.put(b'00000000000000000000000000000000', b'foo')
self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'foo')
self.store.put(b'00000000000000000000000000000000', b'foo2')
self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'foo2')
self.store.put(b'00000000000000000000000000000000', b'bar')
self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'bar')
self.store.delete(b'00000000000000000000000000000000')
self.assertRaises(Store.DoesNotExist, lambda: self.store.get(b'00000000000000000000000000000000'))
def test_consistency2(self):
"""Test cache consistency2
"""
self.store.put('00000000000000000000000000000000', 'foo')
self.assertEqual(self.store.get('00000000000000000000000000000000'), 'foo')
self.store.put(b'00000000000000000000000000000000', b'foo')
self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'foo')
self.store.commit()
self.store.put('00000000000000000000000000000000', 'foo2')
self.assertEqual(self.store.get('00000000000000000000000000000000'), 'foo2')
self.store.put(b'00000000000000000000000000000000', b'foo2')
self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'foo2')
self.store.rollback()
self.assertEqual(self.store.get('00000000000000000000000000000000'), 'foo')
self.assertEqual(self.store.get(b'00000000000000000000000000000000'), b'foo')
def test_single_kind_transactions(self):
# put
self.store.put('00000000000000000000000000000000', 'foo')
self.store.put(b'00000000000000000000000000000000', b'foo')
self.store.commit()
self.store.close()
# replace
self.store = self.open()
self.store.put('00000000000000000000000000000000', 'bar')
self.store.put(b'00000000000000000000000000000000', b'bar')
self.store.commit()
self.store.close()
# delete
self.store = self.open()
self.store.delete('00000000000000000000000000000000')
self.store.delete(b'00000000000000000000000000000000')
self.store.commit()
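list.sort() lost its cmp argument in Python 3, so _segment_names() above now sorts the numeric segment directory and file names with key=int, which also gives proper numeric rather than lexicographic order:

names = ['10', '2', '1', '30', '9']
names.sort(key=int)                    # cmp-style sorting is gone in Python 3
assert names == ['1', '2', '9', '10', '30']
names.sort(key=int, reverse=True)      # reverse traversal, as in the diff
assert names == ['30', '10', '9', '2', '1']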

View file

@@ -1,14 +1,13 @@
from __future__ import with_statement
import doctest
import filecmp
import os
from StringIO import StringIO
from io import BytesIO, StringIO
import stat
import sys
import shutil
import tempfile
import unittest
from xattr import xattr, XATTR_NOFOLLOW
import xattr
from . import helpers, lrucache
from .chunker import chunkify, buzhash, buzhash_update
@@ -17,6 +16,8 @@ from .key import suite as KeySuite
from .store import Store, suite as StoreSuite
from .remote import Store, suite as RemoteStoreSuite
utime_supports_fd = os.utime in getattr(os, 'supports_fd', {})
class Test(unittest.TestCase):
@@ -52,7 +53,7 @@ class Test(unittest.TestCase):
ret = self.archiver.run(args)
sys.stdout, sys.stderr = stdout, stderr
if ret != exit_code:
print output.getvalue()
print(output.getvalue())
self.assertEqual(exit_code, ret)
return output.getvalue()
finally:
@@ -67,13 +68,13 @@ class Test(unittest.TestCase):
filename = os.path.join(self.input_path, name)
if not os.path.exists(os.path.dirname(filename)):
os.makedirs(os.path.dirname(filename))
with open(filename, 'wbx') as fd:
fd.write('X' * size)
with open(filename, 'wb') as fd:
fd.write(b'X' * size)
def get_xattrs(self, path):
try:
return dict(xattr(path, XATTR_NOFOLLOW))
except IOError:
return xattr.get_all(path, True)
except EnvironmentError:
return {}
def diff_dirs(self, dir1, dir2):
@@ -87,8 +88,7 @@ class Test(unittest.TestCase):
s1 = os.lstat(path1)
s2 = os.lstat(path2)
attrs = ['st_mode', 'st_uid', 'st_gid', 'st_rdev']
# We can't restore symlink atime/mtime right now
if not os.path.islink(path1):
if not os.path.islink(path1) or utime_supports_fd:
attrs.append('st_mtime')
d1 = [filename] + [getattr(s1, a) for a in attrs]
d2 = [filename] + [getattr(s2, a) for a in attrs]
@@ -107,15 +107,13 @@ class Test(unittest.TestCase):
# File owner
os.chown('input/file1', 100, 200)
# File mode
os.chmod('input/file1', 7755)
os.chmod('input/dir2', 0700)
os.chmod('input/file1', 0o7755)
os.chmod('input/dir2', 0o700)
# Block device
os.mknod('input/bdev', 0600 | stat.S_IFBLK, os.makedev(10, 20))
os.mknod('input/bdev', 0o600 | stat.S_IFBLK, os.makedev(10, 20))
# Char device
os.mknod('input/cdev', 0600 | stat.S_IFCHR, os.makedev(30, 40))
# xattr
x = xattr(os.path.join(self.input_path, 'file1'))
x.set('user.foo', 'bar')
os.mknod('input/cdev', 0o600 | stat.S_IFCHR, os.makedev(30, 40))
xattr.set(os.path.join(self.input_path, 'file1'), 'user.foo', 'bar')
# Hard link
os.link(os.path.join(self.input_path, 'file1'),
os.path.join(self.input_path, 'hardlink'))
@@ -193,25 +191,25 @@ class Test(unittest.TestCase):
class ChunkTest(unittest.TestCase):
def test_chunkify(self):
data = '0' * 1024 * 1024 * 15 + 'Y'
parts = [str(c) for c in chunkify(StringIO(data), 2, 0x3, 2, 0)]
data = b'0' * 1024 * 1024 * 15 + b'Y'
parts = [bytes(c) for c in chunkify(BytesIO(data), 2, 0x3, 2, 0)]
self.assertEqual(len(parts), 2)
self.assertEqual(''.join(parts), data)
self.assertEqual([str(c) for c in chunkify(StringIO(''), 2, 0x3, 2, 0)], [])
self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 2, 0x3, 2, 0)], ['fooba', 'rboobaz', 'fooba', 'rboobaz', 'fooba', 'rboobaz'])
self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 2, 0x3, 2, 1)], ['fo', 'obarb', 'oob', 'azf', 'oobarb', 'oob', 'azf', 'oobarb', 'oobaz'])
self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 2, 0x3, 2, 2)], ['foob', 'ar', 'boobazfoob', 'ar', 'boobazfoob', 'ar', 'boobaz'])
self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 3, 0)], ['foobarboobaz' * 3])
self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 3, 1)], ['foobar', 'boo', 'bazfo', 'obar', 'boo', 'bazfo', 'obar', 'boobaz'])
self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 3, 2)], ['foo', 'barboobaz', 'foo', 'barboobaz', 'foo', 'barboobaz'])
self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 4, 0)], ['foobarboobaz' * 3])
self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 4, 1)], ['foobar', 'boobazfo', 'obar', 'boobazfo', 'obar', 'boobaz'])
self.assertEqual([str(c) for c in chunkify(StringIO('foobarboobaz' * 3), 3, 0x3, 4, 2)], ['foob', 'arboobaz', 'foob', 'arboobaz', 'foob', 'arboobaz'])
self.assertEqual(b''.join(parts), data)
self.assertEqual([bytes(c) for c in chunkify(BytesIO(b''), 2, 0x3, 2, 0)], [])
self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 2, 0x3, 2, 0)], [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz'])
self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 2, 0x3, 2, 1)], [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz'])
self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 2, 0x3, 2, 2)], [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz'])
self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 3, 0)], [b'foobarboobaz' * 3])
self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 3, 1)], [b'foobar', b'boo', b'bazfo', b'obar', b'boo', b'bazfo', b'obar', b'boobaz'])
self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 3, 2)], [b'foo', b'barboobaz', b'foo', b'barboobaz', b'foo', b'barboobaz'])
self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 4, 0)], [b'foobarboobaz' * 3])
self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 4, 1)], [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz'])
self.assertEqual([bytes(c) for c in chunkify(BytesIO(b'foobarboobaz' * 3), 3, 0x3, 4, 2)], [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz'])
def test_buzhash(self):
self.assertEqual(buzhash('abcdefghijklmnop', 0), 3795437769L)
self.assertEqual(buzhash('abcdefghijklmnop', 1), 3795400502L)
self.assertEqual(buzhash('abcdefghijklmnop', 1), buzhash_update(buzhash('Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1))
self.assertEqual(buzhash(b'abcdefghijklmnop', 0), 3795437769)
self.assertEqual(buzhash(b'abcdefghijklmnop', 1), 3795400502)
self.assertEqual(buzhash(b'abcdefghijklmnop', 1), buzhash_update(buzhash(b'Xabcdefghijklmno', 1), ord('X'), ord('p'), 16, 1))
class RemoteTest(Test):

View file

@@ -5,13 +5,9 @@ import sys
from glob import glob
import darc
min_python = (2, 5)
min_python = (3, 2)
if sys.version_info < min_python:
print "Darc requires Python %d.%d or later" % min_python
sys.exit(1)
if sys.version_info >= (3,):
print "Darc doesn't support Python 3 (yet)"
print("Darc requires Python %d.%d or later" % min_python)
sys.exit(1)
try:
@@ -31,7 +27,6 @@ try:
class Sdist(sdist):
def __init__(self, *args, **kwargs):
for src in glob('darc/*.pyx'):
print 'src', src
cython_compiler.compile(glob('darc/*.pyx'),
cython_compiler.default_options)
sdist.__init__(self, *args, **kwargs)