diff --git a/src/borg/archive.py b/src/borg/archive.py index 7a4d7e877..09a3d3f70 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -35,7 +35,7 @@ from .helpers import ProgressIndicatorPercent, log_multi from .helpers import PathPrefixPattern, FnmatchPattern from .helpers import consume from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec -from .item import Item +from .item import Item, ArchiveItem from .key import key_factory from .platform import acl_get, acl_set, set_flags, get_flags, swidth from .remote import cache_if_remote @@ -269,37 +269,36 @@ class Archive: break i += 1 else: - if name not in self.manifest.archives: + info = self.manifest.archives.get(name) + if info is None: raise self.DoesNotExist(name) - info = self.manifest.archives[name] - self.load(info[b'id']) + self.load(info.id) self.zeros = b'\0' * (1 << chunker_params[1]) def _load_meta(self, id): _, data = self.key.decrypt(id, self.repository.get(id)) - metadata = msgpack.unpackb(data) - if metadata[b'version'] != 1: + metadata = ArchiveItem(internal_dict=msgpack.unpackb(data)) + if metadata.version != 1: raise Exception('Unknown archive metadata version') return metadata def load(self, id): self.id = id self.metadata = self._load_meta(self.id) - decode_dict(self.metadata, ARCHIVE_TEXT_KEYS) - self.metadata[b'cmdline'] = [safe_decode(arg) for arg in self.metadata[b'cmdline']] - self.name = self.metadata[b'name'] + self.metadata.cmdline = [safe_decode(arg) for arg in self.metadata.cmdline] + self.name = self.metadata.name @property def ts(self): """Timestamp of archive creation (start) in UTC""" - ts = self.metadata[b'time'] + ts = self.metadata.time return parse_timestamp(ts) @property def ts_end(self): """Timestamp of archive creation (end) in UTC""" # fall back to time if there is no time_end present in metadata - ts = self.metadata.get(b'time_end') or self.metadata[b'time'] + ts = self.metadata.get('time_end') or self.metadata.time return 
parse_timestamp(ts) @property @@ -336,7 +335,7 @@ Number of files: {0.stats.nfiles}'''.format( return filter(item) if filter else True def iter_items(self, filter=None, preload=False): - for item in self.pipeline.unpack_many(self.metadata[b'items'], preload=preload, + for item in self.pipeline.unpack_many(self.metadata.items, preload=preload, filter=lambda item: self.item_filter(item, filter)): yield item @@ -366,7 +365,7 @@ Number of files: {0.stats.nfiles}'''.format( metadata = { 'version': 1, 'name': name, - 'comment': comment, + 'comment': comment or '', 'items': self.items_buffer.chunks, 'cmdline': sys.argv, 'hostname': socket.gethostname(), @@ -376,10 +375,11 @@ Number of files: {0.stats.nfiles}'''.format( 'chunker_params': self.chunker_params, } metadata.update(additional_metadata or {}) - data = msgpack.packb(StableDict(metadata), unicode_errors='surrogateescape') + metadata = ArchiveItem(metadata) + data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape') self.id = self.key.id_hash(data) self.cache.add_chunk(self.id, Chunk(data), self.stats) - self.manifest.archives[name] = {'id': self.id, 'time': metadata['time']} + self.manifest.archives[name] = (self.id, metadata.time) self.manifest.write() self.repository.commit() self.cache.commit() @@ -400,7 +400,7 @@ Number of files: {0.stats.nfiles}'''.format( cache.begin_txn() stats = Statistics() add(self.id) - for id, chunk in zip(self.metadata[b'items'], self.repository.get_many(self.metadata[b'items'])): + for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)): add(id) _, data = self.key.decrypt(id, chunk) unpacker.feed(data) @@ -588,12 +588,12 @@ Number of files: {0.stats.nfiles}'''.format( raise def set_meta(self, key, value): - metadata = StableDict(self._load_meta(self.id)) - metadata[key] = value - data = msgpack.packb(metadata, unicode_errors='surrogateescape') + metadata = self._load_meta(self.id) + setattr(metadata, key, value) + data = 
msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape') new_id = self.key.id_hash(data) self.cache.add_chunk(new_id, Chunk(data), self.stats) - self.manifest.archives[self.name] = {'id': new_id, 'time': metadata[b'time']} + self.manifest.archives[self.name] = (new_id, metadata.time) self.cache.chunk_decref(self.id, self.stats) self.id = new_id @@ -602,7 +602,7 @@ Number of files: {0.stats.nfiles}'''.format( raise self.AlreadyExists(name) oldname = self.name self.name = name - self.set_meta(b'name', name) + self.set_meta('name', name) del self.manifest.archives[oldname] def delete(self, stats, progress=False, forced=False): @@ -625,7 +625,7 @@ Number of files: {0.stats.nfiles}'''.format( error = False try: unpacker = msgpack.Unpacker(use_list=False) - items_ids = self.metadata[b'items'] + items_ids = self.metadata.items pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", same_line=True) for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))): if progress: @@ -844,7 +844,7 @@ Number of files: {0.stats.nfiles}'''.format( @staticmethod def list_archives(repository, key, manifest, cache=None): # expensive! see also Manifest.list_archive_infos. 
- for name, info in manifest.archives.items(): + for name in manifest.archives: yield Archive(repository, key, manifest, name, cache=cache) @staticmethod @@ -1075,8 +1075,9 @@ class ArchiveChecker: except (TypeError, ValueError, StopIteration): continue if valid_archive(archive): - logger.info('Found archive %s', archive[b'name'].decode('utf-8')) - manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']} + archive = ArchiveItem(internal_dict=archive) + logger.info('Found archive %s', archive.name) + manifest.archives[archive.name] = (chunk_id, archive.time) logger.info('Manifest rebuild complete.') return manifest @@ -1187,7 +1188,7 @@ class ArchiveChecker: return required_item_keys.issubset(keys) and keys.issubset(item_keys) i = 0 - for state, items in groupby(archive[b'items'], missing_chunk_detector): + for state, items in groupby(archive.items, missing_chunk_detector): items = list(items) if state % 2: for chunk_id in items: @@ -1215,37 +1216,38 @@ class ArchiveChecker: if archive is None: # we need last N or all archives - archive_items = sorted(self.manifest.archives.items(), reverse=True, - key=lambda name_info: name_info[1][b'time']) + archive_infos = self.manifest.archives.list(sort_by='ts', reverse=True) if prefix is not None: - archive_items = [item for item in archive_items if item[0].startswith(prefix)] - num_archives = len(archive_items) + archive_infos = [info for info in archive_infos if info.name.startswith(prefix)] + num_archives = len(archive_infos) end = None if last is None else min(num_archives, last) else: # we only want one specific archive - archive_items = [item for item in self.manifest.archives.items() if item[0] == archive] - if not archive_items: + info = self.manifest.archives.get(archive) + if info is None: logger.error("Archive '%s' not found.", archive) + archive_infos = [] + else: + archive_infos = [info] num_archives = 1 end = 1 with cache_if_remote(self.repository) as repository: - for 
i, (name, info) in enumerate(archive_items[:end]): - logger.info('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives)) - archive_id = info[b'id'] + for i, info in enumerate(archive_infos[:end]): + logger.info('Analyzing archive {} ({}/{})'.format(info.name, num_archives - i, num_archives)) + archive_id = info.id if archive_id not in self.chunks: logger.error('Archive metadata block is missing!') self.error_found = True - del self.manifest.archives[name] + del self.manifest.archives[info.name] continue mark_as_possibly_superseded(archive_id) cdata = self.repository.get(archive_id) _, data = self.key.decrypt(archive_id, cdata) - archive = StableDict(msgpack.unpackb(data)) - if archive[b'version'] != 1: + archive = ArchiveItem(internal_dict=msgpack.unpackb(data)) + if archive.version != 1: raise Exception('Unknown archive metadata version') - decode_dict(archive, ARCHIVE_TEXT_KEYS) - archive[b'cmdline'] = [safe_decode(arg) for arg in archive[b'cmdline']] + archive.cmdline = [safe_decode(arg) for arg in archive.cmdline] items_buffer = ChunkBuffer(self.key) items_buffer.write_chunk = add_callback for item in robust_iterator(archive): @@ -1253,14 +1255,14 @@ class ArchiveChecker: verify_file_chunks(item) items_buffer.add(item) items_buffer.flush(flush=True) - for previous_item_id in archive[b'items']: + for previous_item_id in archive.items: mark_as_possibly_superseded(previous_item_id) - archive[b'items'] = items_buffer.chunks - data = msgpack.packb(archive, unicode_errors='surrogateescape') + archive.items = items_buffer.chunks + data = msgpack.packb(archive.as_dict(), unicode_errors='surrogateescape') new_archive_id = self.key.id_hash(data) cdata = self.key.encrypt(Chunk(data)) add_reference(new_archive_id, len(data), len(cdata), cdata) - info[b'id'] = new_archive_id + self.manifest.archives[info.name] = (new_archive_id, info.ts) def orphan_chunks_check(self): if self.check_all: @@ -1483,9 +1485,9 @@ class ArchiveRecreater: if completed: 
timestamp = archive.ts.replace(tzinfo=None) if comment is None: - comment = archive.metadata.get(b'comment', '') + comment = archive.metadata.get('comment', '') target.save(timestamp=timestamp, comment=comment, additional_metadata={ - 'cmdline': archive.metadata[b'cmdline'], + 'cmdline': archive.metadata.cmdline, 'recreate_cmdline': sys.argv, }) if replace_original: @@ -1554,7 +1556,7 @@ class ArchiveRecreater: if not target: target = self.create_target_archive(target_name) # If the archives use the same chunker params, then don't rechunkify - target.recreate_rechunkify = tuple(archive.metadata.get(b'chunker_params')) != self.chunker_params + target.recreate_rechunkify = tuple(archive.metadata.get('chunker_params')) != self.chunker_params return target, resume_from def try_resume(self, archive, target_name): @@ -1573,7 +1575,7 @@ class ArchiveRecreater: return target, resume_from def incref_partial_chunks(self, source_archive, target_archive): - target_archive.recreate_partial_chunks = source_archive.metadata.get(b'recreate_partial_chunks', []) + target_archive.recreate_partial_chunks = source_archive.metadata.get('recreate_partial_chunks', []) for chunk_id, size, csize in target_archive.recreate_partial_chunks: if not self.cache.seen_chunk(chunk_id): try: @@ -1606,8 +1608,8 @@ class ArchiveRecreater: return item def can_resume(self, archive, old_target, target_name): - resume_id = old_target.metadata[b'recreate_source_id'] - resume_args = [safe_decode(arg) for arg in old_target.metadata[b'recreate_args']] + resume_id = old_target.metadata.recreate_source_id + resume_args = [safe_decode(arg) for arg in old_target.metadata.recreate_args] if resume_id != archive.id: logger.warning('Source archive changed, will discard %s and start over', target_name) logger.warning('Saved fingerprint: %s', bin_to_hex(resume_id)) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 98a02df68..78fdfe7a3 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -679,8 
+679,8 @@ class Archiver: archive2 = Archive(repository, key, manifest, args.archive2, consider_part_files=args.consider_part_files) - can_compare_chunk_ids = archive1.metadata.get(b'chunker_params', False) == archive2.metadata.get( - b'chunker_params', True) or args.same_chunker_params + can_compare_chunk_ids = archive1.metadata.get('chunker_params', False) == archive2.metadata.get( + 'chunker_params', True) or args.same_chunker_params if not can_compare_chunk_ids: self.print_warning('--chunker-params might be different between archives, diff will be slow.\n' 'If you know for certain that they are the same, pass --same-chunker-params ' @@ -734,7 +734,7 @@ class Archiver: msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.") else: msg.append("You requested to completely DELETE the repository *including* all archives it contains:") - for archive_info in manifest.list_archive_infos(sort_by='ts'): + for archive_info in manifest.archives.list(sort_by='ts'): msg.append(format_archive(archive_info)) msg.append("Type 'YES' if you understand this and want to continue: ") msg = '\n'.join(msg) @@ -812,7 +812,7 @@ class Archiver: format = "{archive:<36} {time} [{id}]{NL}" formatter = ArchiveFormatter(format) - for archive_info in manifest.list_archive_infos(sort_by='ts'): + for archive_info in manifest.archives.list(sort_by='ts'): if args.prefix and not archive_info.name.startswith(args.prefix): continue write(safe_encode(formatter.format_item(archive_info))) @@ -831,14 +831,14 @@ class Archiver: stats = archive.calc_stats(cache) print('Archive name: %s' % archive.name) print('Archive fingerprint: %s' % archive.fpr) - print('Comment: %s' % archive.metadata.get(b'comment', '')) - print('Hostname: %s' % archive.metadata[b'hostname']) - print('Username: %s' % archive.metadata[b'username']) + print('Comment: %s' % archive.metadata.get('comment', '')) + print('Hostname: %s' % archive.metadata.hostname) + print('Username: %s' % 
archive.metadata.username) print('Time (start): %s' % format_time(to_localtime(archive.ts))) print('Time (end): %s' % format_time(to_localtime(archive.ts_end))) print('Duration: %s' % archive.duration_from_meta) print('Number of files: %d' % stats.nfiles) - print('Command line: %s' % format_cmdline(archive.metadata[b'cmdline'])) + print('Command line: %s' % format_cmdline(archive.metadata.cmdline)) print(DASHES) print(STATS_HEADER) print(str(stats)) @@ -857,7 +857,7 @@ class Archiver: '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", ' '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.') return self.exit_code - archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True) # just a ArchiveInfo list + archives_checkpoints = manifest.archives.list(sort_by='ts', reverse=True) # just a ArchiveInfo list if args.prefix: archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)] is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search @@ -974,7 +974,7 @@ class Archiver: if args.target is not None: self.print_error('--target: Need to specify single archive') return self.exit_code - for archive in manifest.list_archive_infos(sort_by='ts'): + for archive in manifest.archives.list(sort_by='ts'): name = archive.name if recreater.is_temporary_archive(name): continue @@ -1009,7 +1009,7 @@ class Archiver: """dump (decrypted, decompressed) archive items metadata (not: data)""" archive = Archive(repository, key, manifest, args.location.archive, consider_part_files=args.consider_part_files) - for i, item_id in enumerate(archive.metadata[b'items']): + for i, item_id in enumerate(archive.metadata.items): _, data = key.decrypt(item_id, repository.get(item_id)) filename = '%06d_%s.items' % (i, bin_to_hex(item_id)) print('Dumping', filename) diff --git a/src/borg/cache.py b/src/borg/cache.py index df4b90861..f325ff25d 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -16,7 +16,7 @@ 
from .helpers import get_cache_dir from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex from .helpers import format_file_size from .helpers import yes -from .item import Item +from .item import Item, ArchiveItem from .key import PlaintextKey from .locking import Lock from .platform import SaveFile @@ -279,7 +279,7 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" return set() def repo_archives(): - return set(info[b'id'] for info in self.manifest.archives.values()) + return set(info.id for info in self.manifest.archives.list()) def cleanup_outdated(ids): for id in ids: @@ -290,12 +290,11 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" cdata = repository.get(archive_id) _, data = key.decrypt(archive_id, cdata) chunk_idx.add(archive_id, 1, len(data), len(cdata)) - archive = msgpack.unpackb(data) - if archive[b'version'] != 1: + archive = ArchiveItem(internal_dict=msgpack.unpackb(data)) + if archive.version != 1: raise Exception('Unknown archive metadata version') - decode_dict(archive, (b'name',)) unpacker = msgpack.Unpacker() - for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])): + for item_id, chunk in zip(archive.items, repository.get_many(archive.items)): _, data = key.decrypt(item_id, chunk) chunk_idx.add(item_id, 1, len(data), len(chunk)) unpacker.feed(data) @@ -319,9 +318,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" return chunk_idx def lookup_name(archive_id): - for name, info in self.manifest.archives.items(): - if info[b'id'] == archive_id: - return name + for info in self.manifest.archives.list(): + if info.id == archive_id: + return info.name def create_master_idx(chunk_idx): logger.info('Synchronizing chunks cache...') diff --git a/src/borg/constants.py b/src/borg/constants.py index d83c41f20..d6f26d116 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -15,8 +15,6 @@ ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 
'cmdline', 'hostname', 'us # this is the set of keys that are always present in archives: REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ]) -ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end') - # default umask, overriden by --umask, defaults to read/write only for owner UMASK_DEFAULT = 0o077 diff --git a/src/borg/fuse.py b/src/borg/fuse.py index 3113515fe..4e7cf10c5 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -73,11 +73,11 @@ class FuseOperations(llfuse.Operations): if archive: self.process_archive(archive) else: - for archive_name in manifest.archives: + for name in manifest.archives: # Create archive placeholder inode archive_inode = self._create_dir(parent=1) - self.contents[1][os.fsencode(archive_name)] = archive_inode - self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name) + self.contents[1][os.fsencode(name)] = archive_inode + self.pending_archives[archive_inode] = Archive(repository, key, manifest, name) def mount(self, mountpoint, mount_options, foreground=False): """Mount filesystem on *mountpoint* with *mount_options*.""" @@ -117,7 +117,7 @@ class FuseOperations(llfuse.Operations): """Build fuse inode hierarchy from archive metadata """ unpacker = msgpack.Unpacker() - for key, chunk in zip(archive.metadata[b'items'], self.repository.get_many(archive.metadata[b'items'])): + for key, chunk in zip(archive.metadata.items, self.repository.get_many(archive.metadata.items)): _, data = self.key.decrypt(key, chunk) unpacker.feed(data) for item in unpacker: diff --git a/src/borg/helpers.py b/src/borg/helpers.py index f4c553836..759291442 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -18,7 +18,7 @@ import time import unicodedata import uuid from binascii import hexlify -from collections import namedtuple, deque +from collections import namedtuple, deque, abc from contextlib import contextmanager from datetime import datetime, 
timezone, timedelta from fnmatch import translate @@ -97,12 +97,76 @@ def check_extension_modules(): raise ExtensionModuleError +ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') + + +class Archives(abc.MutableMapping): + """ + Nice wrapper around the archives dict, making sure only valid types/values get in + and we can deal with str keys (and it internally encodes to byte keys) and either + str timestamps or datetime timestamps. + """ + def __init__(self): + # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts) + self._archives = {} + + def __len__(self): + return len(self._archives) + + def __iter__(self): + return iter(safe_decode(name) for name in self._archives) + + def __getitem__(self, name): + assert isinstance(name, str) + _name = safe_encode(name) + values = self._archives.get(_name) + if values is None: + raise KeyError(name) + ts = parse_timestamp(values[b'time'].decode('utf-8')) + return ArchiveInfo(name=name, id=values[b'id'], ts=ts) + + def __setitem__(self, name, info): + assert isinstance(name, str) + name = safe_encode(name) + assert isinstance(info, tuple) + id, ts = info + assert isinstance(id, bytes) + if isinstance(ts, datetime): + ts = ts.replace(tzinfo=None).isoformat() + assert isinstance(ts, str) + ts = ts.encode() + self._archives[name] = {b'id': id, b'time': ts} + + def __delitem__(self, name): + assert isinstance(name, str) + name = safe_encode(name) + del self._archives[name] + + def list(self, sort_by=None, reverse=False): + # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts + archives = list(self.values()) # [self[name] for name in self] + if sort_by is not None: + archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse) + return archives + + def set_raw_dict(self, d): + """set the dict we get from the msgpack unpacker""" + for k, v in d.items(): + assert isinstance(k, bytes) + assert isinstance(v, dict) and b'id' in v and b'time' in v + self._archives[k] = v + + def 
get_raw_dict(self): + """get the dict we can give to the msgpack packer""" + return self._archives + + class Manifest: MANIFEST_ID = b'\0' * 32 def __init__(self, key, repository, item_keys=None): - self.archives = {} + self.archives = Archives() self.config = {} self.key = key self.repository = repository @@ -114,6 +178,7 @@ class Manifest: @classmethod def load(cls, repository, key=None): + from .item import ManifestItem from .key import key_factory from .repository import Repository try: @@ -125,42 +190,30 @@ class Manifest: manifest = cls(key, repository) _, data = key.decrypt(None, cdata) manifest.id = key.id_hash(data) - m = msgpack.unpackb(data) - if not m.get(b'version') == 1: + m = ManifestItem(internal_dict=msgpack.unpackb(data)) + if m.get('version') != 1: raise ValueError('Invalid manifest version') - manifest.archives = dict((k.decode('utf-8'), v) for k, v in m[b'archives'].items()) - manifest.timestamp = m.get(b'timestamp') - if manifest.timestamp: - manifest.timestamp = manifest.timestamp.decode('ascii') - manifest.config = m[b'config'] + manifest.archives.set_raw_dict(m.archives) + manifest.timestamp = m.get('timestamp') + manifest.config = m.config # valid item keys are whatever is known in the repo or every key we know - manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get(b'item_keys', [])) + manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get('item_keys', [])) return manifest, key def write(self): + from .item import ManifestItem self.timestamp = datetime.utcnow().isoformat() - data = msgpack.packb(StableDict({ - 'version': 1, - 'archives': self.archives, - 'timestamp': self.timestamp, - 'config': self.config, - 'item_keys': tuple(self.item_keys), - })) + manifest = ManifestItem( + version=1, + archives=self.archives.get_raw_dict(), + timestamp=self.timestamp, + config=self.config, + item_keys=tuple(self.item_keys), + ) + data = msgpack.packb(manifest.as_dict()) self.id = self.key.id_hash(data) 
self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data))) - def list_archive_infos(self, sort_by=None, reverse=False): - # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts - ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') - archives = [] - for name, values in self.archives.items(): - ts = parse_timestamp(values[b'time'].decode('utf-8')) - id = values[b'id'] - archives.append(ArchiveInfo(name=name, id=id, ts=ts)) - if sort_by is not None: - archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse) - return archives - def prune_within(archives, within): multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365} diff --git a/src/borg/item.py b/src/borg/item.py index 90289dbe8..0a0908c02 100644 --- a/src/borg/item.py +++ b/src/borg/item.py @@ -204,3 +204,61 @@ class Key(PropDict): enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes) id_key = PropDict._make_property('id_key', bytes) chunk_seed = PropDict._make_property('chunk_seed', int) + + +class ArchiveItem(PropDict): + """ + ArchiveItem abstraction that deals with validation and the low-level details internally: + + An ArchiveItem is created either from msgpack unpacker output, from another dict, from kwargs or + built step-by-step by setting attributes. + + msgpack gives us a dict with bytes-typed keys, just give it to ArchiveItem(d) and use arch.xxx later. + + If a ArchiveItem shall be serialized, give as_dict() method output to msgpack packer. 
+ """ + + VALID_KEYS = {'version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end', + 'comment', 'chunker_params', + 'recreate_cmdline', 'recreate_source_id', 'recreate_args', 'recreate_partial_chunks', + } # str-typed keys + + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + + version = PropDict._make_property('version', int) + name = PropDict._make_property('name', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + items = PropDict._make_property('items', list) + cmdline = PropDict._make_property('cmdline', list) # list of s-e-str + hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + username = PropDict._make_property('username', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + time = PropDict._make_property('time', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + time_end = PropDict._make_property('time_end', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + comment = PropDict._make_property('comment', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + chunker_params = PropDict._make_property('chunker_params', tuple) + recreate_source_id = PropDict._make_property('recreate_source_id', bytes) + recreate_cmdline = PropDict._make_property('recreate_cmdline', list) # list of s-e-str + recreate_args = PropDict._make_property('recreate_args', list) # list of s-e-str + recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list) # list of tuples + + +class ManifestItem(PropDict): + """ + ManifestItem abstraction that deals with validation and the low-level details internally: + + A ManifestItem is created either from msgpack unpacker output, from another dict, from kwargs or + built step-by-step by setting attributes. 
+ + msgpack gives us a dict with bytes-typed keys, just give it to ManifestItem(d) and use manifest.xxx later. + + If a ManifestItem shall be serialized, give as_dict() method output to msgpack packer. + """ + + VALID_KEYS = {'version', 'archives', 'timestamp', 'config', 'item_keys', } # str-typed keys + + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + + version = PropDict._make_property('version', int) + archives = PropDict._make_property('archives', dict) # name -> dict + timestamp = PropDict._make_property('timestamp', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + config = PropDict._make_property('config', dict) + item_keys = PropDict._make_property('item_keys', tuple) diff --git a/src/borg/remote.py b/src/borg/remote.py index ff057b7b0..18637cae7 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -283,22 +283,23 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+. return msgid def handle_error(error, res): - if error == b'DoesNotExist': + error = error.decode('utf-8') + if error == 'DoesNotExist': raise Repository.DoesNotExist(self.location.orig) - elif error == b'AlreadyExists': + elif error == 'AlreadyExists': raise Repository.AlreadyExists(self.location.orig) - elif error == b'CheckNeeded': + elif error == 'CheckNeeded': raise Repository.CheckNeeded(self.location.orig) - elif error == b'IntegrityError': + elif error == 'IntegrityError': raise IntegrityError(res) - elif error == b'PathNotAllowed': + elif error == 'PathNotAllowed': raise PathNotAllowed(*res) - elif error == b'ObjectNotFound': + elif error == 'ObjectNotFound': raise Repository.ObjectNotFound(res[0], self.location.orig) - elif error == b'InvalidRPCMethod': + elif error == 'InvalidRPCMethod': raise InvalidRPCMethod(*res) else: - raise self.RPCError(res.decode('utf-8'), error.decode('utf-8')) + raise self.RPCError(res.decode('utf-8'), error) calls = list(calls) waiting_for = [] diff --git 
a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index 19db1a44c..49648ef47 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -8,7 +8,7 @@ import msgpack from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics from ..archive import BackupOSError, backup_io, backup_io_iter -from ..item import Item +from ..item import Item, ArchiveItem from ..key import PlaintextKey from ..helpers import Manifest from . import BaseTestCase @@ -77,7 +77,7 @@ class ArchiveTimestampTestCase(BaseTestCase): key = PlaintextKey(repository) manifest = Manifest(repository, key) a = Archive(repository, key, manifest, 'test', create=True) - a.metadata = {b'time': isoformat} + a.metadata = ArchiveItem(time=isoformat) self.assert_equal(a.ts, expected) def test_with_microseconds(self): diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 2df01b29e..1901b8d49 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -1859,7 +1859,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): def test_missing_archive_item_chunk(self): archive, repository = self.open_archive('archive1') with repository: - repository.delete(archive.metadata[b'items'][-5]) + repository.delete(archive.metadata.items[-5]) repository.commit() self.cmd('check', self.repository_location, exit_code=1) self.cmd('check', '--repair', self.repository_location, exit_code=0) diff --git a/src/borg/testsuite/key.py b/src/borg/testsuite/key.py index 856970107..b85650a4f 100644 --- a/src/borg/testsuite/key.py +++ b/src/borg/testsuite/key.py @@ -69,9 +69,9 @@ class TestKey: monkeypatch.setenv('BORG_PASSPHRASE', 'test') key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) assert bytes_to_long(key.enc_cipher.iv, 8) == 0 - manifest = key.encrypt(Chunk(b'XXX')) + manifest = key.encrypt(Chunk(b'ABC')) assert key.extract_nonce(manifest) == 0 - manifest2 = key.encrypt(Chunk(b'XXX')) + 
manifest2 = key.encrypt(Chunk(b'ABC')) assert manifest != manifest2 assert key.decrypt(None, manifest) == key.decrypt(None, manifest2) assert key.extract_nonce(manifest2) == 1 @@ -91,7 +91,7 @@ class TestKey: assert not keyfile.exists() key = KeyfileKey.create(self.MockRepository(), self.MockArgs()) assert keyfile.exists() - chunk = Chunk(b'XXX') + chunk = Chunk(b'ABC') chunk_id = key.id_hash(chunk.data) chunk_cdata = key.encrypt(chunk) key = KeyfileKey.detect(self.MockRepository(), chunk_cdata) @@ -124,9 +124,9 @@ class TestKey: assert hexlify(key.enc_hmac_key) == b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901' assert hexlify(key.enc_key) == b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a' assert key.chunk_seed == -775740477 - manifest = key.encrypt(Chunk(b'XXX')) + manifest = key.encrypt(Chunk(b'ABC')) assert key.extract_nonce(manifest) == 0 - manifest2 = key.encrypt(Chunk(b'XXX')) + manifest2 = key.encrypt(Chunk(b'ABC')) assert manifest != manifest2 assert key.decrypt(None, manifest) == key.decrypt(None, manifest2) assert key.extract_nonce(manifest2) == 1