diff --git a/src/borg/archive.py b/src/borg/archive.py index c4d8c290a..c10d57d60 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -33,6 +33,7 @@ from .helpers import ProgressIndicatorPercent, log_multi from .helpers import PathPrefixPattern, FnmatchPattern from .helpers import consume from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec +from .item import Item from .key import key_factory from .platform import acl_get, acl_set, set_flags, get_flags, swidth from .remote import cache_if_remote @@ -86,7 +87,7 @@ class Statistics: columns, lines = get_terminal_size() if not final: msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self) - path = remove_surrogates(item[b'path']) if item else '' + path = remove_surrogates(item.path) if item else '' space = columns - swidth(msg) if space < swidth('...') + swidth(path): path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:]) @@ -106,16 +107,16 @@ class DownloadPipeline: unpacker = msgpack.Unpacker(use_list=False) for _, data in self.fetch_many(ids): unpacker.feed(data) - items = [decode_dict(item, ITEM_TEXT_KEYS) for item in unpacker] + items = [Item(internal_dict=item) for item in unpacker] if filter: items = [item for item in items if filter(item)] for item in items: - if b'chunks' in item: - item[b'chunks'] = [ChunkListEntry(*e) for e in item[b'chunks']] + if 'chunks' in item: + item.chunks = [ChunkListEntry(*e) for e in item.chunks] if preload: for item in items: - if b'chunks' in item: - self.repository.preload([c.id for c in item[b'chunks']]) + if 'chunks' in item: + self.repository.preload([c.id for c in item.chunks]) for item in items: yield item @@ -135,7 +136,7 @@ class ChunkBuffer: self.chunker = Chunker(self.key.chunk_seed, *chunker_params) def add(self, item): - self.buffer.write(self.packer.pack(StableDict(item))) + self.buffer.write(self.packer.pack(item.as_dict())) if self.is_full(): self.flush() @@ -286,9 +287,6 @@ Number of files: {0.stats.nfiles}'''.format( yield item def add_item(self, item): - unknown_keys = set(item) - ITEM_KEYS - assert not unknown_keys, ('unknown item metadata keys detected, please update constants.ITEM_KEYS: %s', - ','.join(k.decode('ascii') for k in unknown_keys)) if self.show_progress: self.stats.show_progress(item=item, dt=0.2) self.items_buffer.add(item) @@ -356,9 +354,10 @@ Number of files: {0.stats.nfiles}'''.format( _, data = self.key.decrypt(id, chunk) unpacker.feed(data) for item in unpacker: - if b'chunks' in item: + item = Item(internal_dict=item) + if 'chunks' in item: stats.nfiles += 1 - add_file_chunks(item[b'chunks']) + add_file_chunks(item.chunks) cache.rollback() return stats @@ -373,22 +372,22 @@ Number of files: {0.stats.nfiles}'''.format( :param stdout: write extracted data to stdout :param sparse: write sparse files (chunk-granularity, independent of the original being sparse) :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly - :param original_path: b'path' key as stored in archive + :param original_path: 'path' key as stored in archive """ if dry_run or stdout: - if b'chunks' in item: - for _, data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True): + if 'chunks' in item: + for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True): if stdout: sys.stdout.buffer.write(data) if stdout: sys.stdout.buffer.flush() return - original_path = original_path or item[b'path'] + original_path = original_path or item.path dest = self.cwd - if item[b'path'].startswith('/') or item[b'path'].startswith('..'): + if item.path.startswith(('/', '..')): raise Exception('Path should be relative and local') - path = os.path.join(dest, item[b'path']) + path = os.path.join(dest, item.path) # Attempt to remove existing files, ignore errors on failure try: st = os.lstat(path) @@ -400,27 +399,27 @@ Number of files: {0.stats.nfiles}'''.format( raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None except OSError: pass - mode = item[b'mode'] + mode = item.mode if stat.S_ISREG(mode): if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) # Hard link? - if b'source' in item: - source = os.path.join(dest, item[b'source']) + if 'source' in item: + source = os.path.join(dest, item.source) if os.path.exists(path): os.unlink(path) if not hardlink_masters: os.link(source, path) return - item[b'chunks'], link_target = hardlink_masters[item[b'source']] + item.chunks, link_target = hardlink_masters[item.source] if link_target: # Hard link was extracted previously, just link os.link(link_target, path) return # Extract chunks, since the item which had the chunks was not extracted with open(path, 'wb') as fd: - ids = [c.id for c in item[b'chunks']] + ids = [c.id for c in item.chunks] for _, data in self.pipeline.fetch_many(ids, is_preloaded=True): if sparse and self.zeros.startswith(data): # all-zero chunk: create a hole in a sparse file @@ -433,7 +432,7 @@ Number of files: {0.stats.nfiles}'''.format( self.restore_attrs(path, item, fd=fd.fileno()) if hardlink_masters: # Update master entry with extracted file path, so that following hardlinks don't extract twice. - hardlink_masters[item.get(b'source') or original_path] = (None, path) + hardlink_masters[item.get('source') or original_path] = (None, path) elif stat.S_ISDIR(mode): if not os.path.exists(path): os.makedirs(path) @@ -442,7 +441,7 @@ Number of files: {0.stats.nfiles}'''.format( elif stat.S_ISLNK(mode): if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) - source = item[b'source'] + source = item.source if os.path.exists(path): os.unlink(path) try: @@ -456,18 +455,18 @@ Number of files: {0.stats.nfiles}'''.format( os.mkfifo(path) self.restore_attrs(path, item) elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode): - os.mknod(path, item[b'mode'], item[b'rdev']) + os.mknod(path, item.mode, item.rdev) self.restore_attrs(path, item) else: - raise Exception('Unknown archive item type %r' % item[b'mode']) + raise Exception('Unknown archive item type %r' % item.mode) def restore_attrs(self, path, item, symlink=False, fd=None): uid = gid = None if not self.numeric_owner: - uid = user2uid(item[b'user']) - gid = group2gid(item[b'group']) - uid = item[b'uid'] if uid is None else uid - gid = item[b'gid'] if gid is None else gid + uid = user2uid(item.user) + gid = group2gid(item.group) + uid = item.uid if uid is None else uid + gid = item.gid if gid is None else gid # This code is a bit of a mess due to os specific differences try: if fd: @@ -477,14 +476,14 @@ Number of files: {0.stats.nfiles}'''.format( except OSError: pass if fd: - os.fchmod(fd, item[b'mode']) + os.fchmod(fd, item.mode) elif not symlink: - os.chmod(path, item[b'mode']) + os.chmod(path, item.mode) elif has_lchmod: # Not available on Linux - os.lchmod(path, item[b'mode']) - mtime = bigint_to_int(item[b'mtime']) - if b'atime' in item: - atime = bigint_to_int(item[b'atime']) + os.lchmod(path, item.mode) + mtime = item.mtime + if 'atime' in item: + atime = item.atime else: # old archives only had mtime in item metadata atime = mtime @@ -493,14 +492,14 @@ Number of files: {0.stats.nfiles}'''.format( else: os.utime(path, None, ns=(atime, mtime), follow_symlinks=False) acl_set(path, item, self.numeric_owner) - if b'bsdflags' in item: + if 'bsdflags' in item: try: - set_flags(path, item[b'bsdflags'], fd=fd) + set_flags(path, item.bsdflags, fd=fd) except OSError: pass # chown removes Linux capabilities, so set the extended attributes at the end, after chown, since they include # the Linux capabilities in the "security.capability" attribute. - xattrs = item.get(b'xattrs', {}) + xattrs = item.get('xattrs', {}) for k, v in xattrs.items(): try: xattr.setxattr(fd or path, k, v, follow_symlinks=False) @@ -541,8 +540,9 @@ Number of files: {0.stats.nfiles}'''.format( unpacker.feed(data) self.cache.chunk_decref(items_id, stats) for item in unpacker: - if b'chunks' in item: - for chunk_id, size, csize in item[b'chunks']: + item = Item(internal_dict=item) + if 'chunks' in item: + for chunk_id, size, csize in item.chunks: self.cache.chunk_decref(chunk_id, stats) if progress: pi.finish() @@ -550,39 +550,39 @@ Number of files: {0.stats.nfiles}'''.format( del self.manifest.archives[self.name] def stat_attrs(self, st, path): - item = { - b'mode': st.st_mode, - b'uid': st.st_uid, b'user': uid2user(st.st_uid), - b'gid': st.st_gid, b'group': gid2group(st.st_gid), - b'atime': int_to_bigint(st.st_atime_ns), - b'ctime': int_to_bigint(st.st_ctime_ns), - b'mtime': int_to_bigint(st.st_mtime_ns), - } + attrs = dict( + mode=st.st_mode, + uid=st.st_uid, user=uid2user(st.st_uid), + gid=st.st_gid, group=gid2group(st.st_gid), + atime=st.st_atime_ns, + ctime=st.st_ctime_ns, + mtime=st.st_mtime_ns, + ) if self.numeric_owner: - item[b'user'] = item[b'group'] = None + attrs['user'] = attrs['group'] = None xattrs = xattr.get_all(path, follow_symlinks=False) if xattrs: - item[b'xattrs'] = StableDict(xattrs) + attrs['xattrs'] = StableDict(xattrs) bsdflags = get_flags(path, st) if bsdflags: - item[b'bsdflags'] = bsdflags - acl_get(path, item, st, self.numeric_owner) - return item + attrs['bsdflags'] = bsdflags + acl_get(path, attrs, st, self.numeric_owner) + return attrs def process_dir(self, path, st): - item = {b'path': make_path_safe(path)} + item = Item(path=make_path_safe(path)) item.update(self.stat_attrs(st, path)) self.add_item(item) return 'd' # directory def process_fifo(self, path, st): - item = {b'path': make_path_safe(path)} + item = Item(path=make_path_safe(path)) item.update(self.stat_attrs(st, path)) self.add_item(item) return 'f' # fifo def process_dev(self, path, st): - item = {b'path': make_path_safe(path), b'rdev': st.st_rdev} + item = Item(path=make_path_safe(path), rdev=st.st_rdev) item.update(self.stat_attrs(st, path)) self.add_item(item) if stat.S_ISCHR(st.st_mode): @@ -592,7 +592,7 @@ Number of files: {0.stats.nfiles}'''.format( def process_symlink(self, path, st): source = os.readlink(path) - item = {b'path': make_path_safe(path), b'source': source} + item = Item(path=make_path_safe(path), source=source) item.update(self.stat_attrs(st, path)) self.add_item(item) return 's' # symlink @@ -604,15 +604,15 @@ Number of files: {0.stats.nfiles}'''.format( for data in self.chunker.chunkify(fd): chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats)) self.stats.nfiles += 1 - t = int_to_bigint(int(time.time()) * 1000000000) - item = { - b'path': path, - b'chunks': chunks, - b'mode': 0o100660, # regular file, ug=rw - b'uid': uid, b'user': uid2user(uid), - b'gid': gid, b'group': gid2group(gid), - b'mtime': t, b'atime': t, b'ctime': t, - } + t = int(time.time()) * 1000000000 + item = Item( + path=path, + chunks=chunks, + mode=0o100660, # regular file, ug=rw + uid=uid, user=uid2user(uid), + gid=gid, group=gid2group(gid), + mtime=t, atime=t, ctime=t, + ) self.add_item(item) return 'i' # stdin @@ -623,11 +623,8 @@ Number of files: {0.stats.nfiles}'''.format( if st.st_nlink > 1: source = self.hard_links.get((st.st_ino, st.st_dev)) if (st.st_ino, st.st_dev) in self.hard_links: - item = self.stat_attrs(st, path) - item.update({ - b'path': safe_path, - b'source': source, - }) + item = Item(path=safe_path, source=source) + item.update(self.stat_attrs(st, path)) self.add_item(item) status = 'h' # regular file, hardlink (to already seen inodes) return status @@ -649,10 +646,10 @@ Number of files: {0.stats.nfiles}'''.format( status = 'U' # regular file, unchanged else: status = 'A' # regular file, added - item = { - b'path': safe_path, - b'hardlink_master': st.st_nlink > 1, # item is a hard link and has the chunks - } + item = Item( + path=safe_path, + hardlink_master=st.st_nlink > 1, # item is a hard link and has the chunks + ) # Only chunkify the file if needed if chunks is None: compress = self.compression_decider1.decide(path) @@ -668,7 +665,7 @@ Number of files: {0.stats.nfiles}'''.format( self.stats.show_progress(item=item, dt=0.2) cache.memorize_file(path_hash, st, [c.id for c in chunks]) status = status or 'M' # regular file, modified (if not 'A' already) - item[b'chunks'] = chunks + item.chunks = chunks item.update(self.stat_attrs(st, path)) self.stats.nfiles += 1 self.add_item(item) @@ -698,7 +695,7 @@ class RobustUnpacker: """ def __init__(self, validator): super().__init__() - self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS] + self.item_keys = [msgpack.packb(name.encode()) for name in ITEM_KEYS] self.validator = validator self._buffered_data = [] self._resync = False @@ -894,10 +891,10 @@ class ArchiveChecker: """ offset = 0 chunk_list = [] - for chunk_id, size, csize in item[b'chunks']: + for chunk_id, size, csize in item.chunks: if chunk_id not in self.chunks: # If a file chunk is missing, create an all empty replacement chunk - logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(safe_decode(item[b'path']), offset, offset + size)) + logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item.path, offset, offset + size)) self.error_found = True data = bytes(size) chunk_id = self.key.id_hash(data) @@ -908,14 +905,14 @@ class ArchiveChecker: add_reference(chunk_id, size, csize) chunk_list.append((chunk_id, size, csize)) offset += size - item[b'chunks'] = chunk_list + item.chunks = chunk_list def robust_iterator(archive): """Iterates through all archive items Missing item chunks will be skipped and the msgpack stream will be restarted """ - unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item) + unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item) _state = 0 def missing_chunk_detector(chunk_id): @@ -946,7 +943,7 @@ class ArchiveChecker: try: for item in unpacker: if isinstance(item, dict): - yield item + yield Item(internal_dict=item) else: report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i) except Exception: @@ -990,7 +987,7 @@ class ArchiveChecker: items_buffer = ChunkBuffer(self.key) items_buffer.write_chunk = add_callback for item in robust_iterator(archive): - if b'chunks' in item: + if 'chunks' in item: verify_file_chunks(item) items_buffer.add(item) items_buffer.flush(flush=True) @@ -1093,38 +1090,38 @@ class ArchiveRecreater: def item_is_hardlink_master(item): return (target_is_subset and - stat.S_ISREG(item[b'mode']) and - item.get(b'hardlink_master', True) and - b'source' not in item and - not matcher.match(item[b'path'])) + stat.S_ISREG(item.mode) and + item.get('hardlink_master', True) and + 'source' not in item and + not matcher.match(item.path)) for item in archive.iter_items(): if item_is_hardlink_master(item): # Re-visit all of these items in the archive even when fast-forwarding to rebuild hardlink_masters - hardlink_masters[item[b'path']] = (item.get(b'chunks'), None) + hardlink_masters[item.path] = (item.get('chunks'), None) continue if resume_from: # Fast forward to after the last processed file - if item[b'path'] == resume_from: - logger.info('Fast-forwarded to %s', remove_surrogates(item[b'path'])) + if item.path == resume_from: + logger.info('Fast-forwarded to %s', remove_surrogates(item.path)) resume_from = None continue - if not matcher.match(item[b'path']): - self.print_file_status('x', item[b'path']) + if not matcher.match(item.path): + self.print_file_status('x', item.path) continue - if target_is_subset and stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters: + if target_is_subset and stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters: # master of this hard link is outside the target subset - chunks, new_source = hardlink_masters[item[b'source']] + chunks, new_source = hardlink_masters[item.source] if new_source is None: # First item to use this master, move the chunks - item[b'chunks'] = chunks - hardlink_masters[item[b'source']] = (None, item[b'path']) - del item[b'source'] + item.chunks = chunks + hardlink_masters[item.source] = (None, item.path) + del item.source else: # Master was already moved, only update this item's source - item[b'source'] = new_source + item.source = new_source if self.dry_run: - self.print_file_status('-', item[b'path']) + self.print_file_status('-', item.path) else: try: self.process_item(archive, target, item) @@ -1136,11 +1133,11 @@ class ArchiveRecreater: target.stats.show_progress(final=True) def process_item(self, archive, target, item): - if b'chunks' in item: - item[b'chunks'] = self.process_chunks(archive, target, item) + if 'chunks' in item: + item.chunks = self.process_chunks(archive, target, item) target.stats.nfiles += 1 target.add_item(item) - self.print_file_status(file_status(item[b'mode']), item[b'path']) + self.print_file_status(file_status(item.mode), item.path) if self.interrupt: raise self.Interrupted @@ -1148,9 +1145,9 @@ class ArchiveRecreater: """Return new chunk ID list for 'item'.""" # TODO: support --compression-from if not self.recompress and not target.recreate_rechunkify: - for chunk_id, size, csize in item[b'chunks']: + for chunk_id, size, csize in item.chunks: self.cache.chunk_incref(chunk_id, target.stats) - return item[b'chunks'] + return item.chunks new_chunks = self.process_partial_chunks(target) chunk_iterator = self.create_chunk_iterator(archive, target, item) consume(chunk_iterator, len(new_chunks)) @@ -1181,7 +1178,7 @@ class ArchiveRecreater: def create_chunk_iterator(self, archive, target, item): """Return iterator of chunks to store for 'item' from 'archive' in 'target'.""" - chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item[b'chunks']]) + chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks]) if target.recreate_rechunkify: # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk # (does not load the entire file into memory) @@ -1243,7 +1240,7 @@ class ArchiveRecreater: """Add excludes to the matcher created by exclude_cache and exclude_if_present.""" def exclude(dir, tag_item): if self.keep_tag_files: - tag_files.append(PathPrefixPattern(tag_item[b'path'])) + tag_files.append(PathPrefixPattern(tag_item.path)) tagged_dirs.append(FnmatchPattern(dir + '/')) else: tagged_dirs.append(PathPrefixPattern(dir)) @@ -1255,18 +1252,18 @@ class ArchiveRecreater: cachedir_masters = {} for item in archive.iter_items( - filter=lambda item: item[b'path'].endswith(CACHE_TAG_NAME) or matcher.match(item[b'path'])): - if item[b'path'].endswith(CACHE_TAG_NAME): - cachedir_masters[item[b'path']] = item - if stat.S_ISREG(item[b'mode']): - dir, tag_file = os.path.split(item[b'path']) + filter=lambda item: item.path.endswith(CACHE_TAG_NAME) or matcher.match(item.path)): + if item.path.endswith(CACHE_TAG_NAME): + cachedir_masters[item.path] = item + if stat.S_ISREG(item.mode): + dir, tag_file = os.path.split(item.path) if tag_file in self.exclude_if_present: exclude(dir, item) if self.exclude_caches and tag_file == CACHE_TAG_NAME: - if b'chunks' in item: + if 'chunks' in item: file = open_item(archive, item) else: - file = open_item(archive, cachedir_masters[item[b'source']]) + file = open_item(archive, cachedir_masters[item.source]) if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS): exclude(dir, item) matcher.add(tag_files, True) @@ -1307,13 +1304,13 @@ class ArchiveRecreater: logger.info('Replaying items from interrupted operation...') item = None for item in old_target.iter_items(): - if b'chunks' in item: - for chunk in item[b'chunks']: + if 'chunks' in item: + for chunk in item.chunks: self.cache.chunk_incref(chunk.id, target.stats) target.stats.nfiles += 1 target.add_item(item) if item: - resume_from = item[b'path'] + resume_from = item.path else: resume_from = None if self.progress: diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 102e91e26..6c09b0c9c 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -38,6 +38,7 @@ from .helpers import update_excludes, check_extension_modules from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo from .helpers import log_multi from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern +from .item import Item from .key import key_creator, RepoKey, PassphraseKey from .platform import get_flags from .remote import RepositoryServer, RemoteRepository, cache_if_remote @@ -405,22 +406,22 @@ class Archiver: hardlink_masters = {} if partial_extract else None def item_is_hardlink_master(item): - return (partial_extract and stat.S_ISREG(item[b'mode']) and - item.get(b'hardlink_master', True) and b'source' not in item) + return (partial_extract and stat.S_ISREG(item.mode) and + item.get('hardlink_master', True) and 'source' not in item) for item in archive.iter_items(preload=True, - filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])): - orig_path = item[b'path'] + filter=lambda item: item_is_hardlink_master(item) or matcher.match(item.path)): + orig_path = item.path if item_is_hardlink_master(item): - hardlink_masters[orig_path] = (item.get(b'chunks'), None) - if not matcher.match(item[b'path']): + hardlink_masters[orig_path] = (item.get('chunks'), None) + if not matcher.match(item.path): continue if strip_components: - item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:]) - if not item[b'path']: + item.path = os.sep.join(orig_path.split(os.sep)[strip_components:]) + if not item.path: continue if not args.dry_run: - while dirs and not item[b'path'].startswith(dirs[-1][b'path']): + while dirs and not item.path.startswith(dirs[-1].path): archive.extract_item(dirs.pop(-1), stdout=stdout) if output_list: logging.getLogger('borg.output.list').info(remove_surrogates(orig_path)) @@ -428,7 +429,7 @@ class Archiver: if dry_run: archive.extract_item(item, dry_run=True) else: - if stat.S_ISDIR(item[b'mode']): + if stat.S_ISDIR(item.mode): dirs.append(item) archive.extract_item(item, restore_attrs=False) else: @@ -455,58 +456,58 @@ class Archiver: return self.compare_chunk_contents(chunks1, chunks2) def sum_chunk_size(item, consider_ids=None): - if item.get(b'deleted'): + if item.get('deleted'): return None else: - return sum(c.size for c in item[b'chunks'] + return sum(c.size for c in item.chunks if consider_ids is None or c.id in consider_ids) def get_owner(item): if args.numeric_owner: - return item[b'uid'], item[b'gid'] + return item.uid, item.gid else: - return item[b'user'], item[b'group'] + return item.user, item.group def get_mode(item): - if b'mode' in item: - return stat.filemode(item[b'mode']) + if 'mode' in item: + return stat.filemode(item.mode) else: return [None] def has_hardlink_master(item, hardlink_masters): - return stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters + return stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters def compare_link(item1, item2): # These are the simple link cases. For special cases, e.g. if a # regular file is replaced with a link or vice versa, it is # indicated in compare_mode instead. - if item1.get(b'deleted'): + if item1.get('deleted'): return 'added link' - elif item2.get(b'deleted'): + elif item2.get('deleted'): return 'removed link' - elif b'source' in item1 and b'source' in item2 and item1[b'source'] != item2[b'source']: + elif 'source' in item1 and 'source' in item2 and item1.source != item2.source: return 'changed link' def contents_changed(item1, item2): if can_compare_chunk_ids: - return item1[b'chunks'] != item2[b'chunks'] + return item1.chunks != item2.chunks else: if sum_chunk_size(item1) != sum_chunk_size(item2): return True else: - chunk_ids1 = [c.id for c in item1[b'chunks']] - chunk_ids2 = [c.id for c in item2[b'chunks']] + chunk_ids1 = [c.id for c in item1.chunks] + chunk_ids2 = [c.id for c in item2.chunks] return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2) def compare_content(path, item1, item2): if contents_changed(item1, item2): - if item1.get(b'deleted'): + if item1.get('deleted'): return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2)))) - elif item2.get(b'deleted'): + elif item2.get('deleted'): return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1)))) else: - chunk_ids1 = {c.id for c in item1[b'chunks']} - chunk_ids2 = {c.id for c in item2[b'chunks']} + chunk_ids1 = {c.id for c in item1.chunks} + chunk_ids2 = {c.id for c in item2.chunks} added_ids = chunk_ids2 - chunk_ids1 removed_ids = chunk_ids1 - chunk_ids2 added = sum_chunk_size(item2, added_ids) @@ -515,9 +516,9 @@ class Archiver: format_file_size(-removed, precision=1, sign=True))) def compare_directory(item1, item2): - if item2.get(b'deleted') and not item1.get(b'deleted'): + if item2.get('deleted') and not item1.get('deleted'): return 'removed directory' - elif item1.get(b'deleted') and not item2.get(b'deleted'): + elif item1.get('deleted') and not item2.get('deleted'): return 'added directory' def compare_owner(item1, item2): @@ -527,7 +528,7 @@ class Archiver: return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2) def compare_mode(item1, item2): - if item1[b'mode'] != item2[b'mode']: + if item1.mode != item2.mode: return '[{} -> {}]'.format(get_mode(item1), get_mode(item2)) def compare_items(output, path, item1, item2, hardlink_masters, deleted=False): @@ -538,15 +539,15 @@ class Archiver: changes = [] if has_hardlink_master(item1, hardlink_masters): - item1 = hardlink_masters[item1[b'source']][0] + item1 = hardlink_masters[item1.source][0] if has_hardlink_master(item2, hardlink_masters): - item2 = hardlink_masters[item2[b'source']][1] + item2 = hardlink_masters[item2.source][1] if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l': changes.append(compare_link(item1, item2)) - if b'chunks' in item1 and b'chunks' in item2: + if 'chunks' in item1 and 'chunks' in item2: changes.append(compare_content(path, item1, item2)) if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd': @@ -570,21 +571,21 @@ class Archiver: def compare_archives(archive1, archive2, matcher): def hardlink_master_seen(item): - return b'source' not in item or not stat.S_ISREG(item[b'mode']) or item[b'source'] in hardlink_masters + return 'source' not in item or not stat.S_ISREG(item.mode) or item.source in hardlink_masters def is_hardlink_master(item): - return item.get(b'hardlink_master', True) and b'source' not in item + return item.get('hardlink_master', True) and 'source' not in item def update_hardlink_masters(item1, item2): if is_hardlink_master(item1) or is_hardlink_master(item2): - hardlink_masters[item1[b'path']] = (item1, item2) + hardlink_masters[item1.path] = (item1, item2) def compare_or_defer(item1, item2): update_hardlink_masters(item1, item2) if not hardlink_master_seen(item1) or not hardlink_master_seen(item2): deferred.append((item1, item2)) else: - compare_items(output, item1[b'path'], item1, item2, hardlink_masters) + compare_items(output, item1.path, item1, item2, hardlink_masters) orphans_archive1 = collections.OrderedDict() orphans_archive2 = collections.OrderedDict() @@ -593,44 +594,44 @@ class Archiver: output = [] for item1, item2 in zip_longest( - archive1.iter_items(lambda item: matcher.match(item[b'path'])), - archive2.iter_items(lambda item: matcher.match(item[b'path'])), + archive1.iter_items(lambda item: matcher.match(item.path)), + archive2.iter_items(lambda item: matcher.match(item.path)), ): - if item1 and item2 and item1[b'path'] == item2[b'path']: + if item1 and item2 and item1.path == item2.path: compare_or_defer(item1, item2) continue if item1: - matching_orphan = orphans_archive2.pop(item1[b'path'], None) + matching_orphan = orphans_archive2.pop(item1.path, None) if matching_orphan: compare_or_defer(item1, matching_orphan) else: - orphans_archive1[item1[b'path']] = item1 + orphans_archive1[item1.path] = item1 if item2: - matching_orphan = orphans_archive1.pop(item2[b'path'], None) + matching_orphan = orphans_archive1.pop(item2.path, None) if matching_orphan: compare_or_defer(matching_orphan, item2) else: - orphans_archive2[item2[b'path']] = item2 + orphans_archive2[item2.path] = item2 # At this point orphans_* contain items that had no matching partner in the other archive - deleted_item = { - b'deleted': True, - b'chunks': [], - b'mode': 0, - } + deleted_item = Item( + deleted=True, + chunks=[], + mode=0, + ) for added in orphans_archive2.values(): - path = added[b'path'] - deleted_item[b'path'] = path + path = added.path + deleted_item.path = path update_hardlink_masters(deleted_item, added) compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True) for deleted in orphans_archive1.values(): - path = deleted[b'path'] - deleted_item[b'path'] = path + path = deleted.path + deleted_item.path = path update_hardlink_masters(deleted, deleted_item) compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True) for item1, item2 in deferred: assert hardlink_master_seen(item1) assert hardlink_master_seen(item2) - compare_items(output, item1[b'path'], item1, item2, hardlink_masters) + compare_items(output, item1.path, item1, item2, hardlink_masters) for line in sorted(output): print_output(line) @@ -749,7 +750,7 @@ class Archiver: sys.stdout.write(bytestring.decode('utf-8', errors='replace')) else: write = sys.stdout.buffer.write - for item in archive.iter_items(lambda item: matcher.match(item[b'path'])): + for item in archive.iter_items(lambda item: matcher.match(item.path)): write(safe_encode(formatter.format_item(item))) else: for archive_info in manifest.list_archive_infos(sort_by='ts'): @@ -2116,7 +2117,7 @@ def sig_info_handler(signum, stack): # pragma: no cover logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total))) break if func in ('extract_item', ): # extract op - path = loc['item'][b'path'] + path = loc['item'].path try: pos = loc['fd'].tell() except Exception: diff --git a/src/borg/cache.py b/src/borg/cache.py index 9b7ad07d7..4dc4c2181 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -16,6 +16,7 @@ from .helpers import get_cache_dir from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex from .helpers import format_file_size from .helpers import yes +from .item import Item from .key import PlaintextKey from .locking import UpgradableLock from .remote import cache_if_remote @@ -298,8 +299,9 @@ Chunk index: {0.total_unique_chunks:20d} {0.total_chunks:20d}""" if not isinstance(item, dict): logger.error('Error: Did not get expected metadata dict - archive corrupted!') continue - if b'chunks' in item: - for chunk_id, size, csize in item[b'chunks']: + item = Item(internal_dict=item) + if 'chunks' in item: + for chunk_id, size, csize in item.chunks: chunk_idx.add(chunk_id, 1, size, csize) if self.do_cache: fn = mkpath(archive_id) diff --git a/src/borg/constants.py b/src/borg/constants.py index 0f6d3ddc7..1a970887d 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -1,10 +1,9 @@ # this set must be kept complete, otherwise the RobustUnpacker might malfunction: -ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master', - b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime', - b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ]) +ITEM_KEYS = set(['path', 'source', 'rdev', 'chunks', 'hardlink_master', + 'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', + 'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', ]) ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end') -ITEM_TEXT_KEYS = (b'path', b'source', b'user', b'group') # default umask, overriden by --umask, defaults to read/write only for owner UMASK_DEFAULT = 0o077 diff --git a/src/borg/fuse.py b/src/borg/fuse.py index c43ba9399..98b8bd6d2 100644 --- a/src/borg/fuse.py +++ b/src/borg/fuse.py @@ -15,7 +15,7 @@ logger = create_logger() from .archive import Archive from .helpers import daemonize -from .helpers import bigint_to_int +from .item import Item from .lrucache import LRUCache # Does this version of llfuse support ns precision? @@ -38,12 +38,13 @@ class ItemCache: def add(self, item): pos = self.fd.seek(0, io.SEEK_END) - self.fd.write(msgpack.packb(item)) + self.fd.write(msgpack.packb(item.as_dict())) return pos + self.offset def get(self, inode): self.fd.seek(inode - self.offset, io.SEEK_SET) - return next(msgpack.Unpacker(self.fd, read_size=1024)) + item = next(msgpack.Unpacker(self.fd, read_size=1024)) + return Item(internal_dict=item) class FuseOperations(llfuse.Operations): @@ -57,7 +58,7 @@ class FuseOperations(llfuse.Operations): self.items = {} self.parent = {} self.contents = defaultdict(dict) - self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()} + self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid()) self.pending_archives = {} self.accounted_chunks = {} self.cache = ItemCache() @@ -86,8 +87,9 @@ class FuseOperations(llfuse.Operations): _, data = self.key.decrypt(key, chunk) unpacker.feed(data) for item in unpacker: - segments = prefix + os.fsencode(os.path.normpath(item[b'path'])).split(b'/') - del item[b'path'] + item = Item(internal_dict=item) + segments = prefix + os.fsencode(os.path.normpath(item.path)).split(b'/') + del item.path num_segments = len(segments) parent = 1 for i, segment in enumerate(segments, 1): @@ -98,10 +100,10 @@ class FuseOperations(llfuse.Operations): self.parent[archive_inode] = parent # Leaf segment? if i == num_segments: - if b'source' in item and stat.S_ISREG(item[b'mode']): - inode = self._find_inode(item[b'source'], prefix) + if 'source' in item and stat.S_ISREG(item.mode): + inode = self._find_inode(item.source, prefix) item = self.cache.get(inode) - item[b'nlink'] = item.get(b'nlink', 1) + 1 + item.nlink = item.get('nlink', 1) + 1 self.items[inode] = item else: inode = self.cache.add(item) @@ -151,58 +153,56 @@ class FuseOperations(llfuse.Operations): item = self.get_item(inode) size = 0 dsize = 0 - try: - for key, chunksize, _ in item[b'chunks']: + if 'chunks' in item: + for key, chunksize, _ in item.chunks: size += chunksize if self.accounted_chunks.get(key, inode) == inode: self.accounted_chunks[key] = inode dsize += chunksize - except KeyError: - pass entry = llfuse.EntryAttributes() entry.st_ino = inode entry.generation = 0 entry.entry_timeout = 300 entry.attr_timeout = 300 - entry.st_mode = item[b'mode'] - entry.st_nlink = item.get(b'nlink', 1) - entry.st_uid = item[b'uid'] - entry.st_gid = item[b'gid'] - entry.st_rdev = item.get(b'rdev', 0) + entry.st_mode = item.mode + entry.st_nlink = item.get('nlink', 1) + entry.st_uid = item.uid + entry.st_gid = item.gid + entry.st_rdev = item.get('rdev', 0) entry.st_size = size entry.st_blksize = 512 entry.st_blocks = dsize / 512 # note: older archives only have mtime (not atime nor ctime) if have_fuse_xtime_ns: - entry.st_mtime_ns = bigint_to_int(item[b'mtime']) - if b'atime' in item: - entry.st_atime_ns = bigint_to_int(item[b'atime']) + entry.st_mtime_ns = item.mtime + if 'atime' in item: + entry.st_atime_ns = item.atime else: - entry.st_atime_ns = bigint_to_int(item[b'mtime']) - if b'ctime' in item: - entry.st_ctime_ns = bigint_to_int(item[b'ctime']) + entry.st_atime_ns = item.mtime + if 'ctime' in item: + entry.st_ctime_ns = item.ctime else: - entry.st_ctime_ns = bigint_to_int(item[b'mtime']) + entry.st_ctime_ns = item.mtime else: - entry.st_mtime = bigint_to_int(item[b'mtime']) / 1e9 - if b'atime' in item: - entry.st_atime = bigint_to_int(item[b'atime']) / 1e9 + entry.st_mtime = item.mtime / 1e9 + if 'atime' in item: + entry.st_atime = item.atime / 1e9 else: - entry.st_atime = bigint_to_int(item[b'mtime']) / 1e9 - if b'ctime' in item: - entry.st_ctime = bigint_to_int(item[b'ctime']) / 1e9 + entry.st_atime = item.mtime / 1e9 + if 'ctime' in item: + entry.st_ctime = item.ctime / 1e9 else: - entry.st_ctime = bigint_to_int(item[b'mtime']) / 1e9 + entry.st_ctime = item.mtime / 1e9 return entry def listxattr(self, inode, ctx=None): item = self.get_item(inode) - return item.get(b'xattrs', {}).keys() + return item.get('xattrs', {}).keys() def getxattr(self, inode, name, ctx=None): item = self.get_item(inode) try: - return item.get(b'xattrs', {})[name] + return item.get('xattrs', {})[name] except KeyError: raise llfuse.FUSEError(errno.ENODATA) from None @@ -234,7 +234,7 @@ class FuseOperations(llfuse.Operations): def read(self, fh, offset, size): parts = [] item = self.get_item(fh) - for id, s, csize in item[b'chunks']: + for id, s, csize in item.chunks: if s < offset: offset -= s continue @@ -264,7 +264,7 @@ class FuseOperations(llfuse.Operations): def readlink(self, inode, ctx=None): item = self.get_item(inode) - return os.fsencode(item[b'source']) + return os.fsencode(item.source) def mount(self, mountpoint, extra_options, foreground=False): options = ['fsname=borgfs', 'ro'] diff --git a/src/borg/helpers.py b/src/borg/helpers.py index bd29ecd22..0008fc5bc 100644 --- a/src/borg/helpers.py +++ b/src/borg/helpers.py @@ -1157,10 +1157,8 @@ class ItemFormatter: class FakeArchive: fpr = name = "" - fake_item = { - b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0, - b'uid': 0, b'gid': 0, - } + from .item import Item + fake_item = Item(mode=0, path='', user='', group='', mtime=0, uid=0, gid=0) formatter = cls(FakeArchive, "") keys = [] keys.extend(formatter.call_keys.keys()) @@ -1196,12 +1194,12 @@ class ItemFormatter: 'csize': self.calculate_csize, 'num_chunks': self.calculate_num_chunks, 'unique_chunks': self.calculate_unique_chunks, - 'isomtime': partial(self.format_time, b'mtime'), - 'isoctime': partial(self.format_time, b'ctime'), - 'isoatime': partial(self.format_time, b'atime'), - 'mtime': partial(self.time, b'mtime'), - 'ctime': partial(self.time, b'ctime'), - 'atime': partial(self.time, b'atime'), + 'isomtime': partial(self.format_time, 'mtime'), + 'isoctime': partial(self.format_time, 'ctime'), + 'isoatime': partial(self.format_time, 'atime'), + 'mtime': partial(self.time, 'mtime'), + 'ctime': partial(self.time, 'ctime'), + 'atime': partial(self.time, 'atime'), } for hash_function in hashlib.algorithms_guaranteed: self.add_key(hash_function, partial(self.hash_item, hash_function)) @@ -1213,11 +1211,11 @@ class ItemFormatter: self.used_call_keys = set(self.call_keys) & self.format_keys def get_item_data(self, item): - mode = stat.filemode(item[b'mode']) + mode = stat.filemode(item.mode) item_type = mode[0] item_data = self.item_data - source = item.get(b'source', '') + source = item.get('source', '') extra = '' if source: source = remove_surrogates(source) @@ -1228,16 +1226,16 @@ class ItemFormatter: extra = ' link to %s' % source item_data['type'] = item_type item_data['mode'] = mode - item_data['user'] = item[b'user'] or item[b'uid'] - item_data['group'] = item[b'group'] or item[b'gid'] - item_data['uid'] = item[b'uid'] - item_data['gid'] = item[b'gid'] - item_data['path'] = remove_surrogates(item[b'path']) - item_data['bpath'] = item[b'path'] + item_data['user'] = item.user or item.uid + item_data['group'] = item.group or item.gid + item_data['uid'] = item.uid + item_data['gid'] = item.gid + item_data['path'] = remove_surrogates(item.path) + item_data['bpath'] = item.path item_data['source'] = source item_data['linktarget'] = source item_data['extra'] = extra - item_data['flags'] = item.get(b'bsdflags') + item_data['flags'] = item.get('bsdflags') for key in self.used_call_keys: item_data[key] = self.call_keys[key](item) return item_data @@ -1246,31 +1244,31 @@ class ItemFormatter: return self.format.format_map(self.get_item_data(item)) def calculate_num_chunks(self, item): - return len(item.get(b'chunks', [])) + return len(item.get('chunks', [])) def calculate_unique_chunks(self, item): chunk_index = self.archive.cache.chunks - return sum(1 for c in item.get(b'chunks', []) if chunk_index[c.id].refcount == 1) + return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1) def calculate_size(self, item): - return sum(c.size for c in item.get(b'chunks', [])) + return sum(c.size for c in item.get('chunks', [])) def calculate_csize(self, item): - return sum(c.csize for c in item.get(b'chunks', [])) + return sum(c.csize for c in item.get('chunks', [])) def hash_item(self, hash_function, item): - if b'chunks' not in item: + if 'chunks' not in item: return "" hash = hashlib.new(hash_function) - for _, data in self.archive.pipeline.fetch_many([c.id for c in item[b'chunks']]): + for _, data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]): hash.update(data) return hash.hexdigest() def format_time(self, key, item): - return format_time(safe_timestamp(item.get(key) or item[b'mtime'])) + return format_time(safe_timestamp(item.get(key) or item.mtime)) def time(self, key, item): - return safe_timestamp(item.get(key) or item[b'mtime']) + return safe_timestamp(item.get(key) or item.mtime) class ChunkIteratorFileWrapper: @@ -1314,7 +1312,7 @@ class ChunkIteratorFileWrapper: def open_item(archive, item): """Return file-like object for archived item (with chunks).""" - chunk_iterator = archive.pipeline.fetch_many([c.id for c in item[b'chunks']]) + chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks]) return ChunkIteratorFileWrapper(chunk_iterator) diff --git a/src/borg/item.py b/src/borg/item.py index 1bccade36..47c0bfcb7 100644 --- a/src/borg/item.py +++ b/src/borg/item.py @@ -21,25 +21,34 @@ class PropDict: __slots__ = ("_dict", ) # avoid setting attributes not supported by properties - def __init__(self, data_dict=None, **kw): + def __init__(self, data_dict=None, internal_dict=None, **kw): if data_dict is None: data = kw elif not isinstance(data_dict, dict): raise TypeError("data_dict must be dict") else: data = data_dict - # internally, we want an dict with only str-typed keys - _dict = {} - for k, v in data.items(): + self._dict = {} + self.update_internal(internal_dict or {}) + self.update(data) + + def update(self, d): + for k, v in d.items(): if isinstance(k, bytes): k = k.decode() - elif not isinstance(k, str): - raise TypeError("dict keys must be str or bytes, not %r" % k) - _dict[k] = v - unknown_keys = set(_dict) - self.VALID_KEYS - if unknown_keys: - raise ValueError("dict contains unknown keys %s" % ','.join(unknown_keys)) - self._dict = _dict + setattr(self, self._check_key(k), v) + + def update_internal(self, d): + for k, v in d.items(): + if isinstance(k, bytes): + k = k.decode() + self._dict[k] = v + + def __eq__(self, other): + return self.as_dict() == other.as_dict() + + def __repr__(self): + return '%s(internal_dict=%r)' % (self.__class__.__name__, self._dict) def as_dict(self): """return the internal dictionary""" @@ -110,7 +119,7 @@ class Item(PropDict): If an Item shall be serialized, give as_dict() method output to msgpack packer. """ - VALID_KEYS = set(key.decode() for key in ITEM_KEYS) # we want str-typed keys + VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', } # str-typed keys __slots__ = ("_dict", ) # avoid setting attributes not supported by properties @@ -118,14 +127,14 @@ class Item(PropDict): path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + acl_access = PropDict._make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_default = PropDict._make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_extended = PropDict._make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_nfs4 = PropDict._make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) - group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) - mode = PropDict._make_property('mode', int) uid = PropDict._make_property('uid', int) gid = PropDict._make_property('gid', int) @@ -138,6 +147,9 @@ class Item(PropDict): hardlink_master = PropDict._make_property('hardlink_master', bool) - chunks = PropDict._make_property('chunks', list) + chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None') xattrs = PropDict._make_property('xattrs', StableDict) + + deleted = PropDict._make_property('deleted', bool) + nlink = PropDict._make_property('nlink', int) diff --git a/src/borg/platform/darwin.pyx b/src/borg/platform/darwin.pyx index 016ad5aab..188e5f4f0 100644 --- a/src/borg/platform/darwin.pyx +++ b/src/borg/platform/darwin.pyx @@ -62,9 +62,9 @@ def acl_get(path, item, st, numeric_owner=False): if text == NULL: return if numeric_owner: - item[b'acl_extended'] = _remove_non_numeric_identifier(text) + item['acl_extended'] = _remove_non_numeric_identifier(text) else: - item[b'acl_extended'] = text + item['acl_extended'] = text finally: acl_free(text) acl_free(acl) @@ -72,18 +72,16 @@ def acl_get(path, item, st, numeric_owner=False): def acl_set(path, item, numeric_owner=False): cdef acl_t acl = NULL - try: + acl_text = item.get('acl_extended') + if acl_text is not None: try: if numeric_owner: - acl = acl_from_text(item[b'acl_extended']) + acl = acl_from_text(acl_text) else: - acl = acl_from_text(_remove_numeric_id_if_possible(item[b'acl_extended'])) - except KeyError: - return - if acl == NULL: - return - if acl_set_link_np(os.fsencode(path), ACL_TYPE_EXTENDED, acl): - return - finally: - acl_free(acl) - + acl = acl_from_text(_remove_numeric_id_if_possible(acl_text)) + if acl == NULL: + return + if acl_set_link_np(os.fsencode(path), ACL_TYPE_EXTENDED, acl): + return + finally: + acl_free(acl) diff --git a/src/borg/platform/freebsd.pyx b/src/borg/platform/freebsd.pyx index 7553a26cc..0a02ed8b2 100644 --- a/src/borg/platform/freebsd.pyx +++ b/src/borg/platform/freebsd.pyx @@ -57,10 +57,10 @@ def acl_get(path, item, st, numeric_owner=False): return flags |= ACL_TEXT_NUMERIC_IDS if numeric_owner else 0 if ret > 0: - _get_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', flags) + _get_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', flags) else: - _get_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', flags) - _get_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', flags) + _get_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', flags) + _get_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', flags) cdef _set_acl(p, type, item, attribute, numeric_owner=False): @@ -98,6 +98,6 @@ def acl_set(path, item, numeric_owner=False): of the user/group names """ p = os.fsencode(path) - _set_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', numeric_owner) - _set_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', numeric_owner) - _set_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', numeric_owner) + _set_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', numeric_owner) + _set_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', numeric_owner) + _set_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', numeric_owner) diff --git a/src/borg/platform/linux.pyx b/src/borg/platform/linux.pyx index 76d76521f..cb10253ea 100644 --- a/src/borg/platform/linux.pyx +++ b/src/borg/platform/linux.pyx @@ -171,12 +171,12 @@ def acl_get(path, item, st, numeric_owner=False): if access_acl: access_text = acl_to_text(access_acl, NULL) if access_text: - item[b'acl_access'] = converter(access_text) + item['acl_access'] = converter(access_text) default_acl = acl_get_file(p, ACL_TYPE_DEFAULT) if default_acl: default_text = acl_to_text(default_acl, NULL) if default_text: - item[b'acl_default'] = converter(default_text) + item['acl_default'] = converter(default_text) finally: acl_free(default_text) acl_free(default_acl) @@ -193,8 +193,8 @@ def acl_set(path, item, numeric_owner=False): converter = posix_acl_use_stored_uid_gid else: converter = acl_use_local_uid_gid - access_text = item.get(b'acl_access') - default_text = item.get(b'acl_default') + access_text = item.get('acl_access') + default_text = item.get('acl_default') if access_text: try: access_acl = acl_from_text(converter(access_text)) diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index 2f276086b..a2ee23f5e 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -7,6 +7,7 @@ import pytest import msgpack from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics +from ..item import Item from ..key import PlaintextKey from ..helpers import Manifest from . import BaseTestCase @@ -38,12 +39,12 @@ def tests_stats_progress(stats, columns=80): out = StringIO() stats.update(10**3, 0, unique=False) - stats.show_progress(item={b'path': 'foo'}, final=False, stream=out) + stats.show_progress(item=Item(path='foo'), final=False, stream=out) s = '1.02 kB O 10 B C 10 B D 0 N foo' buf = ' ' * (columns - len(s)) assert out.getvalue() == s + buf + "\r" out = StringIO() - stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out) + stats.show_progress(item=Item(path='foo'*40), final=False, stream=out) s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo' buf = ' ' * (columns - len(s)) assert out.getvalue() == s + buf + "\r" @@ -93,7 +94,7 @@ class ArchiveTimestampTestCase(BaseTestCase): class ChunkBufferTestCase(BaseTestCase): def test(self): - data = [{b'foo': 1}, {b'bar': 2}] + data = [Item(path='p1'), Item(path='p2')] cache = MockCache() key = PlaintextKey(None) chunks = CacheChunkBuffer(cache, key, None) @@ -105,11 +106,11 @@ class ChunkBufferTestCase(BaseTestCase): unpacker = msgpack.Unpacker() for id in chunks.chunks: unpacker.feed(cache.objects[id]) - self.assert_equal(data, list(unpacker)) + self.assert_equal(data, [Item(internal_dict=d) for d in unpacker]) def test_partial(self): - big = b"0123456789" * 10000 - data = [{b'full': 1, b'data': big}, {b'partial': 2, b'data': big}] + big = "0123456789" * 10000 + data = [Item(path='full', source=big), Item(path='partial', source=big)] cache = MockCache() key = PlaintextKey(None) chunks = CacheChunkBuffer(cache, key, None) @@ -126,7 +127,7 @@ class ChunkBufferTestCase(BaseTestCase): unpacker = msgpack.Unpacker() for id in chunks.chunks: unpacker.feed(cache.objects[id]) - self.assert_equal(data, list(unpacker)) + self.assert_equal(data, [Item(internal_dict=d) for d in unpacker]) class RobustUnpackerTestCase(BaseTestCase): diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index a6ce4ce7a..7abefd39a 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -1641,8 +1641,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): archive, repository = self.open_archive('archive1') with repository: for item in archive.iter_items(): - if item[b'path'].endswith('testsuite/archiver.py'): - repository.delete(item[b'chunks'][-1].id) + if item.path.endswith('testsuite/archiver.py'): + repository.delete(item.chunks[-1].id) break repository.commit() self.cmd('check', self.repository_location, exit_code=1) @@ -1696,8 +1696,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase): archive, repository = self.open_archive('archive1') with repository: for item in archive.iter_items(): - if item[b'path'].endswith('testsuite/archiver.py'): - chunk = item[b'chunks'][-1] + if item.path.endswith('testsuite/archiver.py'): + chunk = item.chunks[-1] data = repository.get(chunk.id) + b'1234' repository.put(chunk.id, data) break diff --git a/src/borg/testsuite/item.py b/src/borg/testsuite/item.py index bd66e4831..b0b7569e3 100644 --- a/src/borg/testsuite/item.py +++ b/src/borg/testsuite/item.py @@ -35,13 +35,13 @@ def test_item_empty(): def test_item_from_dict(): # does not matter whether we get str or bytes keys - item = Item({b'path': b'/a/b/c', b'mode': 0o666}) + item = Item({b'path': '/a/b/c', b'mode': 0o666}) assert item.path == '/a/b/c' assert item.mode == 0o666 assert 'path' in item # does not matter whether we get str or bytes keys - item = Item({'path': b'/a/b/c', 'mode': 0o666}) + item = Item({'path': '/a/b/c', 'mode': 0o666}) assert item.path == '/a/b/c' assert item.mode == 0o666 assert 'mode' in item @@ -60,7 +60,7 @@ def test_item_from_dict(): def test_item_from_kw(): - item = Item(path=b'/a/b/c', mode=0o666) + item = Item(path='/a/b/c', mode=0o666) assert item.path == '/a/b/c' assert item.mode == 0o666 @@ -107,7 +107,7 @@ def test_item_se_str_property(): item.path = 42 # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut - item = Item({'path': b'/a/\xfc/c'}) + item = Item(internal_dict={'path': b'/a/\xfc/c'}) assert item.path == '/a/\udcfc/c' # getting a surrogate-escaped representation assert item.as_dict() == {'path': b'/a/\xfc/c'} del item.path diff --git a/src/borg/testsuite/platform.py b/src/borg/testsuite/platform.py index 857920079..991c98b84 100644 --- a/src/borg/testsuite/platform.py +++ b/src/borg/testsuite/platform.py @@ -51,26 +51,26 @@ class PlatformLinuxTestCase(BaseTestCase): return item def set_acl(self, path, access=None, default=None, numeric_owner=False): - item = {b'acl_access': access, b'acl_default': default} + item = {'acl_access': access, 'acl_default': default} acl_set(path, item, numeric_owner=numeric_owner) def test_access_acl(self): file = tempfile.NamedTemporaryFile() self.assert_equal(self.get_acl(file.name), {}) self.set_acl(file.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=False) - self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)[b'acl_access']) - self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)[b'acl_access']) - self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)[b'acl_access']) + self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)['acl_access']) + self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)['acl_access']) + self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)['acl_access']) file2 = tempfile.NamedTemporaryFile() self.set_acl(file2.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=True) - self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access']) - self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access']) + self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)['acl_access']) + self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)['acl_access']) def test_default_acl(self): self.assert_equal(self.get_acl(self.tmpdir), {}) self.set_acl(self.tmpdir, access=ACCESS_ACL, default=DEFAULT_ACL) - self.assert_equal(self.get_acl(self.tmpdir)[b'acl_access'], ACCESS_ACL) - self.assert_equal(self.get_acl(self.tmpdir)[b'acl_default'], DEFAULT_ACL) + self.assert_equal(self.get_acl(self.tmpdir)['acl_access'], ACCESS_ACL) + self.assert_equal(self.get_acl(self.tmpdir)['acl_default'], DEFAULT_ACL) def test_non_ascii_acl(self): # Testing non-ascii ACL processing to see whether our code is robust. @@ -86,18 +86,18 @@ class PlatformLinuxTestCase(BaseTestCase): group_entry_numeric = 'group:666:rw-:666'.encode('ascii') acl = b'\n'.join([nothing_special, user_entry, group_entry]) self.set_acl(file.name, access=acl, numeric_owner=False) - acl_access = self.get_acl(file.name, numeric_owner=False)[b'acl_access'] + acl_access = self.get_acl(file.name, numeric_owner=False)['acl_access'] self.assert_in(user_entry, acl_access) self.assert_in(group_entry, acl_access) - acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access'] + acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access'] self.assert_in(user_entry_numeric, acl_access_numeric) self.assert_in(group_entry_numeric, acl_access_numeric) file2 = tempfile.NamedTemporaryFile() self.set_acl(file2.name, access=acl, numeric_owner=True) - acl_access = self.get_acl(file2.name, numeric_owner=False)[b'acl_access'] + acl_access = self.get_acl(file2.name, numeric_owner=False)['acl_access'] self.assert_in(user_entry, acl_access) self.assert_in(group_entry, acl_access) - acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access'] + acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access'] self.assert_in(user_entry_numeric, acl_access_numeric) self.assert_in(group_entry_numeric, acl_access_numeric) @@ -125,7 +125,7 @@ class PlatformDarwinTestCase(BaseTestCase): return item def set_acl(self, path, acl, numeric_owner=False): - item = {b'acl_extended': acl} + item = {'acl_extended': acl} acl_set(path, item, numeric_owner=numeric_owner) def test_access_acl(self): @@ -133,11 +133,11 @@ class PlatformDarwinTestCase(BaseTestCase): file2 = tempfile.NamedTemporaryFile() self.assert_equal(self.get_acl(file.name), {}) self.set_acl(file.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=False) - self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)[b'acl_extended']) - self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)[b'acl_extended']) + self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)['acl_extended']) + self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)['acl_extended']) self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True) - self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended']) - self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended']) + self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)['acl_extended']) + self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)['acl_extended']) @unittest.skipUnless(sys.platform.startswith(('linux', 'freebsd', 'darwin')), 'POSIX only tests')