Merge pull request #1117 from ThomasWaldmann/items-refactor

refactor to use Item class
2026-06-09 00:32:37 -04:00 · 2016-06-04 17:51:08 +02:00 · 2016-06-04 17:51:08 +02:00 · 504af0206d
commit 504af0206d
parent 60fad6388e 60da32123a
14 changed files with 324 additions and 316 deletions
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@ -33,6 +33,7 @@ from .helpers import ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
 from .helpers import consume
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
+from .item import Item
 from .key import key_factory
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
 from .remote import cache_if_remote
@ -86,7 +87,7 @@ class Statistics:
            columns, lines = get_terminal_size()
            if not final:
                msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
-                path = remove_surrogates(item[b'path']) if item else ''
+                path = remove_surrogates(item.path) if item else ''
                space = columns - swidth(msg)
                if space < swidth('...') + swidth(path):
                    path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:])
@ -106,16 +107,16 @@ class DownloadPipeline:
        unpacker = msgpack.Unpacker(use_list=False)
        for _, data in self.fetch_many(ids):
            unpacker.feed(data)
-            items = [decode_dict(item, ITEM_TEXT_KEYS) for item in unpacker]
+            items = [Item(internal_dict=item) for item in unpacker]
            if filter:
                items = [item for item in items if filter(item)]
            for item in items:
-                if b'chunks' in item:
-                    item[b'chunks'] = [ChunkListEntry(*e) for e in item[b'chunks']]
+                if 'chunks' in item:
+                    item.chunks = [ChunkListEntry(*e) for e in item.chunks]
            if preload:
                for item in items:
-                    if b'chunks' in item:
-                        self.repository.preload([c.id for c in item[b'chunks']])
+                    if 'chunks' in item:
+                        self.repository.preload([c.id for c in item.chunks])
            for item in items:
                yield item

@ -135,7 +136,7 @@ class ChunkBuffer:
        self.chunker = Chunker(self.key.chunk_seed, *chunker_params)

    def add(self, item):
-        self.buffer.write(self.packer.pack(StableDict(item)))
+        self.buffer.write(self.packer.pack(item.as_dict()))
        if self.is_full():
            self.flush()

@ -286,9 +287,6 @@ Number of files: {0.stats.nfiles}'''.format(
            yield item

    def add_item(self, item):
-        unknown_keys = set(item) - ITEM_KEYS
-        assert not unknown_keys, ('unknown item metadata keys detected, please update constants.ITEM_KEYS: %s',
-                                  ','.join(k.decode('ascii') for k in unknown_keys))
        if self.show_progress:
            self.stats.show_progress(item=item, dt=0.2)
        self.items_buffer.add(item)
@ -356,9 +354,10 @@ Number of files: {0.stats.nfiles}'''.format(
            _, data = self.key.decrypt(id, chunk)
            unpacker.feed(data)
            for item in unpacker:
-                if b'chunks' in item:
+                item = Item(internal_dict=item)
+                if 'chunks' in item:
                    stats.nfiles += 1
-                    add_file_chunks(item[b'chunks'])
+                    add_file_chunks(item.chunks)
        cache.rollback()
        return stats

@ -373,22 +372,22 @@ Number of files: {0.stats.nfiles}'''.format(
        :param stdout: write extracted data to stdout
        :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
        :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
-        :param original_path: b'path' key as stored in archive
+        :param original_path: 'path' key as stored in archive
        """
        if dry_run or stdout:
-            if b'chunks' in item:
-                for _, data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
+            if 'chunks' in item:
+                for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
                    if stdout:
                        sys.stdout.buffer.write(data)
                if stdout:
                    sys.stdout.buffer.flush()
            return

-        original_path = original_path or item[b'path']
+        original_path = original_path or item.path
        dest = self.cwd
-        if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
+        if item.path.startswith(('/', '..')):
            raise Exception('Path should be relative and local')
-        path = os.path.join(dest, item[b'path'])
+        path = os.path.join(dest, item.path)
        # Attempt to remove existing files, ignore errors on failure
        try:
            st = os.lstat(path)
@ -400,27 +399,27 @@ Number of files: {0.stats.nfiles}'''.format(
            raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None
        except OSError:
            pass
-        mode = item[b'mode']
+        mode = item.mode
        if stat.S_ISREG(mode):
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))

            # Hard link?
-            if b'source' in item:
-                source = os.path.join(dest, item[b'source'])
+            if 'source' in item:
+                source = os.path.join(dest, item.source)
                if os.path.exists(path):
                    os.unlink(path)
                if not hardlink_masters:
                    os.link(source, path)
                    return
-                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                item.chunks, link_target = hardlink_masters[item.source]
                if link_target:
                    # Hard link was extracted previously, just link
                    os.link(link_target, path)
                    return
                # Extract chunks, since the item which had the chunks was not extracted
            with open(path, 'wb') as fd:
-                ids = [c.id for c in item[b'chunks']]
+                ids = [c.id for c in item.chunks]
                for _, data in self.pipeline.fetch_many(ids, is_preloaded=True):
                    if sparse and self.zeros.startswith(data):
                        # all-zero chunk: create a hole in a sparse file
@ -433,7 +432,7 @@ Number of files: {0.stats.nfiles}'''.format(
                self.restore_attrs(path, item, fd=fd.fileno())
            if hardlink_masters:
                # Update master entry with extracted file path, so that following hardlinks don't extract twice.
-                hardlink_masters[item.get(b'source') or original_path] = (None, path)
+                hardlink_masters[item.get('source') or original_path] = (None, path)
        elif stat.S_ISDIR(mode):
            if not os.path.exists(path):
                os.makedirs(path)
@ -442,7 +441,7 @@ Number of files: {0.stats.nfiles}'''.format(
        elif stat.S_ISLNK(mode):
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
-            source = item[b'source']
+            source = item.source
            if os.path.exists(path):
                os.unlink(path)
            try:
@ -456,18 +455,18 @@ Number of files: {0.stats.nfiles}'''.format(
            os.mkfifo(path)
            self.restore_attrs(path, item)
        elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
-            os.mknod(path, item[b'mode'], item[b'rdev'])
+            os.mknod(path, item.mode, item.rdev)
            self.restore_attrs(path, item)
        else:
-            raise Exception('Unknown archive item type %r' % item[b'mode'])
+            raise Exception('Unknown archive item type %r' % item.mode)

    def restore_attrs(self, path, item, symlink=False, fd=None):
        uid = gid = None
        if not self.numeric_owner:
-            uid = user2uid(item[b'user'])
-            gid = group2gid(item[b'group'])
-        uid = item[b'uid'] if uid is None else uid
-        gid = item[b'gid'] if gid is None else gid
+            uid = user2uid(item.user)
+            gid = group2gid(item.group)
+        uid = item.uid if uid is None else uid
+        gid = item.gid if gid is None else gid
        # This code is a bit of a mess due to os specific differences
        try:
            if fd:
@ -477,14 +476,14 @@ Number of files: {0.stats.nfiles}'''.format(
        except OSError:
            pass
        if fd:
-            os.fchmod(fd, item[b'mode'])
+            os.fchmod(fd, item.mode)
        elif not symlink:
-            os.chmod(path, item[b'mode'])
+            os.chmod(path, item.mode)
        elif has_lchmod:  # Not available on Linux
-            os.lchmod(path, item[b'mode'])
-        mtime = bigint_to_int(item[b'mtime'])
-        if b'atime' in item:
-            atime = bigint_to_int(item[b'atime'])
+            os.lchmod(path, item.mode)
+        mtime = item.mtime
+        if 'atime' in item:
+            atime = item.atime
        else:
            # old archives only had mtime in item metadata
            atime = mtime
@ -493,14 +492,14 @@ Number of files: {0.stats.nfiles}'''.format(
        else:
            os.utime(path, None, ns=(atime, mtime), follow_symlinks=False)
        acl_set(path, item, self.numeric_owner)
-        if b'bsdflags' in item:
+        if 'bsdflags' in item:
            try:
-                set_flags(path, item[b'bsdflags'], fd=fd)
+                set_flags(path, item.bsdflags, fd=fd)
            except OSError:
                pass
        # chown removes Linux capabilities, so set the extended attributes at the end, after chown, since they include
        # the Linux capabilities in the "security.capability" attribute.
-        xattrs = item.get(b'xattrs', {})
+        xattrs = item.get('xattrs', {})
        for k, v in xattrs.items():
            try:
                xattr.setxattr(fd or path, k, v, follow_symlinks=False)
@ -541,8 +540,9 @@ Number of files: {0.stats.nfiles}'''.format(
            unpacker.feed(data)
            self.cache.chunk_decref(items_id, stats)
            for item in unpacker:
-                if b'chunks' in item:
-                    for chunk_id, size, csize in item[b'chunks']:
+                item = Item(internal_dict=item)
+                if 'chunks' in item:
+                    for chunk_id, size, csize in item.chunks:
                        self.cache.chunk_decref(chunk_id, stats)
        if progress:
            pi.finish()
@ -550,39 +550,39 @@ Number of files: {0.stats.nfiles}'''.format(
        del self.manifest.archives[self.name]

    def stat_attrs(self, st, path):
-        item = {
-            b'mode': st.st_mode,
-            b'uid': st.st_uid, b'user': uid2user(st.st_uid),
-            b'gid': st.st_gid, b'group': gid2group(st.st_gid),
-            b'atime': int_to_bigint(st.st_atime_ns),
-            b'ctime': int_to_bigint(st.st_ctime_ns),
-            b'mtime': int_to_bigint(st.st_mtime_ns),
-        }
+        attrs = dict(
+            mode=st.st_mode,
+            uid=st.st_uid, user=uid2user(st.st_uid),
+            gid=st.st_gid, group=gid2group(st.st_gid),
+            atime=st.st_atime_ns,
+            ctime=st.st_ctime_ns,
+            mtime=st.st_mtime_ns,
+        )
        if self.numeric_owner:
-            item[b'user'] = item[b'group'] = None
+            attrs['user'] = attrs['group'] = None
        xattrs = xattr.get_all(path, follow_symlinks=False)
        if xattrs:
-            item[b'xattrs'] = StableDict(xattrs)
+            attrs['xattrs'] = StableDict(xattrs)
        bsdflags = get_flags(path, st)
        if bsdflags:
-            item[b'bsdflags'] = bsdflags
-        acl_get(path, item, st, self.numeric_owner)
-        return item
+            attrs['bsdflags'] = bsdflags
+        acl_get(path, attrs, st, self.numeric_owner)
+        return attrs

    def process_dir(self, path, st):
-        item = {b'path': make_path_safe(path)}
+        item = Item(path=make_path_safe(path))
        item.update(self.stat_attrs(st, path))
        self.add_item(item)
        return 'd'  # directory

    def process_fifo(self, path, st):
-        item = {b'path': make_path_safe(path)}
+        item = Item(path=make_path_safe(path))
        item.update(self.stat_attrs(st, path))
        self.add_item(item)
        return 'f'  # fifo

    def process_dev(self, path, st):
-        item = {b'path': make_path_safe(path), b'rdev': st.st_rdev}
+        item = Item(path=make_path_safe(path), rdev=st.st_rdev)
        item.update(self.stat_attrs(st, path))
        self.add_item(item)
        if stat.S_ISCHR(st.st_mode):
@ -592,7 +592,7 @@ Number of files: {0.stats.nfiles}'''.format(

    def process_symlink(self, path, st):
        source = os.readlink(path)
-        item = {b'path': make_path_safe(path), b'source': source}
+        item = Item(path=make_path_safe(path), source=source)
        item.update(self.stat_attrs(st, path))
        self.add_item(item)
        return 's'  # symlink
@ -604,15 +604,15 @@ Number of files: {0.stats.nfiles}'''.format(
        for data in self.chunker.chunkify(fd):
            chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats))
        self.stats.nfiles += 1
-        t = int_to_bigint(int(time.time()) * 1000000000)
-        item = {
-            b'path': path,
-            b'chunks': chunks,
-            b'mode': 0o100660,  # regular file, ug=rw
-            b'uid': uid, b'user': uid2user(uid),
-            b'gid': gid, b'group': gid2group(gid),
-            b'mtime': t, b'atime': t, b'ctime': t,
-        }
+        t = int(time.time()) * 1000000000
+        item = Item(
+            path=path,
+            chunks=chunks,
+            mode=0o100660,  # regular file, ug=rw
+            uid=uid, user=uid2user(uid),
+            gid=gid, group=gid2group(gid),
+            mtime=t, atime=t, ctime=t,
+        )
        self.add_item(item)
        return 'i'  # stdin

@ -623,11 +623,8 @@ Number of files: {0.stats.nfiles}'''.format(
        if st.st_nlink > 1:
            source = self.hard_links.get((st.st_ino, st.st_dev))
            if (st.st_ino, st.st_dev) in self.hard_links:
-                item = self.stat_attrs(st, path)
-                item.update({
-                    b'path': safe_path,
-                    b'source': source,
-                })
+                item = Item(path=safe_path, source=source)
+                item.update(self.stat_attrs(st, path))
                self.add_item(item)
                status = 'h'  # regular file, hardlink (to already seen inodes)
                return status
@ -649,10 +646,10 @@ Number of files: {0.stats.nfiles}'''.format(
                status = 'U'  # regular file, unchanged
        else:
            status = 'A'  # regular file, added
-        item = {
-            b'path': safe_path,
-            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
-        }
+        item = Item(
+            path=safe_path,
+            hardlink_master=st.st_nlink > 1,  # item is a hard link and has the chunks
+        )
        # Only chunkify the file if needed
        if chunks is None:
            compress = self.compression_decider1.decide(path)
@ -668,7 +665,7 @@ Number of files: {0.stats.nfiles}'''.format(
                        self.stats.show_progress(item=item, dt=0.2)
            cache.memorize_file(path_hash, st, [c.id for c in chunks])
            status = status or 'M'  # regular file, modified (if not 'A' already)
-        item[b'chunks'] = chunks
+        item.chunks = chunks
        item.update(self.stat_attrs(st, path))
        self.stats.nfiles += 1
        self.add_item(item)
@ -698,7 +695,7 @@ class RobustUnpacker:
    """
    def __init__(self, validator):
        super().__init__()
-        self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS]
+        self.item_keys = [msgpack.packb(name.encode()) for name in ITEM_KEYS]
        self.validator = validator
        self._buffered_data = []
        self._resync = False
@ -894,10 +891,10 @@ class ArchiveChecker:
            """
            offset = 0
            chunk_list = []
-            for chunk_id, size, csize in item[b'chunks']:
+            for chunk_id, size, csize in item.chunks:
                if chunk_id not in self.chunks:
                    # If a file chunk is missing, create an all empty replacement chunk
-                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(safe_decode(item[b'path']), offset, offset + size))
+                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item.path, offset, offset + size))
                    self.error_found = True
                    data = bytes(size)
                    chunk_id = self.key.id_hash(data)
@ -908,14 +905,14 @@ class ArchiveChecker:
                    add_reference(chunk_id, size, csize)
                chunk_list.append((chunk_id, size, csize))
                offset += size
-            item[b'chunks'] = chunk_list
+            item.chunks = chunk_list

        def robust_iterator(archive):
            """Iterates through all archive items

            Missing item chunks will be skipped and the msgpack stream will be restarted
            """
-            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
+            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item)
            _state = 0

            def missing_chunk_detector(chunk_id):
@ -946,7 +943,7 @@ class ArchiveChecker:
                    try:
                        for item in unpacker:
                            if isinstance(item, dict):
-                                yield item
+                                yield Item(internal_dict=item)
                            else:
                                report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)
                    except Exception:
@ -990,7 +987,7 @@ class ArchiveChecker:
                items_buffer = ChunkBuffer(self.key)
                items_buffer.write_chunk = add_callback
                for item in robust_iterator(archive):
-                    if b'chunks' in item:
+                    if 'chunks' in item:
                        verify_file_chunks(item)
                    items_buffer.add(item)
                items_buffer.flush(flush=True)
@ -1093,38 +1090,38 @@ class ArchiveRecreater:

        def item_is_hardlink_master(item):
            return (target_is_subset and
-                    stat.S_ISREG(item[b'mode']) and
-                    item.get(b'hardlink_master', True) and
-                    b'source' not in item and
-                    not matcher.match(item[b'path']))
+                    stat.S_ISREG(item.mode) and
+                    item.get('hardlink_master', True) and
+                    'source' not in item and
+                    not matcher.match(item.path))

        for item in archive.iter_items():
            if item_is_hardlink_master(item):
                # Re-visit all of these items in the archive even when fast-forwarding to rebuild hardlink_masters
-                hardlink_masters[item[b'path']] = (item.get(b'chunks'), None)
+                hardlink_masters[item.path] = (item.get('chunks'), None)
                continue
            if resume_from:
                # Fast forward to after the last processed file
-                if item[b'path'] == resume_from:
-                    logger.info('Fast-forwarded to %s', remove_surrogates(item[b'path']))
+                if item.path == resume_from:
+                    logger.info('Fast-forwarded to %s', remove_surrogates(item.path))
                    resume_from = None
                continue
-            if not matcher.match(item[b'path']):
-                self.print_file_status('x', item[b'path'])
+            if not matcher.match(item.path):
+                self.print_file_status('x', item.path)
                continue
-            if target_is_subset and stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters:
+            if target_is_subset and stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters:
                # master of this hard link is outside the target subset
-                chunks, new_source = hardlink_masters[item[b'source']]
+                chunks, new_source = hardlink_masters[item.source]
                if new_source is None:
                    # First item to use this master, move the chunks
-                    item[b'chunks'] = chunks
-                    hardlink_masters[item[b'source']] = (None, item[b'path'])
-                    del item[b'source']
+                    item.chunks = chunks
+                    hardlink_masters[item.source] = (None, item.path)
+                    del item.source
                else:
                    # Master was already moved, only update this item's source
-                    item[b'source'] = new_source
+                    item.source = new_source
            if self.dry_run:
-                self.print_file_status('-', item[b'path'])
+                self.print_file_status('-', item.path)
            else:
                try:
                    self.process_item(archive, target, item)
@ -1136,11 +1133,11 @@ class ArchiveRecreater:
            target.stats.show_progress(final=True)

    def process_item(self, archive, target, item):
-        if b'chunks' in item:
-            item[b'chunks'] = self.process_chunks(archive, target, item)
+        if 'chunks' in item:
+            item.chunks = self.process_chunks(archive, target, item)
            target.stats.nfiles += 1
        target.add_item(item)
-        self.print_file_status(file_status(item[b'mode']), item[b'path'])
+        self.print_file_status(file_status(item.mode), item.path)
        if self.interrupt:
            raise self.Interrupted

@ -1148,9 +1145,9 @@ class ArchiveRecreater:
        """Return new chunk ID list for 'item'."""
        # TODO: support --compression-from
        if not self.recompress and not target.recreate_rechunkify:
-            for chunk_id, size, csize in item[b'chunks']:
+            for chunk_id, size, csize in item.chunks:
                self.cache.chunk_incref(chunk_id, target.stats)
-            return item[b'chunks']
+            return item.chunks
        new_chunks = self.process_partial_chunks(target)
        chunk_iterator = self.create_chunk_iterator(archive, target, item)
        consume(chunk_iterator, len(new_chunks))
@ -1181,7 +1178,7 @@ class ArchiveRecreater:

    def create_chunk_iterator(self, archive, target, item):
        """Return iterator of chunks to store for 'item' from 'archive' in 'target'."""
-        chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item[b'chunks']])
+        chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks])
        if target.recreate_rechunkify:
            # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk
            # (does not load the entire file into memory)
@ -1243,7 +1240,7 @@ class ArchiveRecreater:
        """Add excludes to the matcher created by exclude_cache and exclude_if_present."""
        def exclude(dir, tag_item):
            if self.keep_tag_files:
-                tag_files.append(PathPrefixPattern(tag_item[b'path']))
+                tag_files.append(PathPrefixPattern(tag_item.path))
                tagged_dirs.append(FnmatchPattern(dir + '/'))
            else:
                tagged_dirs.append(PathPrefixPattern(dir))
@ -1255,18 +1252,18 @@ class ArchiveRecreater:
        cachedir_masters = {}

        for item in archive.iter_items(
-                filter=lambda item: item[b'path'].endswith(CACHE_TAG_NAME) or matcher.match(item[b'path'])):
-            if item[b'path'].endswith(CACHE_TAG_NAME):
-                cachedir_masters[item[b'path']] = item
-            if stat.S_ISREG(item[b'mode']):
-                dir, tag_file = os.path.split(item[b'path'])
+                filter=lambda item: item.path.endswith(CACHE_TAG_NAME) or matcher.match(item.path)):
+            if item.path.endswith(CACHE_TAG_NAME):
+                cachedir_masters[item.path] = item
+            if stat.S_ISREG(item.mode):
+                dir, tag_file = os.path.split(item.path)
                if tag_file in self.exclude_if_present:
                    exclude(dir, item)
                if self.exclude_caches and tag_file == CACHE_TAG_NAME:
-                    if b'chunks' in item:
+                    if 'chunks' in item:
                        file = open_item(archive, item)
                    else:
-                        file = open_item(archive, cachedir_masters[item[b'source']])
+                        file = open_item(archive, cachedir_masters[item.source])
                    if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
                        exclude(dir, item)
        matcher.add(tag_files, True)
@ -1307,13 +1304,13 @@ class ArchiveRecreater:
        logger.info('Replaying items from interrupted operation...')
        item = None
        for item in old_target.iter_items():
-            if b'chunks' in item:
-                for chunk in item[b'chunks']:
+            if 'chunks' in item:
+                for chunk in item.chunks:
                    self.cache.chunk_incref(chunk.id, target.stats)
                target.stats.nfiles += 1
            target.add_item(item)
        if item:
-            resume_from = item[b'path']
+            resume_from = item.path
        else:
            resume_from = None
        if self.progress:
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@ -38,6 +38,7 @@ from .helpers import update_excludes, check_extension_modules
 from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
 from .helpers import log_multi
 from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern
+from .item import Item
 from .key import key_creator, RepoKey, PassphraseKey
 from .platform import get_flags
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
@ -405,22 +406,22 @@ class Archiver:
        hardlink_masters = {} if partial_extract else None

        def item_is_hardlink_master(item):
-            return (partial_extract and stat.S_ISREG(item[b'mode']) and
-                    item.get(b'hardlink_master', True) and b'source' not in item)
+            return (partial_extract and stat.S_ISREG(item.mode) and
+                    item.get('hardlink_master', True) and 'source' not in item)

        for item in archive.iter_items(preload=True,
-                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
-            orig_path = item[b'path']
+                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item.path)):
+            orig_path = item.path
            if item_is_hardlink_master(item):
-                hardlink_masters[orig_path] = (item.get(b'chunks'), None)
-            if not matcher.match(item[b'path']):
+                hardlink_masters[orig_path] = (item.get('chunks'), None)
+            if not matcher.match(item.path):
                continue
            if strip_components:
-                item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
-                if not item[b'path']:
+                item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
+                if not item.path:
                    continue
            if not args.dry_run:
-                while dirs and not item[b'path'].startswith(dirs[-1][b'path']):
+                while dirs and not item.path.startswith(dirs[-1].path):
                    archive.extract_item(dirs.pop(-1), stdout=stdout)
            if output_list:
                logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
@ -428,7 +429,7 @@ class Archiver:
                if dry_run:
                    archive.extract_item(item, dry_run=True)
                else:
-                    if stat.S_ISDIR(item[b'mode']):
+                    if stat.S_ISDIR(item.mode):
                        dirs.append(item)
                        archive.extract_item(item, restore_attrs=False)
                    else:
@ -455,58 +456,58 @@ class Archiver:
            return self.compare_chunk_contents(chunks1, chunks2)

        def sum_chunk_size(item, consider_ids=None):
-            if item.get(b'deleted'):
+            if item.get('deleted'):
                return None
            else:
-                return sum(c.size for c in item[b'chunks']
+                return sum(c.size for c in item.chunks
                           if consider_ids is None or c.id in consider_ids)

        def get_owner(item):
            if args.numeric_owner:
-                return item[b'uid'], item[b'gid']
+                return item.uid, item.gid
            else:
-                return item[b'user'], item[b'group']
+                return item.user, item.group

        def get_mode(item):
-            if b'mode' in item:
-                return stat.filemode(item[b'mode'])
+            if 'mode' in item:
+                return stat.filemode(item.mode)
            else:
                return [None]

        def has_hardlink_master(item, hardlink_masters):
-            return stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters
+            return stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters

        def compare_link(item1, item2):
            # These are the simple link cases. For special cases, e.g. if a
            # regular file is replaced with a link or vice versa, it is
            # indicated in compare_mode instead.
-            if item1.get(b'deleted'):
+            if item1.get('deleted'):
                return 'added link'
-            elif item2.get(b'deleted'):
+            elif item2.get('deleted'):
                return 'removed link'
-            elif b'source' in item1 and b'source' in item2 and item1[b'source'] != item2[b'source']:
+            elif 'source' in item1 and 'source' in item2 and item1.source != item2.source:
                return 'changed link'

        def contents_changed(item1, item2):
            if can_compare_chunk_ids:
-                return item1[b'chunks'] != item2[b'chunks']
+                return item1.chunks != item2.chunks
            else:
                if sum_chunk_size(item1) != sum_chunk_size(item2):
                    return True
                else:
-                    chunk_ids1 = [c.id for c in item1[b'chunks']]
-                    chunk_ids2 = [c.id for c in item2[b'chunks']]
+                    chunk_ids1 = [c.id for c in item1.chunks]
+                    chunk_ids2 = [c.id for c in item2.chunks]
                    return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)

        def compare_content(path, item1, item2):
            if contents_changed(item1, item2):
-                if item1.get(b'deleted'):
+                if item1.get('deleted'):
                    return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2))))
-                elif item2.get(b'deleted'):
+                elif item2.get('deleted'):
                    return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1))))
                else:
-                    chunk_ids1 = {c.id for c in item1[b'chunks']}
-                    chunk_ids2 = {c.id for c in item2[b'chunks']}
+                    chunk_ids1 = {c.id for c in item1.chunks}
+                    chunk_ids2 = {c.id for c in item2.chunks}
                    added_ids = chunk_ids2 - chunk_ids1
                    removed_ids = chunk_ids1 - chunk_ids2
                    added = sum_chunk_size(item2, added_ids)
@@ -515,9 +516,9 @@ class Archiver:
                                                 format_file_size(-removed, precision=1, sign=True)))

        def compare_directory(item1, item2):
-            if item2.get(b'deleted') and not item1.get(b'deleted'):
+            if item2.get('deleted') and not item1.get('deleted'):
                return 'removed directory'
-            elif item1.get(b'deleted') and not item2.get(b'deleted'):
+            elif item1.get('deleted') and not item2.get('deleted'):
                return 'added directory'

        def compare_owner(item1, item2):
@@ -527,7 +528,7 @@ class Archiver:
                return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2)

        def compare_mode(item1, item2):
-            if item1[b'mode'] != item2[b'mode']:
+            if item1.mode != item2.mode:
                return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))

        def compare_items(output, path, item1, item2, hardlink_masters, deleted=False):
@@ -538,15 +539,15 @@ class Archiver:
            changes = []

            if has_hardlink_master(item1, hardlink_masters):
-                item1 = hardlink_masters[item1[b'source']][0]
+                item1 = hardlink_masters[item1.source][0]

            if has_hardlink_master(item2, hardlink_masters):
-                item2 = hardlink_masters[item2[b'source']][1]
+                item2 = hardlink_masters[item2.source][1]

            if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l':
                changes.append(compare_link(item1, item2))

-            if b'chunks' in item1 and b'chunks' in item2:
+            if 'chunks' in item1 and 'chunks' in item2:
                changes.append(compare_content(path, item1, item2))

            if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd':
@@ -570,21 +571,21 @@ class Archiver:

        def compare_archives(archive1, archive2, matcher):
            def hardlink_master_seen(item):
-                return b'source' not in item or not stat.S_ISREG(item[b'mode']) or item[b'source'] in hardlink_masters
+                return 'source' not in item or not stat.S_ISREG(item.mode) or item.source in hardlink_masters

            def is_hardlink_master(item):
-                return item.get(b'hardlink_master', True) and b'source' not in item
+                return item.get('hardlink_master', True) and 'source' not in item

            def update_hardlink_masters(item1, item2):
                if is_hardlink_master(item1) or is_hardlink_master(item2):
-                    hardlink_masters[item1[b'path']] = (item1, item2)
+                    hardlink_masters[item1.path] = (item1, item2)

            def compare_or_defer(item1, item2):
                update_hardlink_masters(item1, item2)
                if not hardlink_master_seen(item1) or not hardlink_master_seen(item2):
                    deferred.append((item1, item2))
                else:
-                    compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
+                    compare_items(output, item1.path, item1, item2, hardlink_masters)

            orphans_archive1 = collections.OrderedDict()
            orphans_archive2 = collections.OrderedDict()
@@ -593,44 +594,44 @@ class Archiver:
            output = []

            for item1, item2 in zip_longest(
-                    archive1.iter_items(lambda item: matcher.match(item[b'path'])),
-                    archive2.iter_items(lambda item: matcher.match(item[b'path'])),
+                    archive1.iter_items(lambda item: matcher.match(item.path)),
+                    archive2.iter_items(lambda item: matcher.match(item.path)),
            ):
-                if item1 and item2 and item1[b'path'] == item2[b'path']:
+                if item1 and item2 and item1.path == item2.path:
                    compare_or_defer(item1, item2)
                    continue
                if item1:
-                    matching_orphan = orphans_archive2.pop(item1[b'path'], None)
+                    matching_orphan = orphans_archive2.pop(item1.path, None)
                    if matching_orphan:
                        compare_or_defer(item1, matching_orphan)
                    else:
-                        orphans_archive1[item1[b'path']] = item1
+                        orphans_archive1[item1.path] = item1
                if item2:
-                    matching_orphan = orphans_archive1.pop(item2[b'path'], None)
+                    matching_orphan = orphans_archive1.pop(item2.path, None)
                    if matching_orphan:
                        compare_or_defer(matching_orphan, item2)
                    else:
-                        orphans_archive2[item2[b'path']] = item2
+                        orphans_archive2[item2.path] = item2
            # At this point orphans_* contain items that had no matching partner in the other archive
-            deleted_item = {
-                b'deleted': True,
-                b'chunks': [],
-                b'mode': 0,
-            }
+            deleted_item = Item(
+                deleted=True,
+                chunks=[],
+                mode=0,
+            )
            for added in orphans_archive2.values():
-                path = added[b'path']
-                deleted_item[b'path'] = path
+                path = added.path
+                deleted_item.path = path
                update_hardlink_masters(deleted_item, added)
                compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True)
            for deleted in orphans_archive1.values():
-                path = deleted[b'path']
-                deleted_item[b'path'] = path
+                path = deleted.path
+                deleted_item.path = path
                update_hardlink_masters(deleted, deleted_item)
                compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True)
            for item1, item2 in deferred:
                assert hardlink_master_seen(item1)
                assert hardlink_master_seen(item2)
-                compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
+                compare_items(output, item1.path, item1, item2, hardlink_masters)

            for line in sorted(output):
                print_output(line)
@@ -749,7 +750,7 @@ class Archiver:
                        sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
                else:
                    write = sys.stdout.buffer.write
-                for item in archive.iter_items(lambda item: matcher.match(item[b'path'])):
+                for item in archive.iter_items(lambda item: matcher.match(item.path)):
                    write(safe_encode(formatter.format_item(item)))
        else:
            for archive_info in manifest.list_archive_infos(sort_by='ts'):
@@ -2116,7 +2117,7 @@ def sig_info_handler(signum, stack):  # pragma: no cover
            logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total)))
            break
        if func in ('extract_item', ):  # extract op
-            path = loc['item'][b'path']
+            path = loc['item'].path
            try:
                pos = loc['fd'].tell()
            except Exception:
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@@ -16,6 +16,7 @@ from .helpers import get_cache_dir
 from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex
 from .helpers import format_file_size
 from .helpers import yes
+from .item import Item
 from .key import PlaintextKey
 from .locking import UpgradableLock
 from .remote import cache_if_remote
@@ -298,8 +299,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                    if not isinstance(item, dict):
                        logger.error('Error: Did not get expected metadata dict - archive corrupted!')
                        continue
-                    if b'chunks' in item:
-                        for chunk_id, size, csize in item[b'chunks']:
+                    item = Item(internal_dict=item)
+                    if 'chunks' in item:
+                        for chunk_id, size, csize in item.chunks:
                            chunk_idx.add(chunk_id, 1, size, csize)
            if self.do_cache:
                fn = mkpath(archive_id)
--- a/src/borg/constants.py
+++ b/src/borg/constants.py
@@ -1,10 +1,9 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
-                 b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
-                 b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
+ITEM_KEYS = set(['path', 'source', 'rdev', 'chunks', 'hardlink_master',
+                 'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
+                 'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', ])

 ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end')
-ITEM_TEXT_KEYS = (b'path', b'source', b'user', b'group')

 # default umask, overriden by --umask, defaults to read/write only for owner
 UMASK_DEFAULT = 0o077
--- a/src/borg/fuse.py
+++ b/src/borg/fuse.py
@@ -15,7 +15,7 @@ logger = create_logger()

 from .archive import Archive
 from .helpers import daemonize
-from .helpers import bigint_to_int
+from .item import Item
 from .lrucache import LRUCache

 # Does this version of llfuse support ns precision?
@@ -38,12 +38,13 @@ class ItemCache:

    def add(self, item):
        pos = self.fd.seek(0, io.SEEK_END)
-        self.fd.write(msgpack.packb(item))
+        self.fd.write(msgpack.packb(item.as_dict()))
        return pos + self.offset

    def get(self, inode):
        self.fd.seek(inode - self.offset, io.SEEK_SET)
-        return next(msgpack.Unpacker(self.fd, read_size=1024))
+        item = next(msgpack.Unpacker(self.fd, read_size=1024))
+        return Item(internal_dict=item)


 class FuseOperations(llfuse.Operations):
@@ -57,7 +58,7 @@ class FuseOperations(llfuse.Operations):
        self.items = {}
        self.parent = {}
        self.contents = defaultdict(dict)
-        self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()}
+        self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid())
        self.pending_archives = {}
        self.accounted_chunks = {}
        self.cache = ItemCache()
@@ -86,8 +87,9 @@ class FuseOperations(llfuse.Operations):
            _, data = self.key.decrypt(key, chunk)
            unpacker.feed(data)
            for item in unpacker:
-                segments = prefix + os.fsencode(os.path.normpath(item[b'path'])).split(b'/')
-                del item[b'path']
+                item = Item(internal_dict=item)
+                segments = prefix + os.fsencode(os.path.normpath(item.path)).split(b'/')
+                del item.path
                num_segments = len(segments)
                parent = 1
                for i, segment in enumerate(segments, 1):
@@ -98,10 +100,10 @@ class FuseOperations(llfuse.Operations):
                        self.parent[archive_inode] = parent
                    # Leaf segment?
                    if i == num_segments:
-                        if b'source' in item and stat.S_ISREG(item[b'mode']):
-                            inode = self._find_inode(item[b'source'], prefix)
+                        if 'source' in item and stat.S_ISREG(item.mode):
+                            inode = self._find_inode(item.source, prefix)
                            item = self.cache.get(inode)
-                            item[b'nlink'] = item.get(b'nlink', 1) + 1
+                            item.nlink = item.get('nlink', 1) + 1
                            self.items[inode] = item
                        else:
                            inode = self.cache.add(item)
@@ -151,58 +153,56 @@ class FuseOperations(llfuse.Operations):
        item = self.get_item(inode)
        size = 0
        dsize = 0
-        try:
-            for key, chunksize, _ in item[b'chunks']:
+        if 'chunks' in item:
+            for key, chunksize, _ in item.chunks:
                size += chunksize
                if self.accounted_chunks.get(key, inode) == inode:
                    self.accounted_chunks[key] = inode
                    dsize += chunksize
-        except KeyError:
-            pass
        entry = llfuse.EntryAttributes()
        entry.st_ino = inode
        entry.generation = 0
        entry.entry_timeout = 300
        entry.attr_timeout = 300
-        entry.st_mode = item[b'mode']
-        entry.st_nlink = item.get(b'nlink', 1)
-        entry.st_uid = item[b'uid']
-        entry.st_gid = item[b'gid']
-        entry.st_rdev = item.get(b'rdev', 0)
+        entry.st_mode = item.mode
+        entry.st_nlink = item.get('nlink', 1)
+        entry.st_uid = item.uid
+        entry.st_gid = item.gid
+        entry.st_rdev = item.get('rdev', 0)
        entry.st_size = size
        entry.st_blksize = 512
        entry.st_blocks = dsize / 512
        # note: older archives only have mtime (not atime nor ctime)
        if have_fuse_xtime_ns:
-            entry.st_mtime_ns = bigint_to_int(item[b'mtime'])
-            if b'atime' in item:
-                entry.st_atime_ns = bigint_to_int(item[b'atime'])
+            entry.st_mtime_ns = item.mtime
+            if 'atime' in item:
+                entry.st_atime_ns = item.atime
            else:
-                entry.st_atime_ns = bigint_to_int(item[b'mtime'])
-            if b'ctime' in item:
-                entry.st_ctime_ns = bigint_to_int(item[b'ctime'])
+                entry.st_atime_ns = item.mtime
+            if 'ctime' in item:
+                entry.st_ctime_ns = item.ctime
            else:
-                entry.st_ctime_ns = bigint_to_int(item[b'mtime'])
+                entry.st_ctime_ns = item.mtime
        else:
-            entry.st_mtime = bigint_to_int(item[b'mtime']) / 1e9
-            if b'atime' in item:
-                entry.st_atime = bigint_to_int(item[b'atime']) / 1e9
+            entry.st_mtime = item.mtime / 1e9
+            if 'atime' in item:
+                entry.st_atime = item.atime / 1e9
            else:
-                entry.st_atime = bigint_to_int(item[b'mtime']) / 1e9
-            if b'ctime' in item:
-                entry.st_ctime = bigint_to_int(item[b'ctime']) / 1e9
+                entry.st_atime = item.mtime / 1e9
+            if 'ctime' in item:
+                entry.st_ctime = item.ctime / 1e9
            else:
-                entry.st_ctime = bigint_to_int(item[b'mtime']) / 1e9
+                entry.st_ctime = item.mtime / 1e9
        return entry

    def listxattr(self, inode, ctx=None):
        item = self.get_item(inode)
-        return item.get(b'xattrs', {}).keys()
+        return item.get('xattrs', {}).keys()

    def getxattr(self, inode, name, ctx=None):
        item = self.get_item(inode)
        try:
-            return item.get(b'xattrs', {})[name]
+            return item.get('xattrs', {})[name]
        except KeyError:
            raise llfuse.FUSEError(errno.ENODATA) from None

@@ -234,7 +234,7 @@ class FuseOperations(llfuse.Operations):
    def read(self, fh, offset, size):
        parts = []
        item = self.get_item(fh)
-        for id, s, csize in item[b'chunks']:
+        for id, s, csize in item.chunks:
            if s < offset:
                offset -= s
                continue
@@ -264,7 +264,7 @@ class FuseOperations(llfuse.Operations):

    def readlink(self, inode, ctx=None):
        item = self.get_item(inode)
-        return os.fsencode(item[b'source'])
+        return os.fsencode(item.source)

    def mount(self, mountpoint, extra_options, foreground=False):
        options = ['fsname=borgfs', 'ro']
--- a/src/borg/helpers.py
+++ b/src/borg/helpers.py
@@ -1157,10 +1157,8 @@ class ItemFormatter:
        class FakeArchive:
            fpr = name = ""

-        fake_item = {
-            b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0,
-            b'uid': 0, b'gid': 0,
-        }
+        from .item import Item
+        fake_item = Item(mode=0, path='', user='', group='', mtime=0, uid=0, gid=0)
        formatter = cls(FakeArchive, "")
        keys = []
        keys.extend(formatter.call_keys.keys())
@@ -1196,12 +1194,12 @@ class ItemFormatter:
            'csize': self.calculate_csize,
            'num_chunks': self.calculate_num_chunks,
            'unique_chunks': self.calculate_unique_chunks,
-            'isomtime': partial(self.format_time, b'mtime'),
-            'isoctime': partial(self.format_time, b'ctime'),
-            'isoatime': partial(self.format_time, b'atime'),
-            'mtime': partial(self.time, b'mtime'),
-            'ctime': partial(self.time, b'ctime'),
-            'atime': partial(self.time, b'atime'),
+            'isomtime': partial(self.format_time, 'mtime'),
+            'isoctime': partial(self.format_time, 'ctime'),
+            'isoatime': partial(self.format_time, 'atime'),
+            'mtime': partial(self.time, 'mtime'),
+            'ctime': partial(self.time, 'ctime'),
+            'atime': partial(self.time, 'atime'),
        }
        for hash_function in hashlib.algorithms_guaranteed:
            self.add_key(hash_function, partial(self.hash_item, hash_function))
@@ -1213,11 +1211,11 @@ class ItemFormatter:
        self.used_call_keys = set(self.call_keys) & self.format_keys

    def get_item_data(self, item):
-        mode = stat.filemode(item[b'mode'])
+        mode = stat.filemode(item.mode)
        item_type = mode[0]
        item_data = self.item_data

-        source = item.get(b'source', '')
+        source = item.get('source', '')
        extra = ''
        if source:
            source = remove_surrogates(source)
@@ -1228,16 +1226,16 @@ class ItemFormatter:
                extra = ' link to %s' % source
        item_data['type'] = item_type
        item_data['mode'] = mode
-        item_data['user'] = item[b'user'] or item[b'uid']
-        item_data['group'] = item[b'group'] or item[b'gid']
-        item_data['uid'] = item[b'uid']
-        item_data['gid'] = item[b'gid']
-        item_data['path'] = remove_surrogates(item[b'path'])
-        item_data['bpath'] = item[b'path']
+        item_data['user'] = item.user or item.uid
+        item_data['group'] = item.group or item.gid
+        item_data['uid'] = item.uid
+        item_data['gid'] = item.gid
+        item_data['path'] = remove_surrogates(item.path)
+        item_data['bpath'] = item.path
        item_data['source'] = source
        item_data['linktarget'] = source
        item_data['extra'] = extra
-        item_data['flags'] = item.get(b'bsdflags')
+        item_data['flags'] = item.get('bsdflags')
        for key in self.used_call_keys:
            item_data[key] = self.call_keys[key](item)
        return item_data
@@ -1246,31 +1244,31 @@ class ItemFormatter:
        return self.format.format_map(self.get_item_data(item))

    def calculate_num_chunks(self, item):
-        return len(item.get(b'chunks', []))
+        return len(item.get('chunks', []))

    def calculate_unique_chunks(self, item):
        chunk_index = self.archive.cache.chunks
-        return sum(1 for c in item.get(b'chunks', []) if chunk_index[c.id].refcount == 1)
+        return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1)

    def calculate_size(self, item):
-        return sum(c.size for c in item.get(b'chunks', []))
+        return sum(c.size for c in item.get('chunks', []))

    def calculate_csize(self, item):
-        return sum(c.csize for c in item.get(b'chunks', []))
+        return sum(c.csize for c in item.get('chunks', []))

    def hash_item(self, hash_function, item):
-        if b'chunks' not in item:
+        if 'chunks' not in item:
            return ""
        hash = hashlib.new(hash_function)
-        for _, data in self.archive.pipeline.fetch_many([c.id for c in item[b'chunks']]):
+        for _, data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]):
            hash.update(data)
        return hash.hexdigest()

    def format_time(self, key, item):
-        return format_time(safe_timestamp(item.get(key) or item[b'mtime']))
+        return format_time(safe_timestamp(item.get(key) or item.mtime))

    def time(self, key, item):
-        return safe_timestamp(item.get(key) or item[b'mtime'])
+        return safe_timestamp(item.get(key) or item.mtime)


 class ChunkIteratorFileWrapper:
@@ -1314,7 +1312,7 @@ class ChunkIteratorFileWrapper:

 def open_item(archive, item):
    """Return file-like object for archived item (with chunks)."""
-    chunk_iterator = archive.pipeline.fetch_many([c.id for c in item[b'chunks']])
+    chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks])
    return ChunkIteratorFileWrapper(chunk_iterator)


--- a/src/borg/item.py
+++ b/src/borg/item.py
@@ -21,25 +21,34 @@ class PropDict:

    __slots__ = ("_dict", )  # avoid setting attributes not supported by properties

-    def __init__(self, data_dict=None, **kw):
+    def __init__(self, data_dict=None, internal_dict=None, **kw):
        if data_dict is None:
            data = kw
        elif not isinstance(data_dict, dict):
            raise TypeError("data_dict must be dict")
        else:
            data = data_dict
-        # internally, we want an dict with only str-typed keys
-        _dict = {}
-        for k, v in data.items():
+        self._dict = {}
+        self.update_internal(internal_dict or {})
+        self.update(data)
+
+    def update(self, d):
+        for k, v in d.items():
            if isinstance(k, bytes):
                k = k.decode()
-            elif not isinstance(k, str):
-                raise TypeError("dict keys must be str or bytes, not %r" % k)
-            _dict[k] = v
-        unknown_keys = set(_dict) - self.VALID_KEYS
-        if unknown_keys:
-            raise ValueError("dict contains unknown keys %s" % ','.join(unknown_keys))
-        self._dict = _dict
+            setattr(self, self._check_key(k), v)
+
+    def update_internal(self, d):
+        for k, v in d.items():
+            if isinstance(k, bytes):
+                k = k.decode()
+            self._dict[k] = v
+
+    def __eq__(self, other):
+        return self.as_dict() == other.as_dict()
+
+    def __repr__(self):
+        return '%s(internal_dict=%r)' % (self.__class__.__name__, self._dict)

    def as_dict(self):
        """return the internal dictionary"""
@@ -110,7 +119,7 @@ class Item(PropDict):
    If an Item shall be serialized, give as_dict() method output to msgpack packer.
    """

-    VALID_KEYS = set(key.decode() for key in ITEM_KEYS)  # we want str-typed keys
+    VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', }  # str-typed keys

    __slots__ = ("_dict", )  # avoid setting attributes not supported by properties

@@ -118,14 +127,14 @@ class Item(PropDict):

    path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
    source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
+    group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
+
    acl_access = PropDict._make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
    acl_default = PropDict._make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
    acl_extended = PropDict._make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
    acl_nfs4 = PropDict._make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)

-    user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
-    group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
-
    mode = PropDict._make_property('mode', int)
    uid = PropDict._make_property('uid', int)
    gid = PropDict._make_property('gid', int)
@@ -138,6 +147,9 @@ class Item(PropDict):

    hardlink_master = PropDict._make_property('hardlink_master', bool)

-    chunks = PropDict._make_property('chunks', list)
+    chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')

    xattrs = PropDict._make_property('xattrs', StableDict)
+
+    deleted = PropDict._make_property('deleted', bool)
+    nlink = PropDict._make_property('nlink', int)
--- a/src/borg/platform/darwin.pyx
+++ b/src/borg/platform/darwin.pyx
@@ -62,9 +62,9 @@ def acl_get(path, item, st, numeric_owner=False):
        if text == NULL:
            return
        if numeric_owner:
-            item[b'acl_extended'] = _remove_non_numeric_identifier(text)
+            item['acl_extended'] = _remove_non_numeric_identifier(text)
        else:
-            item[b'acl_extended'] = text
+            item['acl_extended'] = text
    finally:
        acl_free(text)
        acl_free(acl)
@@ -72,18 +72,16 @@ def acl_get(path, item, st, numeric_owner=False):

 def acl_set(path, item, numeric_owner=False):
    cdef acl_t acl = NULL
-    try:
+    acl_text = item.get('acl_extended')
+    if acl_text is not None:
        try:
            if numeric_owner:
-                acl = acl_from_text(item[b'acl_extended'])
+                acl = acl_from_text(acl_text)
            else:
-                acl = acl_from_text(<bytes>_remove_numeric_id_if_possible(item[b'acl_extended']))
-        except KeyError:
-            return
-        if acl == NULL:
-            return
-        if acl_set_link_np(<bytes>os.fsencode(path), ACL_TYPE_EXTENDED, acl):
-            return
-    finally:
-        acl_free(acl)
-
+                acl = acl_from_text(<bytes>_remove_numeric_id_if_possible(acl_text))
+            if acl == NULL:
+                return
+            if acl_set_link_np(<bytes>os.fsencode(path), ACL_TYPE_EXTENDED, acl):
+                return
+        finally:
+            acl_free(acl)
--- a/src/borg/platform/freebsd.pyx
+++ b/src/borg/platform/freebsd.pyx
@@ -57,10 +57,10 @@ def acl_get(path, item, st, numeric_owner=False):
        return
    flags |= ACL_TEXT_NUMERIC_IDS if numeric_owner else 0
    if ret > 0:
-        _get_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', flags)
+        _get_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', flags)
    else:
-        _get_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', flags)
-        _get_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', flags)
+        _get_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', flags)
+        _get_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', flags)


 cdef _set_acl(p, type, item, attribute, numeric_owner=False):
@@ -98,6 +98,6 @@ def acl_set(path, item, numeric_owner=False):
    of the user/group names
    """
    p = os.fsencode(path)
-    _set_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', numeric_owner)
-    _set_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', numeric_owner)
-    _set_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', numeric_owner)
+    _set_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', numeric_owner)
+    _set_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', numeric_owner)
+    _set_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', numeric_owner)
--- a/src/borg/platform/linux.pyx
+++ b/src/borg/platform/linux.pyx
@@ -171,12 +171,12 @@ def acl_get(path, item, st, numeric_owner=False):
        if access_acl:
            access_text = acl_to_text(access_acl, NULL)
            if access_text:
-                item[b'acl_access'] = converter(access_text)
+                item['acl_access'] = converter(access_text)
        default_acl = acl_get_file(p, ACL_TYPE_DEFAULT)
        if default_acl:
            default_text = acl_to_text(default_acl, NULL)
            if default_text:
-                item[b'acl_default'] = converter(default_text)
+                item['acl_default'] = converter(default_text)
    finally:
        acl_free(default_text)
        acl_free(default_acl)
@@ -193,8 +193,8 @@ def acl_set(path, item, numeric_owner=False):
        converter = posix_acl_use_stored_uid_gid
    else:
        converter = acl_use_local_uid_gid
-    access_text = item.get(b'acl_access')
-    default_text = item.get(b'acl_default')
+    access_text = item.get('acl_access')
+    default_text = item.get('acl_default')
    if access_text:
        try:
            access_acl = acl_from_text(<bytes>converter(access_text))
--- a/src/borg/testsuite/archive.py
+++ b/src/borg/testsuite/archive.py
@@ -7,6 +7,7 @@ import pytest
 import msgpack

 from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics
+from ..item import Item
 from ..key import PlaintextKey
 from ..helpers import Manifest
 from . import BaseTestCase
@@ -38,12 +39,12 @@ def tests_stats_progress(stats, columns=80):

    out = StringIO()
    stats.update(10**3, 0, unique=False)
-    stats.show_progress(item={b'path': 'foo'}, final=False, stream=out)
+    stats.show_progress(item=Item(path='foo'), final=False, stream=out)
    s = '1.02 kB O 10 B C 10 B D 0 N foo'
    buf = ' ' * (columns - len(s))
    assert out.getvalue() == s + buf + "\r"
    out = StringIO()
-    stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out)
+    stats.show_progress(item=Item(path='foo'*40), final=False, stream=out)
    s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo'
    buf = ' ' * (columns - len(s))
    assert out.getvalue() == s + buf + "\r"
@@ -93,7 +94,7 @@ class ArchiveTimestampTestCase(BaseTestCase):
 class ChunkBufferTestCase(BaseTestCase):

    def test(self):
-        data = [{b'foo': 1}, {b'bar': 2}]
+        data = [Item(path='p1'), Item(path='p2')]
        cache = MockCache()
        key = PlaintextKey(None)
        chunks = CacheChunkBuffer(cache, key, None)
@@ -105,11 +106,11 @@ class ChunkBufferTestCase(BaseTestCase):
        unpacker = msgpack.Unpacker()
        for id in chunks.chunks:
            unpacker.feed(cache.objects[id])
-        self.assert_equal(data, list(unpacker))
+        self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])

    def test_partial(self):
-        big = b"0123456789" * 10000
-        data = [{b'full': 1, b'data': big}, {b'partial': 2, b'data': big}]
+        big = "0123456789" * 10000
+        data = [Item(path='full', source=big), Item(path='partial', source=big)]
        cache = MockCache()
        key = PlaintextKey(None)
        chunks = CacheChunkBuffer(cache, key, None)
@@ -126,7 +127,7 @@ class ChunkBufferTestCase(BaseTestCase):
        unpacker = msgpack.Unpacker()
        for id in chunks.chunks:
            unpacker.feed(cache.objects[id])
-        self.assert_equal(data, list(unpacker))
+        self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])


 class RobustUnpackerTestCase(BaseTestCase):
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -1641,8 +1641,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
        archive, repository = self.open_archive('archive1')
        with repository:
            for item in archive.iter_items():
-                if item[b'path'].endswith('testsuite/archiver.py'):
-                    repository.delete(item[b'chunks'][-1].id)
+                if item.path.endswith('testsuite/archiver.py'):
+                    repository.delete(item.chunks[-1].id)
                    break
            repository.commit()
        self.cmd('check', self.repository_location, exit_code=1)
@ -1696,8 +1696,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
        archive, repository = self.open_archive('archive1')
        with repository:
            for item in archive.iter_items():
-                if item[b'path'].endswith('testsuite/archiver.py'):
-                    chunk = item[b'chunks'][-1]
+                if item.path.endswith('testsuite/archiver.py'):
+                    chunk = item.chunks[-1]
                    data = repository.get(chunk.id) + b'1234'
                    repository.put(chunk.id, data)
                    break
--- a/src/borg/testsuite/item.py
+++ b/src/borg/testsuite/item.py
@ -35,13 +35,13 @@ def test_item_empty():

 def test_item_from_dict():
    # does not matter whether we get str or bytes keys
-    item = Item({b'path': b'/a/b/c', b'mode': 0o666})
+    item = Item({b'path': '/a/b/c', b'mode': 0o666})
    assert item.path == '/a/b/c'
    assert item.mode == 0o666
    assert 'path' in item

    # does not matter whether we get str or bytes keys
-    item = Item({'path': b'/a/b/c', 'mode': 0o666})
+    item = Item({'path': '/a/b/c', 'mode': 0o666})
    assert item.path == '/a/b/c'
    assert item.mode == 0o666
    assert 'mode' in item
@ -60,7 +60,7 @@ def test_item_from_dict():


 def test_item_from_kw():
-    item = Item(path=b'/a/b/c', mode=0o666)
+    item = Item(path='/a/b/c', mode=0o666)
    assert item.path == '/a/b/c'
    assert item.mode == 0o666

@ -107,7 +107,7 @@ def test_item_se_str_property():
        item.path = 42

    # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
-    item = Item({'path': b'/a/\xfc/c'})
+    item = Item(internal_dict={'path': b'/a/\xfc/c'})
    assert item.path == '/a/\udcfc/c'  # getting a surrogate-escaped representation
    assert item.as_dict() == {'path': b'/a/\xfc/c'}
    del item.path
--- a/src/borg/testsuite/platform.py
+++ b/src/borg/testsuite/platform.py
@ -51,26 +51,26 @@ class PlatformLinuxTestCase(BaseTestCase):
        return item

    def set_acl(self, path, access=None, default=None, numeric_owner=False):
-        item = {b'acl_access': access, b'acl_default': default}
+        item = {'acl_access': access, 'acl_default': default}
        acl_set(path, item, numeric_owner=numeric_owner)

    def test_access_acl(self):
        file = tempfile.NamedTemporaryFile()
        self.assert_equal(self.get_acl(file.name), {})
        self.set_acl(file.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=False)
-        self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)[b'acl_access'])
-        self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)[b'acl_access'])
-        self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)[b'acl_access'])
+        self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)['acl_access'])
+        self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)['acl_access'])
+        self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)['acl_access'])
        file2 = tempfile.NamedTemporaryFile()
        self.set_acl(file2.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=True)
-        self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access'])
-        self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access'])
+        self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)['acl_access'])
+        self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)['acl_access'])

    def test_default_acl(self):
        self.assert_equal(self.get_acl(self.tmpdir), {})
        self.set_acl(self.tmpdir, access=ACCESS_ACL, default=DEFAULT_ACL)
-        self.assert_equal(self.get_acl(self.tmpdir)[b'acl_access'], ACCESS_ACL)
-        self.assert_equal(self.get_acl(self.tmpdir)[b'acl_default'], DEFAULT_ACL)
+        self.assert_equal(self.get_acl(self.tmpdir)['acl_access'], ACCESS_ACL)
+        self.assert_equal(self.get_acl(self.tmpdir)['acl_default'], DEFAULT_ACL)

    def test_non_ascii_acl(self):
        # Testing non-ascii ACL processing to see whether our code is robust.
@ -86,18 +86,18 @@ class PlatformLinuxTestCase(BaseTestCase):
        group_entry_numeric = 'group:666:rw-:666'.encode('ascii')
        acl = b'\n'.join([nothing_special, user_entry, group_entry])
        self.set_acl(file.name, access=acl, numeric_owner=False)
-        acl_access = self.get_acl(file.name, numeric_owner=False)[b'acl_access']
+        acl_access = self.get_acl(file.name, numeric_owner=False)['acl_access']
        self.assert_in(user_entry, acl_access)
        self.assert_in(group_entry, acl_access)
-        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access']
+        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access']
        self.assert_in(user_entry_numeric, acl_access_numeric)
        self.assert_in(group_entry_numeric, acl_access_numeric)
        file2 = tempfile.NamedTemporaryFile()
        self.set_acl(file2.name, access=acl, numeric_owner=True)
-        acl_access = self.get_acl(file2.name, numeric_owner=False)[b'acl_access']
+        acl_access = self.get_acl(file2.name, numeric_owner=False)['acl_access']
        self.assert_in(user_entry, acl_access)
        self.assert_in(group_entry, acl_access)
-        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access']
+        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access']
        self.assert_in(user_entry_numeric, acl_access_numeric)
        self.assert_in(group_entry_numeric, acl_access_numeric)

@ -125,7 +125,7 @@ class PlatformDarwinTestCase(BaseTestCase):
        return item

    def set_acl(self, path, acl, numeric_owner=False):
-        item = {b'acl_extended': acl}
+        item = {'acl_extended': acl}
        acl_set(path, item, numeric_owner=numeric_owner)

    def test_access_acl(self):
@ -133,11 +133,11 @@ class PlatformDarwinTestCase(BaseTestCase):
        file2 = tempfile.NamedTemporaryFile()
        self.assert_equal(self.get_acl(file.name), {})
        self.set_acl(file.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=False)
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)[b'acl_extended'])
-        self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)[b'acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)['acl_extended'])
+        self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)['acl_extended'])
        self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True)
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended'])
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)['acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)['acl_extended'])


@unittest.skipUnless(sys.platform.startswith(('linux', 'freebsd', 'darwin')), 'POSIX only tests')