Merge pull request #9673 from ThomasWaldmann/remove-put2

Remove put2 / repository v2
2026-06-10 09:21:44 -04:00 · 2026-05-28 23:58:13 +02:00 · 2026-05-28 23:58:13 +02:00 · 47966351bc
commit 47966351bc
parent 33afaa1f3c 828e72be72
9 changed files with 53 additions and 121 deletions
--- a/src/borg/archiver/_common.py
+++ b/src/borg/archiver/_common.py
@@ -29,9 +29,9 @@ from ..logger import create_logger
 logger = create_logger(__name__)


-def get_repository(location, *, create, exclusive, lock_wait, lock, args, v1_or_v2):
+def get_repository(location, *, create, exclusive, lock_wait, lock, args, v1_legacy):
    if location.proto in ("ssh", "socket"):
-        if v1_or_v2:
+        if v1_legacy:
            from ..legacy.remote import LegacyRemoteRepository

            RemoteRepoCls = LegacyRemoteRepository
@@ -42,12 +42,12 @@ def get_repository(location, *, create, exclusive, lock_wait, lock, args, v1_or_
        )

    elif (
-        location.proto in ("sftp", "file", "http", "https", "rclone", "s3", "b2") and not v1_or_v2
+        location.proto in ("sftp", "file", "http", "https", "rclone", "s3", "b2") and not v1_legacy
    ):  # stuff directly supported by borgstore
        repository = Repository(location, create=create, exclusive=exclusive, lock_wait=lock_wait, lock=lock)

    else:
-        if v1_or_v2:
+        if v1_legacy:
            from ..legacy.repository import LegacyRepository

            RepoCls = LegacyRepository
@@ -123,7 +123,7 @@ def with_repository(
                lock_wait=self.lock_wait,
                lock=lock,
                args=args,
-                v1_or_v2=False,
+                v1_legacy=False,
            )

            with repository:
@@ -181,7 +181,7 @@ def with_other_repository(manifest=False, cache=False, compatibility=None):
            if not location.valid:  # nothing to do
                return method(self, args, **kwargs)

-            v1_or_v2 = getattr(args, "v1_or_v2", False)
+            v1_legacy = getattr(args, "v1_legacy", False)

            repository = get_repository(
                location,
@@ -190,11 +190,11 @@ def with_other_repository(manifest=False, cache=False, compatibility=None):
                lock_wait=self.lock_wait,
                lock=True,
                args=args,
-                v1_or_v2=v1_or_v2,
+                v1_legacy=v1_legacy,
            )

            with repository:
-                acceptable_versions = (1, 2) if v1_or_v2 else (3,)
+                acceptable_versions = (1,) if v1_legacy else (3,)
                if repository.version not in acceptable_versions:
                    raise Error(
                        f"This borg version only accepts version {' or '.join(acceptable_versions)} "
--- a/src/borg/archiver/repo_create_cmd.py
+++ b/src/borg/archiver/repo_create_cmd.py
@@ -204,7 +204,7 @@ class RepoCreateMixIn:
            help="reuse the key material from the other repository",
        )
        subparser.add_argument(
-            "--from-borg1", dest="v1_or_v2", action="store_true", help="other repository is Borg 1.x"
+            "--from-borg1", dest="v1_legacy", action="store_true", help="other repository is Borg 1.x"
        )
        subparser.add_argument(
            "-e",
--- a/src/borg/archiver/transfer_cmd.py
+++ b/src/borg/archiver/transfer_cmd.py
@@ -175,9 +175,9 @@ class TransferMixIn:
        from .. import upgrade as upgrade_mod
        from ..legacy import upgrade as legacy_upgrade_mod

-        v1_or_v2 = getattr(args, "v1_or_v2", False)
+        v1_legacy = getattr(args, "v1_legacy", False)
        upgrader = args.upgrader
-        if upgrader == "NoOp" and v1_or_v2:
+        if upgrader == "NoOp" and v1_legacy:
            upgrader = "From12To20"

        try:
@@ -350,7 +350,7 @@ class TransferMixIn:
            help="transfer archives from the other repository",
        )
        subparser.add_argument(
-            "--from-borg1", dest="v1_or_v2", action="store_true", help="other repository is borg 1.x"
+            "--from-borg1", dest="v1_legacy", action="store_true", help="other repository is borg 1.x"
        )
        subparser.add_argument(
            "--upgrader",
--- a/src/borg/constants.py
+++ b/src/borg/constants.py
@@ -51,10 +51,8 @@ ROBJ_DONTCARE = "*"  # used to parse without type assertion (= accept any type)
 # the header, and the total size was set to precisely 20 MiB for borg < 1.3).
 MAX_DATA_SIZE = 20971479

-# MAX_OBJECT_SIZE = MAX_DATA_SIZE + len(PUT2 header)
-# note: for borg >= 1.3, this makes the MAX_OBJECT_SIZE grow slightly over the precise 20 MiB used by
-# borg < 1.3, but this is not expected to cause any issues.
-MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41 + 8  # see assertion at end of repository module
+# MAX_OBJECT_SIZE = MAX_DATA_SIZE + len(PUT header)
+MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41  # see assertion at end of repository module

 # How many segment files Borg puts into a single directory by default.
 DEFAULT_SEGMENTS_PER_DIR = 1000
--- a/src/borg/legacy/remote.py
+++ b/src/borg/legacy/remote.py
@@ -337,7 +337,7 @@ class LegacyRemoteRepository:
                lock_wait=lock_wait,
                lock=lock,
                exclusive=exclusive,
-                v1_or_v2=True,  # make remote use LegacyRepository
+                v1_legacy=True,  # make remote use LegacyRepository
            )
            info = self.info()
            self.version = info["version"]
@@ -636,8 +636,8 @@ class LegacyRemoteRepository:
                    if chunkid in self.chunkid_to_msgids:
                        self.ignore_responses.add(pop_preload_msgid(chunkid))

-    @api(since=parse_version("1.0.0"), v1_or_v2={"since": parse_version("2.0.0b10"), "previously": True})
-    def open(self, path, create=False, lock_wait=None, lock=True, exclusive=False, v1_or_v2=False):
+    @api(since=parse_version("1.0.0"), v1_legacy={"since": parse_version("2.0.0b21"), "previously": True})
+    def open(self, path, create=False, lock_wait=None, lock=True, exclusive=False, v1_legacy=False):
        """actual remoting is done via self.call in the @api decorator"""

    @api(since=parse_version("2.0.0a3"))
--- a/src/borg/legacy/repository.py
+++ b/src/borg/legacy/repository.py
@@ -13,8 +13,6 @@ from itertools import islice
 from collections.abc import Callable
 from zlib import crc32

-import xxhash
-
 from ..constants import *  # NOQA
 from .hashindex import NSIndex1Entry, NSIndex1
 from ..helpers import Error, ErrorWithTraceback, IntegrityError, format_file_size, parse_file_size
@@ -28,7 +26,6 @@ from ..fslocking import Lock, LockError, LockErrorT
 from ..logger import create_logger
 from ..manifest import Manifest, NoManifestError
 from ..platform import SaveFile, SyncFile, sync_dir, safe_fadvise
-from ..repoobj import RepoObj
 from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError

 logger = create_logger(__name__)
@@ -39,7 +36,6 @@ MAGIC_LEN = len(MAGIC)
 TAG_PUT = 0
 TAG_DELETE = 1
 TAG_COMMIT = 2
-TAG_PUT2 = 3

 # Highest ID usable as TAG_* value
 #
@@ -54,9 +50,7 @@ FreeSpace: Callable[[], defaultdict] = partial(defaultdict, int)


 def header_size(tag):
-    if tag == TAG_PUT2:
-        size = LoggedIO.HEADER_ID_SIZE + LoggedIO.ENTRY_HASH_SIZE
-    elif tag == TAG_PUT or tag == TAG_DELETE:
+    if tag == TAG_PUT or tag == TAG_DELETE:
        size = LoggedIO.HEADER_ID_SIZE
    elif tag == TAG_COMMIT:
        size = LoggedIO.header_fmt.size
@@ -789,7 +783,7 @@ class LegacyRepository:
                    continue
                in_index = self.index.get(key)
                is_index_object = in_index and (in_index.segment, in_index.offset) == (segment, offset)
-                if tag in (TAG_PUT2, TAG_PUT) and is_index_object:
+                if tag in (TAG_PUT,) and is_index_object:
                    try:
                        new_segment, offset = self.io.write_put(key, data, raise_full=True)
                    except LoggedIO.SegmentFull:
@@ -799,7 +793,7 @@ class LegacyRepository:
                    segments.setdefault(new_segment, 0)
                    segments[new_segment] += 1
                    segments[segment] -= 1
-                elif tag in (TAG_PUT2, TAG_PUT) and not is_index_object:
+                elif tag in (TAG_PUT,) and not is_index_object:
                    # If this is a PUT shadowed by a later tag, then it will be gone when this segment is deleted after
                    # this loop. Therefore it is removed from the shadow index.
                    try:
@@ -832,7 +826,7 @@ class LegacyRepository:
                        # Consider the following series of operations if we would not do this, i.e. this entire if:
                        # would be removed.
                        # Columns are segments, lines are different keys (line 1 = some key, line 2 = some other key)
-                        # Legend: P=TAG_PUT/TAG_PUT2, D=TAG_DELETE, c=commit, i=index is written for latest commit
+                        # Legend: P=TAG_PUT, D=TAG_DELETE, c=commit, i=index is written for latest commit
                        #
                        # Segment | 1     | 2   | 3
                        # --------+-------+-----+------
@@ -922,7 +916,7 @@ class LegacyRepository:
        """some code shared between replay_segments and check"""
        self.segments[segment] = 0
        for tag, key, offset, size, _ in objects:
-            if tag in (TAG_PUT2, TAG_PUT):
+            if tag in (TAG_PUT,):
                try:
                    # If this PUT supersedes an older PUT, mark the old segment for compaction and count the free space
                    in_index = self.index[key]
@@ -973,7 +967,7 @@ class LegacyRepository:

        self.compact[segment] = 0
        for tag, key, offset, size, _ in self.io.iter_objects(segment, read_data=False):
-            if tag in (TAG_PUT2, TAG_PUT):
+            if tag in (TAG_PUT,):
                in_index = self.index.get(key)
                if not in_index or (in_index.segment, in_index.offset) != (segment, offset):
                    # This PUT is superseded later.
@@ -1203,7 +1197,7 @@ class LegacyRepository:
        # to keep a PUT in an earlier segment in the "effectively deleted" state.
        self.shadow_index.setdefault(id, []).append(segment)
        self.segments[segment] -= 1
-        self.compact[segment] += header_size(TAG_PUT2) + size
+        self.compact[segment] += header_size(TAG_PUT) + size
        segment, size = self.io.write_delete(id)
        self.compact[segment] += size
        self.segments.setdefault(segment, 0)
@@ -1247,7 +1241,6 @@ class LoggedIO:
    COMMIT = crc_fmt.pack(crc32(_commit)) + _commit

    HEADER_ID_SIZE = header_fmt.size + 32
-    ENTRY_HASH_SIZE = 8

    def __init__(self, path, limit, segments_per_dir, capacity=90):
        self.path = path
@@ -1493,7 +1486,7 @@ class LoggedIO:
        header = fd.read(self.header_fmt.size)
        while header:
            size, tag, key, data = self._read(
-                fd, header, segment, offset, (TAG_PUT2, TAG_DELETE, TAG_COMMIT, TAG_PUT), read_data=read_data
+                fd, header, segment, offset, (TAG_PUT, TAG_DELETE, TAG_COMMIT), read_data=read_data
            )
            # tuple[3]: corresponds to len(data) == length of the full chunk payload (meta_len+enc_meta+enc_data)
            # tuple[4]: data will be None if read_data is False.
@@ -1534,20 +1527,7 @@ class LoggedIO:
                            if size_invalid or tag > MAX_TAG_ID:
                                d = d[1:]
                                continue
-                            if tag == TAG_PUT2:
-                                c_offset = self.HEADER_ID_SIZE + self.ENTRY_HASH_SIZE
-                                # skip if header is invalid
-                                if crc32(d[4:c_offset]) & 0xFFFFFFFF != crc:
-                                    d = d[1:]
-                                    continue
-                                # skip if content is invalid
-                                if (
-                                    self.entry_hash(d[4 : self.HEADER_ID_SIZE], d[c_offset:size])
-                                    != d[self.HEADER_ID_SIZE : c_offset]
-                                ):
-                                    d = d[1:]
-                                    continue
-                            elif tag in (TAG_DELETE, TAG_COMMIT, TAG_PUT):
+                            if tag in (TAG_DELETE, TAG_COMMIT, TAG_PUT):
                                if crc32(d[4:size]) & 0xFFFFFFFF != crc:
                                    d = d[1:]
                                    continue
@@ -1560,12 +1540,6 @@ class LoggedIO:
                        del d
                        data.release()

-    def entry_hash(self, *data):
-        h = xxhash.xxh64()
-        for d in data:
-            h.update(d)
-        return h.digest()
-
    def read(self, segment, offset, id, *, read_data=True, expected_size=None):
        """
        Read entry from *segment* at *offset* with *id*.
@@ -1577,7 +1551,7 @@ class LoggedIO:
        fd = self.get_fd(segment)
        fd.seek(offset)
        header = fd.read(self.header_fmt.size)
-        size, tag, key, data = self._read(fd, header, segment, offset, (TAG_PUT2, TAG_PUT), read_data=read_data)
+        size, tag, key, data = self._read(fd, header, segment, offset, (TAG_PUT,), read_data=read_data)
        if id != key:
            raise IntegrityError(
                f"Invalid segment entry header, is not for wanted id [segment {segment}, offset {offset}]"
@@ -1594,14 +1568,10 @@ class LoggedIO:
        Code shared by read() and iter_objects().

        Confidence in returned data:
-        PUT2 tags, read_data == True: crc32 check (header) plus digest check (header+data)
-        PUT2 tags, read_data == False: crc32 check (header)
        PUT tags, read_data == True: crc32 check (header+data)
        PUT tags, read_data == False: crc32 check can not be done, all data obtained must be considered informational

        read_data == False behaviour:
-        PUT2 tags: return enough of the chunk so that the client is able to decrypt the metadata,
-                   do not read, but just seek over the data.
        PUT tags:  return None and just seek over the data.
        """

@@ -1628,7 +1598,7 @@ class LoggedIO:
            raise IntegrityError(f"Invalid segment entry size {size} - too big [segment {segment}, offset {offset}]")
        if size < fmt.size:
            raise IntegrityError(f"Invalid segment entry size {size} - too small [segment {segment}, offset {offset}]")
-        if tag not in (TAG_PUT2, TAG_DELETE, TAG_COMMIT, TAG_PUT):
+        if tag not in (TAG_PUT, TAG_DELETE, TAG_COMMIT):
            raise IntegrityError(
                f"Invalid segment entry header, did not get a known tag " f"[segment {segment}, offset {offset}]"
            )
@@ -1640,7 +1610,7 @@ class LoggedIO:
            check_crc32(crc, header)
            # that's all for COMMITs.
        else:
-            # all other tags (TAG_PUT2, TAG_DELETE, TAG_PUT) have a key
+            # all other tags (TAG_PUT, TAG_DELETE) have a key
            key = fd.read(32)
            length -= 32
            if len(key) != 32:
@@ -1654,38 +1624,7 @@ class LoggedIO:
            else:
                # TAG_PUT: we can not do a crc32 header check here, because the crc32 is computed over header+data!
                #          for the check, see code below when read_data is True.
-                if tag == TAG_PUT2:
-                    entry_hash = fd.read(self.ENTRY_HASH_SIZE)
-                    length -= self.ENTRY_HASH_SIZE
-                    if len(entry_hash) != self.ENTRY_HASH_SIZE:
-                        raise IntegrityError(
-                            f"Segment entry hash short read [segment {segment}, offset {offset}]: "
-                            f"expected {self.ENTRY_HASH_SIZE}, got {len(entry_hash)} bytes"
-                        )
-                    check_crc32(crc, header, key, entry_hash)
                if not read_data:
-                    if tag == TAG_PUT2:
-                        # PUT2 is only used in new repos and they also have different RepoObj layout,
-                        # supporting separately encrypted metadata and data.
-                        # In this case, we return enough bytes so the client can decrypt the metadata
-                        # and seek over the rest (over the encrypted data).
-                        hdr_size = RepoObj.obj_header.size
-                        hdr = fd.read(hdr_size)
-                        length -= hdr_size
-                        if len(hdr) != hdr_size:
-                            raise IntegrityError(
-                                f"Segment entry meta length short read [segment {segment}, offset {offset}]: "
-                                f"expected {hdr_size}, got {len(hdr)} bytes"
-                            )
-                        meta_size = RepoObj.obj_header.unpack(hdr)[0]
-                        meta = fd.read(meta_size)
-                        length -= meta_size
-                        if len(meta) != meta_size:
-                            raise IntegrityError(
-                                f"Segment entry meta short read [segment {segment}, offset {offset}]: "
-                                f"expected {meta_size}, got {len(meta)} bytes"
-                            )
-                        data = hdr + meta  # shortened chunk - enough so the client can decrypt the metadata
                    # in any case, we seek over the remainder of the chunk
                    oldpos = fd.tell()
                    seeked = fd.seek(length, os.SEEK_CUR) - oldpos
@@ -1701,10 +1640,7 @@ class LoggedIO:
                            f"Segment entry data short read [segment {segment}, offset {offset}]: "
                            f"expected {length}, got {len(data)} bytes"
                        )
-                    if tag == TAG_PUT2:
-                        if self.entry_hash(memoryview(header)[4:], key, data) != entry_hash:
-                            raise IntegrityError(f"Segment entry hash mismatch [segment {segment}, offset {offset}]")
-                    elif tag == TAG_PUT:
+                    if tag == TAG_PUT:
                        check_crc32(crc, header, key, data)
        return size, tag, key, data

@@ -1714,12 +1650,11 @@ class LoggedIO:
            # this would push the segment entry size beyond MAX_OBJECT_SIZE.
            raise IntegrityError(f"More than allowed put data [{data_size} > {MAX_DATA_SIZE}]")
        fd = self.get_write_fd(want_new=(id == Manifest.MANIFEST_ID), raise_full=raise_full)
-        size = data_size + self.HEADER_ID_SIZE + self.ENTRY_HASH_SIZE
+        size = data_size + self.HEADER_ID_SIZE
        offset = self.offset
-        header = self.header_no_crc_fmt.pack(size, TAG_PUT2)
-        entry_hash = self.entry_hash(header, id, data)
-        crc = self.crc_fmt.pack(crc32(entry_hash, crc32(id, crc32(header))) & 0xFFFFFFFF)
-        fd.write(b"".join((crc, header, id, entry_hash)))
+        header = self.header_no_crc_fmt.pack(size, TAG_PUT)
+        crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xFFFFFFFF)
+        fd.write(b"".join((crc, header, id)))
        fd.write(data)
        self.offset += size
        return self.segment, offset
@@ -1745,4 +1680,4 @@ class LoggedIO:
        return self.segment - 1  # close_segment() increments it


-assert LoggedIO.HEADER_ID_SIZE + LoggedIO.ENTRY_HASH_SIZE == 41 + 8  # see constants.MAX_OBJECT_SIZE
+assert LoggedIO.HEADER_ID_SIZE == 41  # see constants.MAX_OBJECT_SIZE
--- a/src/borg/remote.py
+++ b/src/borg/remote.py
@@ -356,14 +356,14 @@ class RepositoryServer:  # pragma: no cover
        path = os.path.realpath(path)
        return path

-    def open(self, path, create=False, lock_wait=None, lock=True, exclusive=None, v1_or_v2=False):
-        if v1_or_v2:
+    def open(self, path, create=False, lock_wait=None, lock=True, exclusive=None, v1_legacy=False):
+        if v1_legacy:
            from .legacy.repository import LegacyRepository

            self.RepoCls = LegacyRepository
        else:
            self.RepoCls = Repository
-        self.rpc_methods = self._legacy_rpc_methods if v1_or_v2 else self._rpc_methods
+        self.rpc_methods = self._legacy_rpc_methods if v1_legacy else self._rpc_methods
        logging.debug("Resolving repository path %r", path)
        path = self._resolve_path(path)
        logging.debug("Resolved repository path to %r", path)
@@ -386,7 +386,7 @@ class RepositoryServer:  # pragma: no cover
            else:
                raise PathNotAllowed(path)
        kwargs = dict(lock_wait=lock_wait, lock=lock, exclusive=exclusive, send_log_cb=self.send_queued_log)
-        if not v1_or_v2:
+        if not v1_legacy:
            kwargs["permissions"] = self.permissions
        self.repository = self.RepoCls(path, create, **kwargs)
        self.repository.__enter__()  # clean exit handled by serve() method
@@ -957,8 +957,8 @@ class RemoteRepository:
                    if chunkid in self.chunkid_to_msgids:
                        self.ignore_responses.add(pop_preload_msgid(chunkid))

-    @api(since=parse_version("1.0.0"), v1_or_v2={"since": parse_version("2.0.0b9"), "previously": True})
-    def open(self, path, create=False, lock_wait=None, lock=True, exclusive=False, v1_or_v2=False):
+    @api(since=parse_version("1.0.0"), v1_legacy={"since": parse_version("2.0.0b21"), "previously": True})
+    def open(self, path, create=False, lock_wait=None, lock=True, exclusive=False, v1_legacy=False):
        """actual remoting is done via self.call in the @api decorator"""

    @api(since=parse_version("2.0.0a3"))
--- a/src/borg/testsuite/archiver/_common_test.py
+++ b/src/borg/testsuite/archiver/_common_test.py
@@ -2,29 +2,29 @@ from unittest.mock import MagicMock, patch


 def test_get_repository_ssh_v1_uses_legacy_remote():
-    """get_repository picks LegacyRemoteRepository when proto=ssh and v1_or_v2=True."""
+    """get_repository picks LegacyRemoteRepository when proto=ssh and v1_legacy=True."""
    from ...archiver._common import get_repository

    location = MagicMock()
    location.proto = "ssh"

    with patch("borg.legacy.remote.LegacyRemoteRepository") as mock_cls:
-        get_repository(location, create=False, exclusive=False, lock_wait=None, lock=True, args=None, v1_or_v2=True)
+        get_repository(location, create=False, exclusive=False, lock_wait=None, lock=True, args=None, v1_legacy=True)

    mock_cls.assert_called_once_with(location, create=False, exclusive=False, lock_wait=None, lock=True, args=None)


 def test_get_repository_local_v1_uses_legacy_repository(tmp_path):
-    """get_repository picks LegacyRepository for a local-style path when v1_or_v2=True."""
+    """get_repository picks LegacyRepository for a local-style path when v1_legacy=True."""
    from ...archiver._common import get_repository

-    # proto="file" with v1_or_v2=True skips the borgstore elif (which requires not v1_or_v2)
+    # proto="file" with v1_legacy=True skips the borgstore elif (which requires not v1_legacy)
    # and falls to the else branch where LegacyRepository is imported.
    location = MagicMock()
    location.proto = "file"
    location.path = str(tmp_path)

    with patch("borg.legacy.repository.LegacyRepository") as mock_cls:
-        get_repository(location, create=False, exclusive=False, lock_wait=None, lock=True, args=None, v1_or_v2=True)
+        get_repository(location, create=False, exclusive=False, lock_wait=None, lock=True, args=None, v1_legacy=True)

    mock_cls.assert_called_once_with(str(tmp_path), create=False, exclusive=False, lock_wait=None, lock=True)
--- a/src/borg/testsuite/legacyrepository_test.py
+++ b/src/borg/testsuite/legacyrepository_test.py
@@ -15,7 +15,7 @@ from ..fslocking import Lock, LockFailed
 from ..platformflags import is_win32
 from ..legacy.remote import LegacyRemoteRepository, InvalidRPCMethod, PathNotAllowed
 from ..legacy.repository import LegacyRepository, LoggedIO
-from ..legacy.repository import MAGIC, MAX_DATA_SIZE, TAG_DELETE, TAG_PUT2, TAG_PUT, TAG_COMMIT
+from ..legacy.repository import MAGIC, MAX_DATA_SIZE, TAG_DELETE, TAG_PUT, TAG_COMMIT
 from ..repoobj import RepoObj
 from .hashindex_test import H

@@ -110,7 +110,7 @@ def repo_dump(repository, label=None):
    label = label + ": " if label is not None else ""
    H_trans = {H(i): i for i in range(10)}
    H_trans[None] = -1  # key == None appears in commits
-    tag_trans = {TAG_PUT2: "put2", TAG_PUT: "put", TAG_DELETE: "del", TAG_COMMIT: "comm"}
+    tag_trans = {TAG_PUT: "put", TAG_DELETE: "del", TAG_COMMIT: "comm"}
    for segment, fn in repository.io.segment_iterator():
        for tag, key, offset, size, _ in repository.io.iter_objects(segment):
            print("%s%s H(%d) -> %s[%d..+%d]" % (label, tag_trans[tag], H_trans[key], fn, offset, size))
@@ -152,12 +152,11 @@ def test_read_data(repo_fixtures, request):
        meta, data = b"meta", b"data"
        hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta).digest(), xxh64(data).digest())
        chunk_complete = hdr + meta + data
-        chunk_short = hdr + meta
        repository.put(H(0), chunk_complete)
        repository.commit(compact=False)
        assert repository.get(H(0)) == chunk_complete
        assert repository.get(H(0), read_data=True) == chunk_complete
-        assert repository.get(H(0), read_data=False) == chunk_short
+        assert repository.get(H(0), read_data=False) is None


 def test_consistency(repo_fixtures, request):
@@ -235,13 +234,13 @@ def test_max_data_size(repo_fixtures, request):

 def _assert_sparse(repository):
    # the superseded 123456... PUT
-    assert repository.compact[0] == 41 + 8 + 0  # len(fchunk(b"123456789"))
+    assert repository.compact[0] == 41 + 0  # len(fchunk(b"123456789"))
    # a COMMIT
    assert repository.compact[1] == 9
    # the DELETE issued by the superseding PUT (or issued directly)
    assert repository.compact[2] == 41
    repository._rebuild_sparse(0)
-    assert repository.compact[0] == 41 + 8 + len(fchunk(b"123456789"))  # 9 is chunk or commit?
+    assert repository.compact[0] == 41 + len(fchunk(b"123456789"))  # 9 is chunk or commit?


 def test_sparse1(repository):
@@ -269,10 +268,10 @@ def test_sparse_delete(repository):
        repository.delete(H(0))
        repository.io._write_fd.sync()
        # the on-line tracking works on a per-object basis...
-        assert repository.compact[0] == 41 + 8 + 41 + 0  # len(chunk0) information is lost
+        assert repository.compact[0] == 41 + 41 + 0  # len(chunk0) information is lost
        repository._rebuild_sparse(0)
        # ...while _rebuild_sparse can mark whole segments as completely sparse (which then includes the segment magic)
-        assert repository.compact[0] == 41 + 8 + 41 + len(chunk0) + len(MAGIC)
+        assert repository.compact[0] == 41 + 41 + len(chunk0) + len(MAGIC)
        repository.commit(compact=True)
        assert 0 not in [segment for segment, _ in repository.io.segment_iterator()]