repository: remove N=1 fallback from get(), update _chunks eagerly in put()

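get() now raises ObjectNotFound (or returns None when raise_missing is false) if the chunk index has no entry for the id or the entry's pack_id is still UNKNOWN_BYTES32; put() adds the id to _chunks immediately and applies any returned pack results via update_pack_info(), so the index is live after each write. set_chunk_index() no longer replaces None with an empty ChunkIndex, and close() no longer resets _chunks.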
2026-06-15 04:21:38 -04:00 · 2026-06-10 23:01:29 +05:30 · 2026-06-10 23:01:29 +05:30 · 1df2065f85
commit 1df2065f85
parent 54adb9b7f6
2 changed files with 30 additions and 9 deletions
--- a/src/borg/repository.py
+++ b/src/borg/repository.py
@ -415,9 +415,8 @@ class Repository:
        """Set the ChunkIndex get() uses to resolve pack locations.

        The caller retains ownership; Repository holds a borrowed reference.
-        Pass None to reset to an empty index.
        """
-        self._chunks = chunks if chunks is not None else ChunkIndex()
+        self._chunks = chunks

    def flush(self):
        """Flush any buffered pack writer chunks."""
@ -435,7 +434,6 @@ class Repository:
        if self.store_opened:
            self.store.close()
            self.store_opened = False
-        self._chunks = None
        self.opened = False

    def info(self):
@ -612,11 +610,12 @@ class Repository:

    def get(self, id, read_data=True, raise_missing=True):
        self._lock_refresh()
-        pack_id = id  # N=1 fallback: pack_id == chunk_id
-        obj_offset, obj_size = 0, None
        entry = self._chunks.get(id)
-        if entry is not None and entry.pack_id != UNKNOWN_BYTES32:  # UNKNOWN: buffered, not yet flushed
-            pack_id, obj_offset, obj_size = entry.pack_id, entry.obj_offset, entry.obj_size
+        if entry is None or entry.pack_id == UNKNOWN_BYTES32:
+            if raise_missing:
+                raise self.ObjectNotFound(id, str(self._location))
+            return None
+        pack_id, obj_offset, obj_size = entry.pack_id, entry.obj_offset, entry.obj_size
        id_hex = bin_to_hex(id)
        key = "packs/" + bin_to_hex(pack_id)
        try:
@ -672,7 +671,11 @@ class Repository:
        data_size = len(data)
        if data_size > MAX_DATA_SIZE:
            raise IntegrityError(f"More than allowed put data [{data_size} > {MAX_DATA_SIZE}]")
-        return self._pack_writer.add(id, data)
+        pack_results = self._pack_writer.add(id, data)
+        self._chunks.add(id, 0)  # mark seen; uncompressed size filled in by cache layer
+        if pack_results:
+            self._chunks.update_pack_info(pack_results)
+        return pack_results

    def delete(self, id, wait=True):
        """delete a repo object
--- a/src/borg/testsuite/repository_test.py
+++ b/src/borg/testsuite/repository_test.py
@ -77,15 +77,21 @@ def pdchunk(chunk):


 def test_basic_operations(repo_fixtures, request):
+    chunks = ChunkIndex()
    with get_repository_from_fixture(repo_fixtures, request) as repository:
        for x in range(100):
-            repository.put(H(x), fchunk(b"SOMEDATA"))
+            pack_results = repository.put(H(x), fchunk(b"SOMEDATA"))
+            if pack_results:
+                for chunk_id, *_ in pack_results:
+                    chunks.add(chunk_id, 0)
+                chunks.update_pack_info(pack_results)
        key50 = H(50)
        assert pdchunk(repository.get(key50)) == b"SOMEDATA"
        repository.delete(key50)
        with pytest.raises(Repository.ObjectNotFound):
            repository.get(key50)
    with reopen(repository) as repository:
+        repository.set_chunk_index(chunks)
        with pytest.raises(Repository.ObjectNotFound):
            repository.get(key50)
        for x in range(100):
@ -256,6 +262,18 @@ def test_get_uses_chunk_index_location(tmp_path):
        assert repository.get(id2) == chunk2


+def test_put_marks_id_in_chunk_index(tmp_path):
+    # put() immediately updates _chunks: add() marks the id as seen, then update_pack_info
+    # fills in the real pack location for the current session.
+    with Repository(str(tmp_path / "repo"), exclusive=True, create=True) as repository:
+        id1 = H(1)
+        repository.put(id1, fchunk(b"ZEROS"))
+        entry = repository._chunks.get(id1)
+        assert entry is not None
+        assert entry.pack_id == id1  # N=1: pack_id == chunk_id, set by update_pack_info in put()
+        assert entry.size == 0  # uncompressed size filled in by cache layer
+
+
 def test_pack_writer_final_partial_pack_uses_sha256():
    # When max_count > 1, a final flush with only 1 piece must still use SHA256,
    # not the N=1 pack_id == chunk_id hack.