diff --git a/src/borg/repository.py b/src/borg/repository.py index 1a7690ad6..2fb2a5aad 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -415,9 +415,8 @@ class Repository: """Set the ChunkIndex get() uses to resolve pack locations. The caller retains ownership; Repository holds a borrowed reference. - Pass None to reset to an empty index. """ - self._chunks = chunks if chunks is not None else ChunkIndex() + self._chunks = chunks def flush(self): """Flush any buffered pack writer chunks.""" @@ -435,7 +434,6 @@ class Repository: if self.store_opened: self.store.close() self.store_opened = False - self._chunks = None self.opened = False def info(self): @@ -612,11 +610,12 @@ class Repository: def get(self, id, read_data=True, raise_missing=True): self._lock_refresh() - pack_id = id # N=1 fallback: pack_id == chunk_id - obj_offset, obj_size = 0, None entry = self._chunks.get(id) - if entry is not None and entry.pack_id != UNKNOWN_BYTES32: # UNKNOWN: buffered, not yet flushed - pack_id, obj_offset, obj_size = entry.pack_id, entry.obj_offset, entry.obj_size + if entry is None or entry.pack_id == UNKNOWN_BYTES32: + if raise_missing: + raise self.ObjectNotFound(id, str(self._location)) + return None + pack_id, obj_offset, obj_size = entry.pack_id, entry.obj_offset, entry.obj_size id_hex = bin_to_hex(id) key = "packs/" + bin_to_hex(pack_id) try: @@ -672,7 +671,11 @@ class Repository: data_size = len(data) if data_size > MAX_DATA_SIZE: raise IntegrityError(f"More than allowed put data [{data_size} > {MAX_DATA_SIZE}]") - return self._pack_writer.add(id, data) + pack_results = self._pack_writer.add(id, data) + self._chunks.add(id, 0) # mark seen; uncompressed size filled in by cache layer + if pack_results: + self._chunks.update_pack_info(pack_results) + return pack_results def delete(self, id, wait=True): """delete a repo object diff --git a/src/borg/testsuite/repository_test.py b/src/borg/testsuite/repository_test.py index 04ba25858..bba7faea4 100644 --- a/src/borg/testsuite/repository_test.py +++ b/src/borg/testsuite/repository_test.py @@ -77,15 +77,21 @@ def pdchunk(chunk): def test_basic_operations(repo_fixtures, request): + chunks = ChunkIndex() with get_repository_from_fixture(repo_fixtures, request) as repository: for x in range(100): - repository.put(H(x), fchunk(b"SOMEDATA")) + pack_results = repository.put(H(x), fchunk(b"SOMEDATA")) + if pack_results: + for chunk_id, *_ in pack_results: + chunks.add(chunk_id, 0) + chunks.update_pack_info(pack_results) key50 = H(50) assert pdchunk(repository.get(key50)) == b"SOMEDATA" repository.delete(key50) with pytest.raises(Repository.ObjectNotFound): repository.get(key50) with reopen(repository) as repository: + repository.set_chunk_index(chunks) with pytest.raises(Repository.ObjectNotFound): repository.get(key50) for x in range(100): @@ -256,6 +262,18 @@ def test_get_uses_chunk_index_location(tmp_path): assert repository.get(id2) == chunk2 +def test_put_marks_id_in_chunk_index(tmp_path): + # put() immediately updates _chunks: add() marks the id as seen, then update_pack_info + # fills in the real pack location for the current session. + with Repository(str(tmp_path / "repo"), exclusive=True, create=True) as repository: + id1 = H(1) + repository.put(id1, fchunk(b"ZEROS")) + entry = repository._chunks.get(id1) + assert entry is not None + assert entry.pack_id == id1 # N=1: pack_id == chunk_id, set by update_pack_info in put() + assert entry.size == 0 # uncompressed size filled in by cache layer + + def test_pack_writer_final_partial_pack_uses_sha256(): # When max_count > 1, a final flush with only 1 piece must still use SHA256, # not the N=1 pack_id == chunk_id hack.