mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-15 04:21:38 -04:00
repository: remove N=1 fallback from get(), update _chunks eagerly in put()
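get() no longer falls back to pack_id == chunk_id: when the index entry is missing or its pack_id is UNKNOWN_BYTES32 (buffered, not yet flushed), it raises ObjectNotFound, or returns None if raise_missing is False. put() marks the id in _chunks immediately and applies any returned pack results via update_pack_info, so the index is live after each write.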
This commit is contained in:
parent
54adb9b7f6
commit
1df2065f85
2 changed files with 30 additions and 9 deletions
|
|
@ -415,9 +415,8 @@ class Repository:
|
|||
"""Set the ChunkIndex get() uses to resolve pack locations.
|
||||
|
||||
The caller retains ownership; Repository holds a borrowed reference.
|
||||
Pass None to reset to an empty index.
|
||||
"""
|
||||
self._chunks = chunks if chunks is not None else ChunkIndex()
|
||||
self._chunks = chunks
|
||||
|
||||
def flush(self):
|
||||
"""Flush any buffered pack writer chunks."""
|
||||
|
|
@ -435,7 +434,6 @@ class Repository:
|
|||
if self.store_opened:
|
||||
self.store.close()
|
||||
self.store_opened = False
|
||||
self._chunks = None
|
||||
self.opened = False
|
||||
|
||||
def info(self):
|
||||
|
|
@ -612,11 +610,12 @@ class Repository:
|
|||
|
||||
def get(self, id, read_data=True, raise_missing=True):
|
||||
self._lock_refresh()
|
||||
pack_id = id # N=1 fallback: pack_id == chunk_id
|
||||
obj_offset, obj_size = 0, None
|
||||
entry = self._chunks.get(id)
|
||||
if entry is not None and entry.pack_id != UNKNOWN_BYTES32: # UNKNOWN: buffered, not yet flushed
|
||||
pack_id, obj_offset, obj_size = entry.pack_id, entry.obj_offset, entry.obj_size
|
||||
if entry is None or entry.pack_id == UNKNOWN_BYTES32:
|
||||
if raise_missing:
|
||||
raise self.ObjectNotFound(id, str(self._location))
|
||||
return None
|
||||
pack_id, obj_offset, obj_size = entry.pack_id, entry.obj_offset, entry.obj_size
|
||||
id_hex = bin_to_hex(id)
|
||||
key = "packs/" + bin_to_hex(pack_id)
|
||||
try:
|
||||
|
|
@ -672,7 +671,11 @@ class Repository:
|
|||
data_size = len(data)
|
||||
if data_size > MAX_DATA_SIZE:
|
||||
raise IntegrityError(f"More than allowed put data [{data_size} > {MAX_DATA_SIZE}]")
|
||||
return self._pack_writer.add(id, data)
|
||||
pack_results = self._pack_writer.add(id, data)
|
||||
self._chunks.add(id, 0) # mark seen; uncompressed size filled in by cache layer
|
||||
if pack_results:
|
||||
self._chunks.update_pack_info(pack_results)
|
||||
return pack_results
|
||||
|
||||
def delete(self, id, wait=True):
|
||||
"""delete a repo object
|
||||
|
|
|
|||
|
|
@ -77,15 +77,21 @@ def pdchunk(chunk):
|
|||
|
||||
|
||||
def test_basic_operations(repo_fixtures, request):
|
||||
chunks = ChunkIndex()
|
||||
with get_repository_from_fixture(repo_fixtures, request) as repository:
|
||||
for x in range(100):
|
||||
repository.put(H(x), fchunk(b"SOMEDATA"))
|
||||
pack_results = repository.put(H(x), fchunk(b"SOMEDATA"))
|
||||
if pack_results:
|
||||
for chunk_id, *_ in pack_results:
|
||||
chunks.add(chunk_id, 0)
|
||||
chunks.update_pack_info(pack_results)
|
||||
key50 = H(50)
|
||||
assert pdchunk(repository.get(key50)) == b"SOMEDATA"
|
||||
repository.delete(key50)
|
||||
with pytest.raises(Repository.ObjectNotFound):
|
||||
repository.get(key50)
|
||||
with reopen(repository) as repository:
|
||||
repository.set_chunk_index(chunks)
|
||||
with pytest.raises(Repository.ObjectNotFound):
|
||||
repository.get(key50)
|
||||
for x in range(100):
|
||||
|
|
@ -256,6 +262,18 @@ def test_get_uses_chunk_index_location(tmp_path):
|
|||
assert repository.get(id2) == chunk2
|
||||
|
||||
|
||||
def test_put_marks_id_in_chunk_index(tmp_path):
|
||||
# put() immediately updates _chunks: add() marks the id as seen, then update_pack_info
|
||||
# fills in the real pack location for the current session.
|
||||
with Repository(str(tmp_path / "repo"), exclusive=True, create=True) as repository:
|
||||
id1 = H(1)
|
||||
repository.put(id1, fchunk(b"ZEROS"))
|
||||
entry = repository._chunks.get(id1)
|
||||
assert entry is not None
|
||||
assert entry.pack_id == id1 # N=1: pack_id == chunk_id, set by update_pack_info in put()
|
||||
assert entry.size == 0 # uncompressed size filled in by cache layer
|
||||
|
||||
|
||||
def test_pack_writer_final_partial_pack_uses_sha256():
|
||||
# When max_count > 1, a final flush with only 1 piece must still use SHA256,
|
||||
# not the N=1 pack_id == chunk_id hack.
|
||||
|
|
|
|||
Loading…
Reference in a new issue