diff --git a/src/borg/repository.py b/src/borg/repository.py index 297a3eb1f..690a8f959 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -774,6 +774,8 @@ class Repository: try: self.shadow_index[key].remove(segment) except (KeyError, ValueError): + # do not remove entry with empty shadowed_segments list here, + # it is needed for shadowed_put_exists code (see below)! pass elif tag == TAG_DELETE and not in_index: # If the shadow index doesn't contain this key, then we can't say if there's a shadowed older tag, @@ -825,6 +827,11 @@ class Repository: new_segment, size = self.io.write_delete(key) self.compact[new_segment] += size segments.setdefault(new_segment, 0) + else: + # we did not keep the delete tag for key (see if-branch) + if not self.shadow_index[key]: + # shadowed segments list is empty -> remove it + del self.shadow_index[key] assert segments[segment] == 0, 'Corrupted segment reference count - corrupted index or hints' unused.append(segment) pi.show() @@ -1135,13 +1142,11 @@ class Repository: except KeyError: pass else: - self.segments[segment] -= 1 - size = self.io.read(segment, offset, id, read_data=False) - self.storage_quota_use -= size - self.compact[segment] += size - segment, size = self.io.write_delete(id) - self.compact[segment] += size - self.segments.setdefault(segment, 0) + # note: doing a delete first will do some bookkeeping. + # we do not want to update the shadow_index here, because + # we know already that we will PUT to this id, so it will + # be in the repo index (and we won't need it in the shadow_index). + self._delete(id, segment, offset, update_shadow_index=False) segment, offset = self.io.write_put(id, data) self.storage_quota_use += len(data) + self.io.put_header_fmt.size self.segments.setdefault(segment, 0) @@ -1164,7 +1169,16 @@ class Repository: segment, offset = self.index.pop(id) except KeyError: raise self.ObjectNotFound(id, self.path) from None - self.shadow_index.setdefault(id, []).append(segment) + # if we get here, there is an object with this id in the repo, + # we write a DEL here that shadows the respective PUT. + # after the delete, the object is not in the repo index any more, + # for the compaction code, we need to update the shadow_index in this case. + self._delete(id, segment, offset, update_shadow_index=True) + + def _delete(self, id, segment, offset, *, update_shadow_index): + # common code used by put and delete + if update_shadow_index: + self.shadow_index.setdefault(id, []).append(segment) self.segments[segment] -= 1 size = self.io.read(segment, offset, id, read_data=False) self.storage_quota_use -= size diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index be203800e..11225b2f5 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -14,7 +14,7 @@ from ..helpers import IntegrityError from ..helpers import msgpack from ..locking import Lock, LockFailed from ..remote import RemoteRepository, InvalidRPCMethod, PathNotAllowed, ConnectionClosedWithHint, handle_remote_line -from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE +from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE, TAG_PUT, TAG_COMMIT from . import BaseTestCase from .hashindex import H @@ -54,6 +54,16 @@ class RepositoryTestCaseBase(BaseTestCase): self.repository.put(H(2), b'boo') self.repository.delete(H(3)) + def repo_dump(self, label=None): + label = label + ': ' if label is not None else '' + H_trans = {H(i): i for i in range(10)} + H_trans[None] = -1 # key == None appears in commits + tag_trans = {TAG_PUT: 'put', TAG_DELETE: 'del', TAG_COMMIT: 'comm'} + for segment, fn in self.repository.io.segment_iterator(): + for tag, key, offset, size in self.repository.io.iter_objects(segment): + print("%s%s H(%d) -> %s[%d..+%d]" % (label, tag_trans[tag], H_trans[key], fn, offset, size)) + print() + class RepositoryTestCase(RepositoryTestCaseBase): @@ -313,8 +323,10 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): self.repository.put(H(1), b'1') self.repository.put(H(2), b'2') self.repository.commit() + self.repo_dump('p1 p2 cc') self.repository.delete(H(1)) self.repository.commit() + self.repo_dump('d1 cc') last_segment = self.repository.io.get_latest_segment() - 1 num_deletes = 0 for tag, key, offset, size in self.repository.io.iter_objects(last_segment): @@ -325,11 +337,16 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): assert last_segment in self.repository.compact self.repository.put(H(3), b'3') self.repository.commit() + self.repo_dump('p3 cc') assert last_segment not in self.repository.compact assert not self.repository.io.segment_exists(last_segment) for segment, _ in self.repository.io.segment_iterator(): for tag, key, offset, size in self.repository.io.iter_objects(segment): assert tag != TAG_DELETE + assert key != H(1) + # after compaction, there should be no empty shadowed_segments lists left over. + # we have no put or del any more for H(1), so we lost knowledge about H(1). + assert H(1) not in self.repository.shadow_index def test_shadowed_entries_are_preserved(self): get_latest_segment = self.repository.io.get_latest_segment @@ -370,16 +387,19 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): self.repository.delete(H(1)) assert self.repository.shadow_index[H(1)] == [0] self.repository.commit() + self.repo_dump('p1 d1 cc') # note how an empty list means that nothing is shadowed for sure - assert self.repository.shadow_index[H(1)] == [] + assert self.repository.shadow_index[H(1)] == [] # because the delete is considered unstable self.repository.put(H(1), b'1') self.repository.delete(H(1)) + self.repo_dump('p1 d1') # 0 put/delete; 1 commit; 2 compacted; 3 commit; 4 put/delete assert self.repository.shadow_index[H(1)] == [4] self.repository.rollback() + self.repo_dump('r') self.repository.put(H(2), b'1') # After the rollback segment 4 shouldn't be considered anymore - assert self.repository.shadow_index[H(1)] == [] + assert self.repository.shadow_index[H(1)] == [] # because the delete is considered unstable class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase): @@ -790,6 +810,25 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase): self.assert_equal(self.repository.get(H(0)), b'data2') +class RepositoryHintsTestCase(RepositoryTestCaseBase): + + def test_hints_behaviour(self): + self.repository.put(H(0), b'data') + self.assert_equal(self.repository.shadow_index, {}) + self.assert_true(len(self.repository.compact) == 0) + self.repository.delete(H(0)) + self.repository.commit() + # this is to make the previous delete "stable", see delete_is_not_stable: + self.repository.put(H(1), b'data') + self.repository.commit() + # in 1.1-maint, commit() always implicitly compacts. + # nothing to compact any more! no info left about stuff that does not exist any more: + self.assert_not_in(H(0), self.repository.shadow_index) + # segment 0 was compacted away, no info about it left: + self.assert_not_in(0, self.repository.compact) + self.assert_not_in(0, self.repository.segments) + + class RemoteRepositoryTestCase(RepositoryTestCase): repository = None # type: RemoteRepository