diff --git a/src/borg/repository.py b/src/borg/repository.py index f02224e86..d538a37a5 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -1225,11 +1225,11 @@ class Repository: except KeyError: pass else: - # note: doing a delete first will do some bookkeeping. - # we do not want to update the shadow_index here, because - # we know already that we will PUT to this id, so it will - # be in the repo index (and we won't need it in the shadow_index). - self._delete(id, segment, offset, update_shadow_index=False) + # this put call supersedes a previous put to same id. + # it is essential to do a delete first to get correct quota bookkeeping + # and also a correctly updated shadow_index, so that the compaction code + # does not wrongly resurrect an old PUT by dropping a DEL that is still needed. + self._delete(id, segment, offset) segment, offset = self.io.write_put(id, data) self.storage_quota_use += len(data) + self.io.put_header_fmt.size self.segments.setdefault(segment, 0) @@ -1252,16 +1252,15 @@ class Repository: segment, offset = self.index.pop(id) except KeyError: raise self.ObjectNotFound(id, self.path) from None - # if we get here, there is an object with this id in the repo, - # we write a DEL here that shadows the respective PUT. - # after the delete, the object is not in the repo index any more, - # for the compaction code, we need to update the shadow_index in this case. - self._delete(id, segment, offset, update_shadow_index=True) + self._delete(id, segment, offset) - def _delete(self, id, segment, offset, *, update_shadow_index): + def _delete(self, id, segment, offset): # common code used by put and delete - if update_shadow_index: - self.shadow_index.setdefault(id, []).append(segment) + # because we'll write a DEL tag to the repository, we must update the shadow index. + # this is always true, no matter whether we are called from put() or delete(). + # the compaction code needs this to not drop DEL tags if they are still required + # to keep a PUT in an earlier segment in the "effectively deleted" state. + self.shadow_index.setdefault(id, []).append(segment) self.segments[segment] -= 1 size = self.io.read(segment, offset, id, read_data=False) self.compact[segment] += size diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index 39017d72a..b537ccc81 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -345,7 +345,8 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): # we have no put or del any more for H(1), so we lost knowledge about H(1). assert H(1) not in self.repository.shadow_index - def test_shadowed_entries_are_preserved(self): + def test_shadowed_entries_are_preserved1(self): + # this tests the shadowing-by-del behaviour get_latest_segment = self.repository.io.get_latest_segment self.repository.put(H(1), b'1') # This is the segment with our original PUT of interest @@ -379,6 +380,54 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase): # Must not reappear assert H(1) not in self.repository + def test_shadowed_entries_are_preserved2(self): + # this tests the shadowing-by-double-put behaviour, see issue #5661 + # assume this repo state: + # seg1: PUT H1 + # seg2: COMMIT + # seg3: DEL H1, PUT H1, DEL H1, PUT H2 + # seg4: COMMIT + # Note how due to the final DEL H1 in seg3, H1 is effectively deleted. + # + # compaction of only seg3: + # PUT H1 gets dropped because it is not needed any more. + # DEL H1 must be kept, because there is still a PUT H1 in seg1 which must not + # "reappear" in the index if the index gets rebuilt. + get_latest_segment = self.repository.io.get_latest_segment + self.repository.put(H(1), b'1') + # This is the segment with our original PUT of interest + put_segment = get_latest_segment() + self.repository.commit(compact=False) + + # We now put H(1) again (which implicitly does DEL(H(1)) followed by PUT(H(1), ...)), + # delete H(1) afterwards, and force this segment to not be compacted, which can happen + # if it's not sparse enough (symbolized by H(2) here). + self.repository.put(H(1), b'1') + self.repository.delete(H(1)) + self.repository.put(H(2), b'1') + delete_segment = get_latest_segment() + + # We pretend these are mostly dense (not sparse) and won't be compacted + del self.repository.compact[put_segment] + del self.repository.compact[delete_segment] + + self.repository.commit(compact=True) + + # Now we perform an unrelated operation on the segment containing the DELETE, + # causing it to be compacted. + self.repository.delete(H(2)) + self.repository.commit(compact=True) + + assert self.repository.io.segment_exists(put_segment) + assert not self.repository.io.segment_exists(delete_segment) + + # Basic case, since the index survived this must be ok + assert H(1) not in self.repository + # Nuke index, force replay + os.unlink(os.path.join(self.repository.path, 'index.%d' % get_latest_segment())) + # Must not reappear + assert H(1) not in self.repository + def test_shadow_index_rollback(self): self.repository.put(H(1), b'1') self.repository.delete(H(1))