diff --git a/src/borg/archive.py b/src/borg/archive.py index 8ce1c5d3b..57db40dc2 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1338,7 +1338,7 @@ class FilesystemObjectProcessors: item.chunks = [] for chunk_id, chunk_size in hl_chunks: # process one-by-one, so we will know in item.chunks how far we got - chunk_entry = cache.chunk_incref(chunk_id, chunk_size, self.stats) + chunk_entry = cache.reuse_chunk(chunk_id, chunk_size, self.stats) item.chunks.append(chunk_entry) else: # normal case, no "2nd+" hardlink if not is_special_file: @@ -1364,7 +1364,7 @@ class FilesystemObjectProcessors: item.chunks = [] for chunk in chunks: # process one-by-one, so we will know in item.chunks how far we got - cache.chunk_incref(chunk.id, chunk.size, self.stats) + cache.reuse_chunk(chunk.id, chunk.size, self.stats) item.chunks.append(chunk) status = "U" # regular file, unchanged else: @@ -2169,7 +2169,7 @@ class ArchiveRecreater: def process_chunks(self, archive, target, item): if not target.recreate_rechunkify: for chunk_id, size in item.chunks: - self.cache.chunk_incref(chunk_id, size, target.stats) + self.cache.reuse_chunk(chunk_id, size, target.stats) return item.chunks chunk_iterator = self.iter_chunks(archive, target, list(item.chunks)) chunk_processor = partial(self.chunk_processor, target) @@ -2179,7 +2179,7 @@ class ArchiveRecreater: chunk_id, data = cached_hash(chunk, self.key.id_hash) size = len(data) if chunk_id in self.seen_chunks: - return self.cache.chunk_incref(chunk_id, size, target.stats) + return self.cache.reuse_chunk(chunk_id, size, target.stats) chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM) self.cache.repository.async_response(wait=False) self.seen_chunks.add(chunk_entry.id) diff --git a/src/borg/archiver/transfer_cmd.py b/src/borg/archiver/transfer_cmd.py index 780513e55..b9e962869 100644 --- a/src/borg/archiver/transfer_cmd.py +++ b/src/borg/archiver/transfer_cmd.py @@ -100,7 +100,7 @@ class TransferMixIn: if "chunks" in item: chunks = [] for chunk_id, size in item.chunks: - chunk_present = cache.seen_chunk(chunk_id, size) != 0 + chunk_present = cache.seen_chunk(chunk_id, size) if not chunk_present: # target repo does not yet have this chunk if not dry_run: cdata = other_repository.get(chunk_id) @@ -147,7 +147,7 @@ class TransferMixIn: transfer_size += size else: if not dry_run: - chunk_entry = cache.chunk_incref(chunk_id, size, archive.stats) + chunk_entry = cache.reuse_chunk(chunk_id, size, archive.stats) chunks.append(chunk_entry) present_size += size if not dry_run: diff --git a/src/borg/cache.py b/src/borg/cache.py index b36fb3c63..3ea4f0370 100644 --- a/src/borg/cache.py +++ b/src/borg/cache.py @@ -579,12 +579,6 @@ class ChunksMixin: self._chunks = self._load_chunks_from_repo() return self._chunks - def chunk_incref(self, id, size, stats): - assert isinstance(size, int) and size > 0 - count, _size = self.chunks.incref(id) - stats.update(size, False) - return ChunkListEntry(id, size) - def seen_chunk(self, id, size=None): entry = self.chunks.get(id, ChunkIndexEntry(0, None)) if entry.refcount and size is not None: @@ -593,7 +587,12 @@ class ChunksMixin: # AdHocWithFilesCache / AdHocCache: # Here *size* is used to update the chunk's size information, which will be zero for existing chunks. self.chunks[id] = entry._replace(size=size) - return entry.refcount + return entry.refcount != 0 + + def reuse_chunk(self, id, size, stats): + assert isinstance(size, int) and size > 0 + stats.update(size, False) + return ChunkListEntry(id, size) def add_chunk( self, @@ -615,15 +614,15 @@ class ChunksMixin: size = len(data) # data is still uncompressed else: raise ValueError("when giving compressed data for a chunk, the uncompressed size must be given also") - refcount = self.seen_chunk(id, size) - if refcount: - return self.chunk_incref(id, size, stats) + exists = self.seen_chunk(id, size) + if exists: + return self.reuse_chunk(id, size, stats) cdata = self.repo_objs.format( id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel, ro_type=ro_type ) self.repository.put(id, cdata, wait=wait) - self.chunks.add(id, 1, size) - stats.update(size, not refcount) + self.chunks.add(id, ChunkIndex.MAX_VALUE, size) + stats.update(size, not exists) return ChunkListEntry(id, size) def _load_chunks_from_repo(self): @@ -639,9 +638,7 @@ class ChunksMixin: if not result: break marker = result[-1][0] - # All chunks from the repository have a refcount of MAX_VALUE, which is sticky, - # therefore we can't/won't delete them. Chunks we added ourselves in this borg run - # are tracked correctly. + # All chunks have a refcount of MAX_VALUE, which is sticky, therefore we can't/won't delete them. init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0) # plaintext size for id, stored_size in result: num_chunks += 1 diff --git a/src/borg/testsuite/cache.py b/src/borg/testsuite/cache.py index 79b26e04c..f1e6e558a 100644 --- a/src/borg/testsuite/cache.py +++ b/src/borg/testsuite/cache.py @@ -45,11 +45,10 @@ class TestAdHocCache: assert cache.cache_mode == "d" assert cache.files is None - def test_incref_after_add_chunk(self, cache): + def test_reuse_after_add_chunk(self, cache): assert cache.add_chunk(H(3), {}, b"5678", stats=Statistics()) == (H(3), 4) - assert cache.chunk_incref(H(3), 4, Statistics()) == (H(3), 4) + assert cache.reuse_chunk(H(3), 4, Statistics()) == (H(3), 4) - def test_existing_incref_after_add_chunk(self, cache): - """This case occurs with part files, see Archive.chunk_file.""" + def test_existing_reuse_after_add_chunk(self, cache): assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4) - assert cache.chunk_incref(H(1), 4, Statistics()) == (H(1), 4) + assert cache.reuse_chunk(H(1), 4, Statistics()) == (H(1), 4) diff --git a/src/borg/upgrade.py b/src/borg/upgrade.py index 22a27c18c..35d71bec2 100644 --- a/src/borg/upgrade.py +++ b/src/borg/upgrade.py @@ -85,7 +85,7 @@ class UpgraderFrom12To20: if chunks is not None: item.chunks = chunks for chunk_id, chunk_size in chunks: - self.cache.chunk_incref(chunk_id, chunk_size, self.archive.stats) + self.cache.reuse_chunk(chunk_id, chunk_size, self.archive.stats) if chunks_healthy is not None: item.chunks_healthy = chunks del item.source # not used for hardlinks any more, replaced by hlid