mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-10 09:21:44 -04:00
cache: renamed .chunk_incref -> .reuse_chunk, .seen_chunk now returns a boolean
reuse_chunk is the complement of add_chunk for already existing chunks. It doesn't do refcounting anymore. .seen_chunk no longer returns the refcount, only whether the chunk exists. When a new chunk is added, its refcount is immediately set to MAX_VALUE, so there is no longer any difference between previously existing chunks and newly added chunks. This makes the stats even more useless, but we have less complexity.
This commit is contained in:
parent
f9d2e6827b
commit
ccc84c7a4e
5 changed files with 23 additions and 27 deletions
|
|
@ -1338,7 +1338,7 @@ class FilesystemObjectProcessors:
|
|||
item.chunks = []
|
||||
for chunk_id, chunk_size in hl_chunks:
|
||||
# process one-by-one, so we will know in item.chunks how far we got
|
||||
chunk_entry = cache.chunk_incref(chunk_id, chunk_size, self.stats)
|
||||
chunk_entry = cache.reuse_chunk(chunk_id, chunk_size, self.stats)
|
||||
item.chunks.append(chunk_entry)
|
||||
else: # normal case, no "2nd+" hardlink
|
||||
if not is_special_file:
|
||||
|
|
@ -1364,7 +1364,7 @@ class FilesystemObjectProcessors:
|
|||
item.chunks = []
|
||||
for chunk in chunks:
|
||||
# process one-by-one, so we will know in item.chunks how far we got
|
||||
cache.chunk_incref(chunk.id, chunk.size, self.stats)
|
||||
cache.reuse_chunk(chunk.id, chunk.size, self.stats)
|
||||
item.chunks.append(chunk)
|
||||
status = "U" # regular file, unchanged
|
||||
else:
|
||||
|
|
@ -2169,7 +2169,7 @@ class ArchiveRecreater:
|
|||
def process_chunks(self, archive, target, item):
|
||||
if not target.recreate_rechunkify:
|
||||
for chunk_id, size in item.chunks:
|
||||
self.cache.chunk_incref(chunk_id, size, target.stats)
|
||||
self.cache.reuse_chunk(chunk_id, size, target.stats)
|
||||
return item.chunks
|
||||
chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
|
||||
chunk_processor = partial(self.chunk_processor, target)
|
||||
|
|
@ -2179,7 +2179,7 @@ class ArchiveRecreater:
|
|||
chunk_id, data = cached_hash(chunk, self.key.id_hash)
|
||||
size = len(data)
|
||||
if chunk_id in self.seen_chunks:
|
||||
return self.cache.chunk_incref(chunk_id, size, target.stats)
|
||||
return self.cache.reuse_chunk(chunk_id, size, target.stats)
|
||||
chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False, ro_type=ROBJ_FILE_STREAM)
|
||||
self.cache.repository.async_response(wait=False)
|
||||
self.seen_chunks.add(chunk_entry.id)
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ class TransferMixIn:
|
|||
if "chunks" in item:
|
||||
chunks = []
|
||||
for chunk_id, size in item.chunks:
|
||||
chunk_present = cache.seen_chunk(chunk_id, size) != 0
|
||||
chunk_present = cache.seen_chunk(chunk_id, size)
|
||||
if not chunk_present: # target repo does not yet have this chunk
|
||||
if not dry_run:
|
||||
cdata = other_repository.get(chunk_id)
|
||||
|
|
@ -147,7 +147,7 @@ class TransferMixIn:
|
|||
transfer_size += size
|
||||
else:
|
||||
if not dry_run:
|
||||
chunk_entry = cache.chunk_incref(chunk_id, size, archive.stats)
|
||||
chunk_entry = cache.reuse_chunk(chunk_id, size, archive.stats)
|
||||
chunks.append(chunk_entry)
|
||||
present_size += size
|
||||
if not dry_run:
|
||||
|
|
|
|||
|
|
@ -579,12 +579,6 @@ class ChunksMixin:
|
|||
self._chunks = self._load_chunks_from_repo()
|
||||
return self._chunks
|
||||
|
||||
def chunk_incref(self, id, size, stats):
|
||||
assert isinstance(size, int) and size > 0
|
||||
count, _size = self.chunks.incref(id)
|
||||
stats.update(size, False)
|
||||
return ChunkListEntry(id, size)
|
||||
|
||||
def seen_chunk(self, id, size=None):
|
||||
entry = self.chunks.get(id, ChunkIndexEntry(0, None))
|
||||
if entry.refcount and size is not None:
|
||||
|
|
@ -593,7 +587,12 @@ class ChunksMixin:
|
|||
# AdHocWithFilesCache / AdHocCache:
|
||||
# Here *size* is used to update the chunk's size information, which will be zero for existing chunks.
|
||||
self.chunks[id] = entry._replace(size=size)
|
||||
return entry.refcount
|
||||
return entry.refcount != 0
|
||||
|
||||
def reuse_chunk(self, id, size, stats):
|
||||
assert isinstance(size, int) and size > 0
|
||||
stats.update(size, False)
|
||||
return ChunkListEntry(id, size)
|
||||
|
||||
def add_chunk(
|
||||
self,
|
||||
|
|
@ -615,15 +614,15 @@ class ChunksMixin:
|
|||
size = len(data) # data is still uncompressed
|
||||
else:
|
||||
raise ValueError("when giving compressed data for a chunk, the uncompressed size must be given also")
|
||||
refcount = self.seen_chunk(id, size)
|
||||
if refcount:
|
||||
return self.chunk_incref(id, size, stats)
|
||||
exists = self.seen_chunk(id, size)
|
||||
if exists:
|
||||
return self.reuse_chunk(id, size, stats)
|
||||
cdata = self.repo_objs.format(
|
||||
id, meta, data, compress=compress, size=size, ctype=ctype, clevel=clevel, ro_type=ro_type
|
||||
)
|
||||
self.repository.put(id, cdata, wait=wait)
|
||||
self.chunks.add(id, 1, size)
|
||||
stats.update(size, not refcount)
|
||||
self.chunks.add(id, ChunkIndex.MAX_VALUE, size)
|
||||
stats.update(size, not exists)
|
||||
return ChunkListEntry(id, size)
|
||||
|
||||
def _load_chunks_from_repo(self):
|
||||
|
|
@ -639,9 +638,7 @@ class ChunksMixin:
|
|||
if not result:
|
||||
break
|
||||
marker = result[-1][0]
|
||||
# All chunks from the repository have a refcount of MAX_VALUE, which is sticky,
|
||||
# therefore we can't/won't delete them. Chunks we added ourselves in this borg run
|
||||
# are tracked correctly.
|
||||
# All chunks have a refcount of MAX_VALUE, which is sticky, therefore we can't/won't delete them.
|
||||
init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0) # plaintext size
|
||||
for id, stored_size in result:
|
||||
num_chunks += 1
|
||||
|
|
|
|||
|
|
@ -45,11 +45,10 @@ class TestAdHocCache:
|
|||
assert cache.cache_mode == "d"
|
||||
assert cache.files is None
|
||||
|
||||
def test_incref_after_add_chunk(self, cache):
|
||||
def test_reuse_after_add_chunk(self, cache):
|
||||
assert cache.add_chunk(H(3), {}, b"5678", stats=Statistics()) == (H(3), 4)
|
||||
assert cache.chunk_incref(H(3), 4, Statistics()) == (H(3), 4)
|
||||
assert cache.reuse_chunk(H(3), 4, Statistics()) == (H(3), 4)
|
||||
|
||||
def test_existing_incref_after_add_chunk(self, cache):
|
||||
"""This case occurs with part files, see Archive.chunk_file."""
|
||||
def test_existing_reuse_after_add_chunk(self, cache):
|
||||
assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)
|
||||
assert cache.chunk_incref(H(1), 4, Statistics()) == (H(1), 4)
|
||||
assert cache.reuse_chunk(H(1), 4, Statistics()) == (H(1), 4)
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ class UpgraderFrom12To20:
|
|||
if chunks is not None:
|
||||
item.chunks = chunks
|
||||
for chunk_id, chunk_size in chunks:
|
||||
self.cache.chunk_incref(chunk_id, chunk_size, self.archive.stats)
|
||||
self.cache.reuse_chunk(chunk_id, chunk_size, self.archive.stats)
|
||||
if chunks_healthy is not None:
|
||||
item.chunks_healthy = chunks
|
||||
del item.source # not used for hardlinks any more, replaced by hlid
|
||||
|
|
|
|||
Loading…
Reference in a new issue