repo-compress: use chunkindex rather than repository.list()

repository.list is slow, so use the chunkindex instead,
which might be cached in the future. Currently, the chunkindex
also uses repository.list, but at least we can then solve the
problem in one place.
This commit is contained in:
Thomas Waldmann 2024-09-19 18:59:03 +02:00
parent 3fd1587dd7
commit f1a39a059e
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01

View file

@ -14,26 +14,20 @@ from ..logger import create_logger
logger = create_logger()
def find_chunks(repository, repo_objs, stats, ctype, clevel, olevel):
def find_chunks(repository, repo_objs, cache, stats, ctype, clevel, olevel):
"""find chunks that need processing (usually: recompression)."""
recompress_ids = []
compr_keys = stats["compr_keys"] = set()
compr_wanted = ctype, clevel, olevel
marker = None
while True:
result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
if not result:
break
marker = result[-1][0]
chunk_ids = [id for id, _ in result]
for id, chunk_no_data in zip(chunk_ids, repository.get_many(chunk_ids, read_data=False)):
meta = repo_objs.parse_meta(id, chunk_no_data, ro_type=ROBJ_DONTCARE)
compr_found = meta["ctype"], meta["clevel"], meta.get("olevel", -1)
if compr_found != compr_wanted:
recompress_ids.append(id)
compr_keys.add(compr_found)
stats[compr_found] += 1
stats["checked_count"] += 1
for id, _ in cache.chunks.iteritems():
chunk_no_data = repository.get(id, read_data=False)
meta = repo_objs.parse_meta(id, chunk_no_data, ro_type=ROBJ_DONTCARE)
compr_found = meta["ctype"], meta["clevel"], meta.get("olevel", -1)
if compr_found != compr_wanted:
recompress_ids.append(id)
compr_keys.add(compr_found)
stats[compr_found] += 1
stats["checked_count"] += 1
return recompress_ids
@ -88,8 +82,8 @@ def format_compression_spec(ctype, clevel, olevel):
class RepoCompressMixIn:
@with_repository(cache=False, manifest=True, compatibility=(Manifest.Operation.CHECK,))
def do_repo_compress(self, args, repository, manifest):
@with_repository(cache=True, manifest=True, compatibility=(Manifest.Operation.CHECK,))
def do_repo_compress(self, args, repository, manifest, cache):
"""Repository (re-)compression"""
def get_csettings(c):
@ -110,7 +104,7 @@ class RepoCompressMixIn:
stats_find = defaultdict(int)
stats_process = defaultdict(int)
recompress_ids = find_chunks(repository, repo_objs, stats_find, ctype, clevel, olevel)
recompress_ids = find_chunks(repository, repo_objs, cache, stats_find, ctype, clevel, olevel)
recompress_candidate_count = len(recompress_ids)
chunks_limit = min(1000, max(100, recompress_candidate_count // 1000))