From e48ccc7627b256044a7f611bff74bb2af3568be5 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 25 Nov 2024 21:55:30 +0100 Subject: [PATCH] simplify fetch_many api it now takes either: - a list of ChunkListEntry namedtuples - a list of chunk IDs [bytes] --- src/borg/archive.py | 31 ++++++++++++++++++++----------- src/borg/archiver/tar_cmds.py | 4 +--- src/borg/helpers/misc.py | 2 +- src/borg/helpers/parseformat.py | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 384f68398..0228568c1 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -312,10 +312,22 @@ class DownloadPipeline: self.repository.preload([c.id for c in item.chunks]) return preload_chunks - def fetch_many(self, ids, is_preloaded=False, ro_type=None): + def fetch_many(self, chunks, is_preloaded=False, ro_type=None): assert ro_type is not None - for id_, cdata in zip(ids, self.repository.get_many(ids, is_preloaded=is_preloaded)): - _, data = self.repo_objs.parse(id_, cdata, ro_type=ro_type) + ids = [] + sizes = [] + if all(isinstance(chunk, ChunkListEntry) for chunk in chunks): + for chunk in chunks: + ids.append(chunk.id) + sizes.append(chunk.size) + elif all(isinstance(chunk, bytes) for chunk in chunks): + ids = list(chunks) + sizes = [None] * len(ids) + else: + raise TypeError(f"unsupported or mixed element types: {chunks}") + for id, size, cdata in zip(ids, sizes, self.repository.get_many(ids, is_preloaded=is_preloaded)): + _, data = self.repo_objs.parse(id, cdata, ro_type=ro_type) + assert size is None or len(data) == size yield data @@ -770,9 +782,7 @@ Duration: {0.duration} # it would get stuck. if "chunks" in item: item_chunks_size = 0 - for data in self.pipeline.fetch_many( - [c.id for c in item.chunks], is_preloaded=True, ro_type=ROBJ_FILE_STREAM - ): + for data in self.pipeline.fetch_many(item.chunks, is_preloaded=True, ro_type=ROBJ_FILE_STREAM): if pi: pi.show(increase=len(data), info=[remove_surrogates(item.path)]) if stdout: @@ -821,8 +831,7 @@ Duration: {0.duration} with backup_io("open"): fd = open(path, "wb") with fd: - ids = [c.id for c in item.chunks] - for data in self.pipeline.fetch_many(ids, is_preloaded=True, ro_type=ROBJ_FILE_STREAM): + for data in self.pipeline.fetch_many(item.chunks, is_preloaded=True, ro_type=ROBJ_FILE_STREAM): if pi: pi.show(increase=len(data), info=[remove_surrogates(item.path)]) with backup_io("write"): @@ -1005,8 +1014,8 @@ Duration: {0.duration} path, item1, item2, - archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])], ro_type=ROBJ_FILE_STREAM), - archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])], ro_type=ROBJ_FILE_STREAM), + archive1.pipeline.fetch_many(item1.get("chunks", []), ro_type=ROBJ_FILE_STREAM), + archive2.pipeline.fetch_many(item2.get("chunks", []), ro_type=ROBJ_FILE_STREAM), can_compare_chunk_ids=can_compare_chunk_ids, ) @@ -2193,7 +2202,7 @@ class ArchiveRecreater: return chunk_entry def iter_chunks(self, archive, target, chunks): - chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _ in chunks], ro_type=ROBJ_FILE_STREAM) + chunk_iterator = archive.pipeline.fetch_many(chunks, ro_type=ROBJ_FILE_STREAM) if target.recreate_rechunkify: # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk # (does not load the entire file into memory) diff --git a/src/borg/archiver/tar_cmds.py b/src/borg/archiver/tar_cmds.py index d95b992ef..0be7fa02e 100644 --- a/src/borg/archiver/tar_cmds.py +++ b/src/borg/archiver/tar_cmds.py @@ -113,9 +113,7 @@ class TarMixIn: """ Return a file-like object that reads from the chunks of *item*. """ - chunk_iterator = archive.pipeline.fetch_many( - [chunk_id for chunk_id, _ in item.chunks], is_preloaded=True, ro_type=ROBJ_FILE_STREAM - ) + chunk_iterator = archive.pipeline.fetch_many(item.chunks, is_preloaded=True, ro_type=ROBJ_FILE_STREAM) if pi: info = [remove_surrogates(item.path)] return ChunkIteratorFileWrapper( diff --git a/src/borg/helpers/misc.py b/src/borg/helpers/misc.py index 6028b93a3..a46ab2fa9 100644 --- a/src/borg/helpers/misc.py +++ b/src/borg/helpers/misc.py @@ -124,7 +124,7 @@ class ChunkIteratorFileWrapper: def open_item(archive, item): """Return file-like object for archived item (with chunks).""" - chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks], ro_type=ROBJ_FILE_STREAM) + chunk_iterator = archive.pipeline.fetch_many(item.chunks, ro_type=ROBJ_FILE_STREAM) return ChunkIteratorFileWrapper(chunk_iterator) diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index 908aab712..b23bc78ca 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -911,7 +911,7 @@ class ItemFormatter(BaseFormatter): hash = self.xxh64() elif hash_function in self.hash_algorithms: hash = hashlib.new(hash_function) - for data in self.archive.pipeline.fetch_many([c.id for c in item.chunks], ro_type=ROBJ_FILE_STREAM): + for data in self.archive.pipeline.fetch_many(item.chunks, ro_type=ROBJ_FILE_STREAM): hash.update(data) return hash.hexdigest()