diff --git a/src/borg/archive.py b/src/borg/archive.py index 04b6afe25..1d0a83a19 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1747,9 +1747,9 @@ class ArchiveChecker: pi = ProgressIndicatorPercent( total=chunks_count_index, msg="Verifying data %6.2f%%", step=0.01, msgid="check.verify_data" ) - marker = None + state = None while True: - chunk_ids, marker = self.repository.scan(limit=100, marker=marker) + chunk_ids, state = self.repository.scan(limit=100, state=state) if not chunk_ids: break chunks_count_segments += len(chunk_ids) diff --git a/src/borg/archiver/debug_cmd.py b/src/borg/archiver/debug_cmd.py index 57811fc41..3f24c3ce8 100644 --- a/src/borg/archiver/debug_cmd.py +++ b/src/borg/archiver/debug_cmd.py @@ -152,12 +152,10 @@ class DebugMixIn: cdata = repository.get(ids[0]) key = key_factory(repository, cdata) repo_objs = RepoObj(key) - marker = None + state = None i = 0 while True: - ids, marker = repository.scan( - limit=LIST_SCAN_LIMIT, marker=marker - ) # must use on-disk order scanning here + ids, state = repository.scan(limit=LIST_SCAN_LIMIT, state=state) # must use on-disk order scanning here if not ids: break for id in ids: @@ -203,12 +201,12 @@ class DebugMixIn: key = key_factory(repository, cdata) repo_objs = RepoObj(key) - marker = None + state = None last_data = b"" last_id = None i = 0 while True: - ids, marker = repository.scan(limit=LIST_SCAN_LIMIT, marker=marker) # must use on-disk order scanning here + ids, state = repository.scan(limit=LIST_SCAN_LIMIT, state=state) # must use on-disk order scanning here if not ids: break for id in ids: diff --git a/src/borg/remote.py b/src/borg/remote.py index dd7861122..35d2f7219 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -989,8 +989,8 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+. def list(self, limit=None, marker=None, mask=0, value=0): """actual remoting is done via self.call in the @api decorator""" - @api(since=parse_version("1.1.0b3")) - def scan(self, limit=None, marker=None): + @api(since=parse_version("2.0.0b2")) + def scan(self, limit=None, state=None): """actual remoting is done via self.call in the @api decorator""" @api(since=parse_version("2.0.0b2")) diff --git a/src/borg/repository.py b/src/borg/repository.py index e6c9015fa..de681d07a 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -1207,15 +1207,15 @@ class Repository: self.index = self.open_index(self.get_transaction_id()) return [id_ for id_, _ in islice(self.index.iteritems(marker=marker, mask=mask, value=value), limit)] - def scan(self, limit=None, marker=None): + def scan(self, limit=None, state=None): """ - list IDs starting from after - in on-disk order, so that a client + list (the next) chunk IDs from the repository - in on-disk order, so that a client fetching data in this order does linear reads and reuses stuff from disk cache. - marker can either be None (default, meaning "start from the beginning") or the object - returned from a previous scan call (meaning "continue scanning where we stopped previously"). + state can either be None (initially, when starting to scan) or the object + returned from a previous scan call (meaning "continue scanning"). - returns: list of chunk ids, marker + returns: list of chunk ids, state We rely on repository.check() has run already (either now or some time before) and that: @@ -1230,11 +1230,11 @@ class Repository: if not self.index: self.index = self.open_index(transaction_id) # smallest valid seg is 0, smallest valid offs is 8 - start_segment, start_offset = marker if marker is not None else (0, 0) + start_segment, start_offset, end_segment = state if state is not None else (0, 0, transaction_id) ids, segment, offset = [], 0, 0 # we only scan up to end_segment == transaction_id to only scan **committed** chunks, # avoiding scanning into newly written chunks. - for segment, filename in self.io.segment_iterator(start_segment, transaction_id): + for segment, filename in self.io.segment_iterator(start_segment, end_segment): obj_iterator = self.io.iter_objects(segment, start_offset, read_data=False) while True: try: @@ -1255,8 +1255,8 @@ class Repository: # we have found an existing and current object ids.append(id) if len(ids) == limit: - return ids, (segment, offset) - return ids, (segment, offset) + return ids, (segment, offset, end_segment) + return ids, (segment, offset, end_segment) def flags(self, id, mask=0xFFFFFFFF, value=None): """ diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index 2b6440d0e..b6aa493c0 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -191,10 +191,10 @@ class RepositoryTestCase(RepositoryTestCaseBase): self.repository.commit(compact=False) all, _ = self.repository.scan() assert len(all) == 100 - first_half, marker = self.repository.scan(limit=50) + first_half, state = self.repository.scan(limit=50) assert len(first_half) == 50 assert first_half == all[:50] - second_half, _ = self.repository.scan(marker=marker) + second_half, _ = self.repository.scan(state=state) assert len(second_half) == 50 assert second_half == all[50:] # check result order == on-disk order (which is hash order)