repository.scan: use same end_segment within same scan

This is achieved by storing end_segment in the scan state, which is now used instead of the marker.
This commit is contained in:
Thomas Waldmann 2022-09-19 21:14:25 +02:00
parent c0e674ce61
commit c4e54ca44e
5 changed files with 19 additions and 21 deletions

View file

@ -1747,9 +1747,9 @@ class ArchiveChecker:
pi = ProgressIndicatorPercent(
total=chunks_count_index, msg="Verifying data %6.2f%%", step=0.01, msgid="check.verify_data"
)
marker = None
state = None
while True:
chunk_ids, marker = self.repository.scan(limit=100, marker=marker)
chunk_ids, state = self.repository.scan(limit=100, state=state)
if not chunk_ids:
break
chunks_count_segments += len(chunk_ids)

View file

@ -152,12 +152,10 @@ class DebugMixIn:
cdata = repository.get(ids[0])
key = key_factory(repository, cdata)
repo_objs = RepoObj(key)
marker = None
state = None
i = 0
while True:
ids, marker = repository.scan(
limit=LIST_SCAN_LIMIT, marker=marker
) # must use on-disk order scanning here
ids, state = repository.scan(limit=LIST_SCAN_LIMIT, state=state) # must use on-disk order scanning here
if not ids:
break
for id in ids:
@ -203,12 +201,12 @@ class DebugMixIn:
key = key_factory(repository, cdata)
repo_objs = RepoObj(key)
marker = None
state = None
last_data = b""
last_id = None
i = 0
while True:
ids, marker = repository.scan(limit=LIST_SCAN_LIMIT, marker=marker) # must use on-disk order scanning here
ids, state = repository.scan(limit=LIST_SCAN_LIMIT, state=state) # must use on-disk order scanning here
if not ids:
break
for id in ids:

View file

@ -989,8 +989,8 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
def list(self, limit=None, marker=None, mask=0, value=0):
"""actual remoting is done via self.call in the @api decorator"""
@api(since=parse_version("1.1.0b3"))
def scan(self, limit=None, marker=None):
@api(since=parse_version("2.0.0b2"))
def scan(self, limit=None, state=None):
"""actual remoting is done via self.call in the @api decorator"""
@api(since=parse_version("2.0.0b2"))

View file

@ -1207,15 +1207,15 @@ class Repository:
self.index = self.open_index(self.get_transaction_id())
return [id_ for id_, _ in islice(self.index.iteritems(marker=marker, mask=mask, value=value), limit)]
def scan(self, limit=None, marker=None):
def scan(self, limit=None, state=None):
"""
list <limit> IDs starting from after <marker> - in on-disk order, so that a client
list (the next) <limit> chunk IDs from the repository - in on-disk order, so that a client
fetching data in this order does linear reads and reuses stuff from disk cache.
marker can either be None (default, meaning "start from the beginning") or the object
returned from a previous scan call (meaning "continue scanning where we stopped previously").
state can either be None (initially, when starting to scan) or the object
returned from a previous scan call (meaning "continue scanning").
returns: list of chunk ids, marker
returns: list of chunk ids, state
We rely on repository.check() having run already (either now or some time before) and that:
@ -1230,11 +1230,11 @@ class Repository:
if not self.index:
self.index = self.open_index(transaction_id)
# smallest valid seg is <uint32> 0, smallest valid offs is <uint32> 8
start_segment, start_offset = marker if marker is not None else (0, 0)
start_segment, start_offset, end_segment = state if state is not None else (0, 0, transaction_id)
ids, segment, offset = [], 0, 0
# we only scan up to end_segment == transaction_id to only scan **committed** chunks,
# avoiding scanning into newly written chunks.
for segment, filename in self.io.segment_iterator(start_segment, transaction_id):
for segment, filename in self.io.segment_iterator(start_segment, end_segment):
obj_iterator = self.io.iter_objects(segment, start_offset, read_data=False)
while True:
try:
@ -1255,8 +1255,8 @@ class Repository:
# we have found an existing and current object
ids.append(id)
if len(ids) == limit:
return ids, (segment, offset)
return ids, (segment, offset)
return ids, (segment, offset, end_segment)
return ids, (segment, offset, end_segment)
def flags(self, id, mask=0xFFFFFFFF, value=None):
"""

View file

@ -191,10 +191,10 @@ class RepositoryTestCase(RepositoryTestCaseBase):
self.repository.commit(compact=False)
all, _ = self.repository.scan()
assert len(all) == 100
first_half, marker = self.repository.scan(limit=50)
first_half, state = self.repository.scan(limit=50)
assert len(first_half) == 50
assert first_half == all[:50]
second_half, _ = self.repository.scan(marker=marker)
second_half, _ = self.repository.scan(state=state)
assert len(second_half) == 50
assert second_half == all[50:]
# check result order == on-disk order (which is hash order)