mirror of https://github.com/borgbackup/borg.git (synced 2026-02-20 00:10:35 -05:00)
repository.scan: use same end_segment within same scan
Achieved by putting end_segment into the state that is now used instead of the marker.
This commit is contained in:
parent c0e674ce61
commit c4e54ca44e
5 changed files with 19 additions and 21 deletions
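Before the diff, a minimal sketch in Python of the idea behind this commit (stand-in code, not borg's; `segments`, `limit`, and `transaction_id` are hypothetical stand-ins for the repository internals). The upper scan bound, end_segment, is captured once on the first call and carried inside the resumable state, so segments committed between two scan() calls cannot extend a scan that is already in progress:

    # Minimal sketch of a resumable scan with a pinned upper bound.
    # segments: dict mapping segment number -> list of committed chunk ids
    # transaction_id: highest committed segment number at scan start
    def scan(segments, limit, state=None, transaction_id=None):
        # end_segment is pinned on the first call and carried in the state,
        # so segments committed later (higher numbers) never enter this scan.
        start_segment, start_index, end_segment = state if state is not None else (0, 0, transaction_id)
        ids = []
        for seg in sorted(n for n in segments if start_segment <= n <= end_segment):
            items = segments[seg]
            first = start_index if seg == start_segment else 0
            for i in range(first, len(items)):
                ids.append(items[i])
                if len(ids) == limit:
                    return ids, (seg, i + 1, end_segment)  # resume right after this chunk
        return ids, (end_segment + 1, 0, end_segment)  # exhausted: next call yields []

Compare this with the old marker, which was only (segment, offset): each resumed call re-read the then-current transaction_id, so the effective upper bound could change mid-scan.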
@@ -1747,9 +1747,9 @@ class ArchiveChecker:
         pi = ProgressIndicatorPercent(
             total=chunks_count_index, msg="Verifying data %6.2f%%", step=0.01, msgid="check.verify_data"
         )
-        marker = None
+        state = None
         while True:
-            chunk_ids, marker = self.repository.scan(limit=100, marker=marker)
+            chunk_ids, state = self.repository.scan(limit=100, state=state)
             if not chunk_ids:
                 break
             chunks_count_segments += len(chunk_ids)
@@ -152,12 +152,10 @@ class DebugMixIn:
         cdata = repository.get(ids[0])
         key = key_factory(repository, cdata)
         repo_objs = RepoObj(key)
-        marker = None
+        state = None
         i = 0
         while True:
-            ids, marker = repository.scan(
-                limit=LIST_SCAN_LIMIT, marker=marker
-            )  # must use on-disk order scanning here
+            ids, state = repository.scan(limit=LIST_SCAN_LIMIT, state=state)  # must use on-disk order scanning here
             if not ids:
                 break
             for id in ids:
@@ -203,12 +201,12 @@ class DebugMixIn:
         key = key_factory(repository, cdata)
         repo_objs = RepoObj(key)

-        marker = None
+        state = None
         last_data = b""
         last_id = None
         i = 0
         while True:
-            ids, marker = repository.scan(limit=LIST_SCAN_LIMIT, marker=marker)  # must use on-disk order scanning here
+            ids, state = repository.scan(limit=LIST_SCAN_LIMIT, state=state)  # must use on-disk order scanning here
             if not ids:
                 break
             for id in ids:
@@ -989,8 +989,8 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
     def list(self, limit=None, marker=None, mask=0, value=0):
         """actual remoting is done via self.call in the @api decorator"""

-    @api(since=parse_version("1.1.0b3"))
-    def scan(self, limit=None, marker=None):
+    @api(since=parse_version("2.0.0b2"))
+    def scan(self, limit=None, state=None):
         """actual remoting is done via self.call in the @api decorator"""

     @api(since=parse_version("2.0.0b2"))
@@ -1207,15 +1207,15 @@ class Repository:
             self.index = self.open_index(self.get_transaction_id())
         return [id_ for id_, _ in islice(self.index.iteritems(marker=marker, mask=mask, value=value), limit)]

-    def scan(self, limit=None, marker=None):
+    def scan(self, limit=None, state=None):
         """
-        list <limit> IDs starting from after <marker> - in on-disk order, so that a client
+        list (the next) <limit> chunk IDs from the repository - in on-disk order, so that a client
         fetching data in this order does linear reads and reuses stuff from disk cache.

-        marker can either be None (default, meaning "start from the beginning") or the object
-        returned from a previous scan call (meaning "continue scanning where we stopped previously").
+        state can either be None (initially, when starting to scan) or the object
+        returned from a previous scan call (meaning "continue scanning").

-        returns: list of chunk ids, marker
+        returns: list of chunk ids, state

         We rely on repository.check() having run already (either now or some time before) and that:
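Putting the pieces together, the calling pattern for the new interface (a sketch assembled from the caller hunks above; LIST_SCAN_LIMIT and repository are assumed to be in scope):

    state = None  # None starts a fresh scan
    while True:
        ids, state = repository.scan(limit=LIST_SCAN_LIMIT, state=state)
        if not ids:  # empty result means the scan is exhausted
            break
        for id in ids:
            ...  # process chunks in on-disk order (linear reads, warm disk cache)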
@@ -1230,11 +1230,11 @@ class Repository:
         if not self.index:
             self.index = self.open_index(transaction_id)
         # smallest valid seg is <uint32> 0, smallest valid offs is <uint32> 8
-        start_segment, start_offset = marker if marker is not None else (0, 0)
+        start_segment, start_offset, end_segment = state if state is not None else (0, 0, transaction_id)
         ids, segment, offset = [], 0, 0
-        for segment, filename in self.io.segment_iterator(start_segment, transaction_id):
+        # we only scan up to end_segment == transaction_id to only scan **committed** chunks,
+        # avoiding scanning into newly written chunks.
+        for segment, filename in self.io.segment_iterator(start_segment, end_segment):
             obj_iterator = self.io.iter_objects(segment, start_offset, read_data=False)
             while True:
                 try:
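The property this buys, sketched as a hypothetical interleaving (the intervening commit is assumed, not part of this diff):

    state = None
    first, state = repository.scan(limit=50, state=state)  # pins end_segment = transaction_id at scan start
    # ... new segments get committed in the meantime ...
    rest, state = repository.scan(limit=50, state=state)   # still bounded by the pinned end_segment;
                                                           # chunks committed after the scan began are
                                                           # not returned by this scan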
@@ -1255,8 +1255,8 @@ class Repository:
                         # we have found an existing and current object
                         ids.append(id)
                         if len(ids) == limit:
-                            return ids, (segment, offset)
-        return ids, (segment, offset)
+                            return ids, (segment, offset, end_segment)
+        return ids, (segment, offset, end_segment)

     def flags(self, id, mask=0xFFFFFFFF, value=None):
         """
@@ -191,10 +191,10 @@ class RepositoryTestCase(RepositoryTestCaseBase):
         self.repository.commit(compact=False)
         all, _ = self.repository.scan()
         assert len(all) == 100
-        first_half, marker = self.repository.scan(limit=50)
+        first_half, state = self.repository.scan(limit=50)
         assert len(first_half) == 50
         assert first_half == all[:50]
-        second_half, _ = self.repository.scan(marker=marker)
+        second_half, _ = self.repository.scan(state=state)
         assert len(second_half) == 50
         assert second_half == all[50:]
         # check result order == on-disk order (which is hash order)