mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-11 01:41:57 -04:00
FileReader: refactor read method
Simplified and improved the handling of mixed chunk types during reading. The allocation type of the resulting chunk is now determined by the allocation types of its contributing chunks.
This commit is contained in:
parent
3dac8f180c
commit
43635a2edc
1 changed file with 55 additions and 58 deletions
|
|
@ -294,8 +294,19 @@ class FileReader:
|
|||
"""
|
||||
Read a Chunk of up to 'size' bytes from the file.
|
||||
|
||||
This method tries to yield a Chunk of the requested size, if possible, by considering
|
||||
multiple chunks from the buffer.
|
||||
|
||||
The allocation type of the resulting chunk depends on the allocation types of the contributing chunks:
|
||||
- If one of the chunks is CH_DATA, it will create all-zero bytes for other chunks that are not CH_DATA
|
||||
- If all contributing chunks are CH_HOLE, the resulting chunk will also be CH_HOLE
|
||||
- If the contributing chunks are a mix of CH_HOLE and CH_ALLOC, the resulting chunk will be CH_HOLE
|
||||
|
||||
:param size: Number of bytes to read
|
||||
:return: Chunk object containing the read data. If no data is available, returns Chunk(None, size=0, allocation=CH_DATA).
|
||||
:return: Chunk object containing the read data.
|
||||
If no data is available, returns Chunk(None, size=0, allocation=CH_ALLOC).
|
||||
If less than requested bytes were available (at EOF), the returned chunk might be smaller
|
||||
than requested.
|
||||
"""
|
||||
# Initialize if not already done
|
||||
if self.blockify_gen is None:
|
||||
|
|
@ -314,83 +325,69 @@ class FileReader:
|
|||
if not self.buffer:
|
||||
return Chunk(None, size=0, allocation=CH_ALLOC)
|
||||
|
||||
# Get the first chunk from the buffer
|
||||
chunk = self.buffer[0]
|
||||
chunk_size = chunk.meta["size"]
|
||||
allocation = chunk.meta["allocation"]
|
||||
data = chunk.data
|
||||
|
||||
# If this is a non-data chunk, handle it specially
|
||||
if allocation != CH_DATA or data is None:
|
||||
# For non-data chunks, we return a Chunk with the allocation type and size
|
||||
size_to_return = min(size, chunk_size - self.offset)
|
||||
|
||||
# Update buffer state
|
||||
if size_to_return == chunk_size - self.offset:
|
||||
self.buffer.pop(0)
|
||||
self.offset = 0
|
||||
else:
|
||||
self.offset += size_to_return
|
||||
|
||||
self.remaining_bytes -= size_to_return
|
||||
|
||||
return Chunk(None, size=size_to_return, allocation=allocation)
|
||||
|
||||
# For data chunks, proceed as before
|
||||
# Prepare to collect the requested data
|
||||
result = bytearray()
|
||||
bytes_to_read = min(size, self.remaining_bytes)
|
||||
bytes_read = 0
|
||||
|
||||
# Read data from the buffer
|
||||
# Track if we've seen different allocation types
|
||||
has_data = False
|
||||
has_hole = False
|
||||
has_alloc = False
|
||||
|
||||
# Read data from the buffer, combining chunks as needed
|
||||
while bytes_read < bytes_to_read and self.buffer:
|
||||
chunk = self.buffer[0]
|
||||
chunk_size = chunk.meta["size"]
|
||||
allocation = chunk.meta["allocation"]
|
||||
data = chunk.data
|
||||
|
||||
# We now handle all chunk types, so no need to skip non-data chunks
|
||||
|
||||
# If this is a non-data chunk, break to handle it
|
||||
if allocation != CH_DATA or data is None:
|
||||
if bytes_read > 0:
|
||||
# We've already read some data, so return that first
|
||||
break
|
||||
else:
|
||||
# No data read yet, return info about this non-data chunk
|
||||
size_to_return = min(size, chunk_size - self.offset)
|
||||
|
||||
# Update buffer state
|
||||
if size_to_return == chunk_size - self.offset:
|
||||
self.buffer.pop(0)
|
||||
self.offset = 0
|
||||
else:
|
||||
self.offset += size_to_return
|
||||
|
||||
self.remaining_bytes -= size_to_return
|
||||
|
||||
return Chunk(None, size=size_to_return, allocation=allocation)
|
||||
# Track allocation types
|
||||
if allocation == CH_DATA:
|
||||
has_data = True
|
||||
elif allocation == CH_HOLE:
|
||||
has_hole = True
|
||||
elif allocation == CH_ALLOC:
|
||||
has_alloc = True
|
||||
else:
|
||||
raise ValueError(f"Invalid allocation type: {allocation}")
|
||||
|
||||
# Calculate how much we can read from this chunk
|
||||
available = chunk_size - self.offset
|
||||
to_read = min(available, bytes_to_read - bytes_read)
|
||||
|
||||
# Read the data
|
||||
if to_read > 0:
|
||||
# Process the chunk based on its allocation type
|
||||
if allocation == CH_DATA:
|
||||
assert data is not None
|
||||
# For data chunks, add the actual data
|
||||
result.extend(data[self.offset:self.offset + to_read])
|
||||
bytes_read += to_read
|
||||
else:
|
||||
# For non-data chunks, add zeros if we've seen a data chunk
|
||||
if has_data:
|
||||
result.extend(b'\0' * to_read)
|
||||
# Otherwise, we'll just track the size without adding data
|
||||
|
||||
# Update offset or remove chunk if fully consumed
|
||||
if to_read < available:
|
||||
self.offset += to_read
|
||||
else:
|
||||
self.offset = 0
|
||||
self.buffer.pop(0)
|
||||
bytes_read += to_read
|
||||
|
||||
self.remaining_bytes -= to_read
|
||||
# Update offset or remove chunk if fully consumed
|
||||
if to_read < available:
|
||||
self.offset += to_read
|
||||
else:
|
||||
self.offset = 0
|
||||
self.buffer.pop(0)
|
||||
|
||||
# Return a Chunk object with the data
|
||||
return Chunk(bytes(result), size=bytes_read, allocation=CH_DATA)
|
||||
self.remaining_bytes -= to_read
|
||||
|
||||
# Determine the allocation type of the resulting chunk
|
||||
if has_data:
|
||||
# If any chunk was CH_DATA, the result is CH_DATA
|
||||
return Chunk(bytes(result), size=bytes_read, allocation=CH_DATA)
|
||||
elif has_hole:
|
||||
# If any chunk was CH_HOLE (and none were CH_DATA), the result is CH_HOLE
|
||||
return Chunk(None, size=bytes_read, allocation=CH_HOLE)
|
||||
else:
|
||||
# Otherwise, all chunks were CH_ALLOC
|
||||
return Chunk(None, size=bytes_read, allocation=CH_ALLOC)
|
||||
|
||||
|
||||
class ChunkerFixed:
|
||||
|
|
|
|||
Loading…
Reference in a new issue