mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-11 01:41:57 -04:00
Chunker: integrate FileReader for unified read logic
Replaced inline file reading logic with `FileReader` to standardize handling across chunkers. Improved buffer updates and allocation handling for sparse files and optimized read operations.
This commit is contained in:
parent
1c0d3eaa9f
commit
ad6d0ef2e8
1 changed file with 20 additions and 25 deletions
|
|
@@ -183,8 +183,7 @@ class FileFMAPReader:
|
|||
assert fd is not None or fh >= 0
|
||||
self.fd = fd
|
||||
self.fh = fh
|
||||
assert read_size > 0
|
||||
assert read_size <= len(zeros)
|
||||
assert 0 < read_size <= len(zeros)
|
||||
self.read_size = read_size # how much data we want to read at once
|
||||
self.reading_time = 0.0 # time spent in reading/seeking
|
||||
# should borg try to do sparse input processing?
|
||||
|
|
@@ -263,6 +262,7 @@ class FileReader:
|
|||
not need to match the Chunk sizes we got from the FileFMAPReader.
|
||||
"""
|
||||
def __init__(self, *, fd=None, fh=-1, read_size=0, sparse=False, fmap=None):
|
||||
assert read_size > 0
|
||||
self.reader = FileFMAPReader(fd=fd, fh=fh, read_size=read_size, sparse=sparse, fmap=fmap)
|
||||
self.buffer = [] # list of Chunk objects
|
||||
self.offset = 0 # offset into the first buffer object's data
|
||||
|
|
@@ -569,6 +569,8 @@ cdef class Chunker:
|
|||
cdef size_t min_size, buf_size, window_size, remaining, position, last
|
||||
cdef long long bytes_read, bytes_yielded # off_t in C, using long long for compatibility
|
||||
cdef readonly float chunking_time
|
||||
cdef object file_reader # FileReader instance
|
||||
cdef size_t reader_block_size
|
||||
|
||||
def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
|
||||
min_size = 1 << chunk_min_exp
|
||||
|
|
@@ -593,6 +595,7 @@ cdef class Chunker:
|
|||
self.bytes_yielded = 0
|
||||
self._fd = None
|
||||
self.chunking_time = 0.0
|
||||
self.reader_block_size = 1024 * 1024
|
||||
|
||||
def __dealloc__(self):
|
||||
"""Free the chunker's resources."""
|
||||
|
|
@@ -606,7 +609,7 @@ cdef class Chunker:
|
|||
cdef int fill(self) except 0:
|
||||
"""Fill the chunker's buffer with more data."""
|
||||
cdef ssize_t n
|
||||
cdef object data_py
|
||||
cdef object chunk
|
||||
|
||||
# Move remaining data to the beginning of the buffer
|
||||
memmove(self.data, self.data + self.last, self.position + self.remaining - self.last)
|
||||
|
|
@@ -617,32 +620,23 @@ cdef class Chunker:
|
|||
if self.eof or n == 0:
|
||||
return 1
|
||||
|
||||
if self.fh >= 0:
|
||||
# Use OS-level file descriptor
|
||||
with nogil:
|
||||
n = read(self.fh, self.data + self.position + self.remaining, n)
|
||||
# Use FileReader to read data
|
||||
chunk = self.file_reader.read(n)
|
||||
n = chunk.meta["size"]
|
||||
|
||||
if n > 0:
|
||||
self.remaining += n
|
||||
self.bytes_read += n
|
||||
elif n == 0:
|
||||
self.eof = 1
|
||||
if n > 0:
|
||||
# Only copy data if it's not a hole
|
||||
if chunk.meta["allocation"] == CH_DATA:
|
||||
# Copy data from chunk to our buffer
|
||||
memcpy(self.data + self.position + self.remaining, <const unsigned char*>PyBytes_AsString(chunk.data), n)
|
||||
else:
|
||||
# Error occurred
|
||||
raise OSError(errno.errno, os.strerror(errno.errno))
|
||||
# For holes, fill with zeros
|
||||
memcpy(self.data + self.position + self.remaining, <const unsigned char*>PyBytes_AsString(zeros[:n]), n)
|
||||
|
||||
self.remaining += n
|
||||
self.bytes_read += n
|
||||
else:
|
||||
# Use Python file object
|
||||
data_py = self._fd.read(n)
|
||||
n = len(data_py)
|
||||
|
||||
if n:
|
||||
# Copy data from Python bytes to our buffer
|
||||
memcpy(self.data + self.position + self.remaining, <const unsigned char*>PyBytes_AsString(data_py), n)
|
||||
self.remaining += n
|
||||
self.bytes_read += n
|
||||
else:
|
||||
self.eof = 1
|
||||
self.eof = 1
|
||||
|
||||
return 1
|
||||
|
||||
|
|
@@ -722,6 +716,7 @@ cdef class Chunker:
|
|||
"""
|
||||
self._fd = fd
|
||||
self.fh = fh
|
||||
self.file_reader = FileReader(fd=fd, fh=fh, read_size=self.reader_block_size)
|
||||
self.done = 0
|
||||
self.remaining = 0
|
||||
self.bytes_read = 0
|
||||
|
|
|
|||
Loading…
Reference in a new issue