Remove support for header_size in file readers.

The `header_size` parameter and related logic have been removed from file readers, simplifying their implementation. This change eliminates unnecessary complexity while maintaining all functional capabilities via `read_size` and `fmap`.
This commit is contained in:
Thomas Waldmann 2025-05-27 15:07:51 +02:00
parent f036152789
commit 1f03a776d5
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
2 changed files with 8 additions and 31 deletions

View file

@ -28,7 +28,6 @@ class FileFMAPReader:
fd: BinaryIO = None,
fh: int = -1,
read_size: int = 0,
header_size: int = 0,
sparse: bool = False,
fmap: List[fmap_entry] = None,
) -> None: ...
@ -42,7 +41,6 @@ class FileReader:
fd: BinaryIO = None,
fh: int = -1,
read_size: int = 0,
header_size: int = 0,
sparse: bool = False,
fmap: List[fmap_entry] = None,
) -> None: ...

View file

@ -171,24 +171,21 @@ class FileFMAPReader:
It optionally supports:
- a header block of different size
- using a sparsemap to read only data ranges and seek over hole ranges
for sparse files.
- using an externally given filemap to read only specific ranges from
a file.
Note: the last block of a data or hole range may be less than the block size,
Note: the last block of a data or hole range may be less than the read_size,
this is supported and not considered to be an error.
"""
def __init__(self, *, fd=None, fh=-1, read_size=0, header_size=0, sparse=False, fmap=None):
def __init__(self, *, fd=None, fh=-1, read_size=0, sparse=False, fmap=None):
assert fd is not None or fh >= 0
self.fd = fd
self.fh = fh
assert read_size > 0
assert read_size <= len(zeros)
self.read_size = read_size # how much data we want to read at once
assert header_size <= read_size
self.header_size = header_size # size of the first block
self.reading_time = 0.0 # time spent in reading/seeking
# should borg try to do sparse input processing?
# whether it actually can be done depends on the input file being seekable.
@ -200,45 +197,27 @@ class FileFMAPReader:
fmap = None
if self.try_sparse:
try:
if self.header_size > 0:
header_map = [(0, self.header_size, True), ]
dseek(self.header_size, os.SEEK_SET, self.fd, self.fh)
body_map = list(sparsemap(self.fd, self.fh))
dseek(0, os.SEEK_SET, self.fd, self.fh)
else:
header_map = []
body_map = list(sparsemap(self.fd, self.fh))
fmap = list(sparsemap(self.fd, self.fh))
except OSError as err:
# seeking did not work
pass
else:
fmap = header_map + body_map
if fmap is None:
# either sparse processing (building the fmap) was not tried or it failed.
# in these cases, we just build a "fake fmap" that considers the whole file
# as range(s) of data (no holes), so we can use the same code.
# we build different fmaps here for the purpose of correct block alignment
# with or without a header block (of potentially different size).
if self.header_size > 0:
header_map = [(0, self.header_size, True), ]
body_map = [(self.header_size, 2 ** 62, True), ]
else:
header_map = []
body_map = [(0, 2 ** 62, True), ]
fmap = header_map + body_map
fmap = [(0, 2 ** 62, True), ]
self.reading_time += time.monotonic() - started_fmap
return fmap
def blockify(self):
"""
Read <read_size> sized blocks from a file, optionally supporting a differently sized header block.
Read <read_size> sized blocks from a file.
"""
if self.fmap is None:
self.fmap = self._build_fmap()
offset = 0
# note: the optional header block is implemented via the first fmap entry
for range_start, range_size, is_data in self.fmap:
if range_start != offset:
# this is for the case when the fmap does not cover the file completely,
@ -280,8 +259,8 @@ class FileReader:
It maintains a buffer that is filled by using FileFMAPReader.blockify generator when needed.
The data in that buffer is consumed by clients calling FileReader.read.
"""
def __init__(self, *, fd=None, fh=-1, read_size=0, header_size=0, sparse=False, fmap=None):
self.reader = FileFMAPReader(fd=fd, fh=fh, read_size=read_size, header_size=header_size, sparse=sparse, fmap=fmap)
def __init__(self, *, fd=None, fh=-1, read_size=0, sparse=False, fmap=None):
self.reader = FileFMAPReader(fd=fd, fh=fh, read_size=read_size, sparse=sparse, fmap=fmap)
self.buffer = [] # list of (data, meta) tuples
self.offset = 0 # offset into the first buffer object's data
self.remaining_bytes = 0 # total bytes available in buffer
@ -457,7 +436,7 @@ class ChunkerFixed:
"""
# Initialize the reader with the file descriptors
self.reader = FileReader(fd=fd, fh=fh, read_size=self.reader_block_size,
header_size=self.header_size, sparse=self.sparse, fmap=fmap)
sparse=self.sparse, fmap=fmap)
# Handle header if present
if self.header_size > 0: