From 1f03a776d561a6ebd4ee07f58f336449e89b66d3 Mon Sep 17 00:00:00 2001
From: Thomas Waldmann <tw@waldmann-edv.de>
Date: Tue, 27 May 2025 15:07:51 +0200
Subject: [PATCH] Remove support for `header_size` in file readers.

The `header_size` parameter and related logic have been removed from file readers, simplifying their implementation. This change eliminates unnecessary complexity while maintaining all functional capabilities via `read_size` and `fmap`.
---
 src/borg/chunker.pyi |  2 --
 src/borg/chunker.pyx | 37 ++++++++-----------------------------
 2 files changed, 8 insertions(+), 31 deletions(-)

diff --git a/src/borg/chunker.pyi b/src/borg/chunker.pyi
index 1ba388046..7026a436f 100644
--- a/src/borg/chunker.pyi
+++ b/src/borg/chunker.pyi
@@ -28,7 +28,6 @@ class FileFMAPReader:
         fd: BinaryIO = None,
         fh: int = -1,
         read_size: int = 0,
-        header_size: int = 0,
         sparse: bool = False,
         fmap: List[fmap_entry] = None,
     ) -> None: ...
@@ -42,7 +41,6 @@ class FileReader:
         fd: BinaryIO = None,
         fh: int = -1,
         read_size: int = 0,
-        header_size: int = 0,
         sparse: bool = False,
         fmap: List[fmap_entry] = None,
     ) -> None: ...
diff --git a/src/borg/chunker.pyx b/src/borg/chunker.pyx
index ed3ae22db..e33fca242 100644
--- a/src/borg/chunker.pyx
+++ b/src/borg/chunker.pyx
@@ -171,24 +171,21 @@ class FileFMAPReader:
 
     It optionally supports:
 
-    - a header block of different size
     - using a sparsemap to read only data ranges and seek over hole ranges
       for sparse files.
     - using an externally given filemap to read only specific ranges from
       a file.
 
-    Note: the last block of a data or hole range may be less than the block size,
+    Note: the last block of a data or hole range may be less than the read_size,
           this is supported and not considered to be an error.
     """
-    def __init__(self, *, fd=None, fh=-1, read_size=0, header_size=0, sparse=False, fmap=None):
+    def __init__(self, *, fd=None, fh=-1, read_size=0, sparse=False, fmap=None):
         assert fd is not None or fh >= 0
         self.fd = fd
         self.fh = fh
         assert read_size > 0
         assert read_size <= len(zeros)
         self.read_size = read_size  # how much data we want to read at once
-        assert header_size <= read_size
-        self.header_size = header_size  # size of the first block
         self.reading_time = 0.0  # time spent in reading/seeking
         # should borg try to do sparse input processing?
         # whether it actually can be done depends on the input file being seekable.
@@ -200,45 +197,27 @@ class FileFMAPReader:
         fmap = None
         if self.try_sparse:
             try:
-                if self.header_size > 0:
-                    header_map = [(0, self.header_size, True), ]
-                    dseek(self.header_size, os.SEEK_SET, self.fd, self.fh)
-                    body_map = list(sparsemap(self.fd, self.fh))
-                    dseek(0, os.SEEK_SET, self.fd, self.fh)
-                else:
-                    header_map = []
-                    body_map = list(sparsemap(self.fd, self.fh))
+                fmap = list(sparsemap(self.fd, self.fh))
             except OSError as err:
                 # seeking did not work
                 pass
-            else:
-                fmap = header_map + body_map
 
         if fmap is None:
             # either sparse processing (building the fmap) was not tried or it failed.
             # in these cases, we just build a "fake fmap" that considers the whole file
             # as range(s) of data (no holes), so we can use the same code.
-            # we build different fmaps here for the purpose of correct block alignment
-            # with or without a header block (of potentially different size).
-            if self.header_size > 0:
-                header_map = [(0, self.header_size, True), ]
-                body_map = [(self.header_size, 2 ** 62, True), ]
-            else:
-                header_map = []
-                body_map = [(0, 2 ** 62, True), ]
-            fmap = header_map + body_map
+            fmap = [(0, 2 ** 62, True), ]
         self.reading_time += time.monotonic() - started_fmap
         return fmap
 
     def blockify(self):
         """
-        Read <read_size> sized blocks from a file, optionally supporting a differently sized header block.
+        Read <read_size> sized blocks from a file.
         """
         if self.fmap is None:
             self.fmap = self._build_fmap()
 
         offset = 0
-        # note: the optional header block is implemented via the first fmap entry
         for range_start, range_size, is_data in self.fmap:
             if range_start != offset:
                 # this is for the case when the fmap does not cover the file completely,
@@ -280,8 +259,8 @@ class FileReader:
     It maintains a buffer that is filled by using FileFMAPReader.blockify generator when needed.
     The data in that buffer is consumed by clients calling FileReader.read.
     """
-    def __init__(self, *, fd=None, fh=-1, read_size=0, header_size=0, sparse=False, fmap=None):
-        self.reader = FileFMAPReader(fd=fd, fh=fh, read_size=read_size, header_size=header_size, sparse=sparse, fmap=fmap)
+    def __init__(self, *, fd=None, fh=-1, read_size=0, sparse=False, fmap=None):
+        self.reader = FileFMAPReader(fd=fd, fh=fh, read_size=read_size, sparse=sparse, fmap=fmap)
         self.buffer = []  # list of (data, meta) tuples
         self.offset = 0  # offset into the first buffer object's data
         self.remaining_bytes = 0  # total bytes available in buffer
@@ -457,7 +436,7 @@ class ChunkerFixed:
         """
         # Initialize the reader with the file descriptors
         self.reader = FileReader(fd=fd, fh=fh, read_size=self.reader_block_size,
-                                header_size=self.header_size, sparse=self.sparse, fmap=fmap)
+                                sparse=self.sparse, fmap=fmap)
 
         # Handle header if present
         if self.header_size > 0: