From 827b82938f21402aefb45e57ee95a29d34b9be7b Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Fri, 8 May 2026 23:58:54 +0200 Subject: [PATCH] FileReader.read: detect zero-filled slices from CH_DATA blocks When FileReader.read() sliced a large CH_DATA block (read at 1MB granularity) into smaller block_size chunks (e.g. 4096 bytes), zero-filled slices were returned as CH_DATA with zero bytes instead of CH_ALLOC. Add a zeros.startswith(result) check before returning a CH_DATA chunk, converting all-zero slices to CH_ALLOC. This ensures sparse-aware consumers correctly identify allocated-but-zero regions regardless of whether the file was read with sparse=True or sparse=False. --- src/borg/chunkers/reader.pyx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/borg/chunkers/reader.pyx b/src/borg/chunkers/reader.pyx index 115281d42..7d9e35e8e 100644 --- a/src/borg/chunkers/reader.pyx +++ b/src/borg/chunkers/reader.pyx @@ -327,7 +327,12 @@ class FileReader: # Determine the allocation type of the resulting chunk if has_data: - # If any chunk was CH_DATA, the result is CH_DATA + # If any chunk was CH_DATA, check if the result is all zeros. + # This can happen when a large CH_DATA block (read at read_size granularity) + # contains both real data and zero-filled regions, and we are slicing out + # a zero-filled portion at the block_size granularity. + if zeros.startswith(result): + return Chunk(None, size=bytes_read, allocation=CH_ALLOC) return Chunk(bytes(result), size=bytes_read, allocation=CH_DATA) elif has_hole: # If any chunk was CH_HOLE (and none were CH_DATA), the result is CH_HOLE