diff --git a/src/borg/chunkers/reader.pyx b/src/borg/chunkers/reader.pyx index 2a45e55ca..d33917a3a 100644 --- a/src/borg/chunkers/reader.pyx +++ b/src/borg/chunkers/reader.pyx @@ -137,7 +137,7 @@ class FileFMAPReader: if self.try_sparse: try: fmap = list(sparsemap(self.fd, self.fh)) - except OSError as err: + except (OSError, ValueError) as err: # seeking did not work pass @@ -170,6 +170,9 @@ class FileFMAPReader: # read block from the range data = dread(offset, wanted, self.fd, self.fh) got = len(data) + # Detect zero-filled blocks regardless of sparse mode. + # Zero detection is important to avoid reading/storing allocated zeros + # even when we are not using sparse file handling based on SEEK_HOLE/SEEK_DATA. if zeros.startswith(data): data = None allocation = CH_ALLOC diff --git a/src/borg/testsuite/chunkers/fixed_test.py b/src/borg/testsuite/chunkers/fixed_test.py index b8598a926..8fac894f1 100644 --- a/src/borg/testsuite/chunkers/fixed_test.py +++ b/src/borg/testsuite/chunkers/fixed_test.py @@ -40,7 +40,40 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse): fn = str(tmpdir / fname) make_sparsefile(fn, sparse_map, header_size=header_size) - get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size) + expected_content = make_content(sparse_map, header_size=header_size) + + # ChunkerFixed splits everything into fixed-size chunks (except maybe the header) + # We need to split the expected content similarly. + expected = [] + + # Handle header if present (it's the first item if header_size > 0) + if header_size > 0: + header = expected_content.pop(0) + expected.append(header) + + # Flatten the rest and split into 4096 chunks + current_chunk_size = 4096 + for item in expected_content: + if isinstance(item, int): + # Hole + count = item + while count > 0: + size = min(count, current_chunk_size) + expected.append(size) + count -= size + else: + # Data + data = item + while len(data) > 0: + size = min(len(data), current_chunk_size) + expected.append(data[:size]) + data = data[size:] + + if not sparse: + # if the chunker is not sparse-aware, it will read holes as zeros + expected = [b"\0" * x if isinstance(x, int) else x for x in expected] + + assert get_chunks(fn, sparse=sparse, header_size=header_size) == expected @pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1") diff --git a/src/borg/testsuite/chunkers/interaction_test.py b/src/borg/testsuite/chunkers/interaction_test.py index 45c175299..4417dc423 100644 --- a/src/borg/testsuite/chunkers/interaction_test.py +++ b/src/borg/testsuite/chunkers/interaction_test.py @@ -29,7 +29,7 @@ def test_reader_chunker_interaction(chunker_params): random_data = os.urandom(data_size // 3) + b"\0" * (data_size // 3) + os.urandom(data_size // 3) # Chunk the data - chunker = get_chunker(*chunker_params) + chunker = get_chunker(*chunker_params, sparse=True) data_file = BytesIO(random_data) chunks = list(chunker.chunkify(data_file)) diff --git a/src/borg/testsuite/chunkers/reader_test.py b/src/borg/testsuite/chunkers/reader_test.py index ff56350ee..f269d95cb 100644 --- a/src/borg/testsuite/chunkers/reader_test.py +++ b/src/borg/testsuite/chunkers/reader_test.py @@ -170,7 +170,7 @@ def test_filereader_read_with_mock(mock_chunks, read_size, expected_data, expect ) def test_filefmapreader_basic(file_content, read_size, expected_chunks): """Test basic functionality of FileFMAPReader with different file contents.""" - reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=False, fmap=None) + reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=True, fmap=None) # Collect all chunks from blockify chunks = list(reader.blockify()) @@ -252,7 +252,7 @@ def test_filefmapreader_allocation_types(zeros_length, read_size, expected_alloc # Create a file with all zeros file_content = b"\0" * zeros_length - reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=False, fmap=None) + reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=True, fmap=None) # Collect all chunks from blockify chunks = list(reader.blockify())