FileFMAPReader: add extensive test coverage

Includes unit tests for basic functionality, handling of custom file maps, allocation types, sparse file support, and the `_build_fmap` method.
This commit is contained in:
Thomas Waldmann 2025-05-27 21:54:47 +02:00
parent e65755e114
commit 1c0d3eaa9f
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01

View file

@ -5,7 +5,7 @@ import tempfile
import pytest
from .chunker_test import cf
from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing, FileReader, Chunk
from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing, FileReader, FileFMAPReader, Chunk
from ..constants import * # NOQA
BS = 4096 # fs block size
@ -284,3 +284,190 @@ def test_filereader_read_with_mock(mock_chunks, read_size, expected_data, expect
assert chunk.data == expected_data
assert chunk.meta["allocation"] == expected_allocation
assert chunk.meta["size"] == expected_size
@pytest.mark.parametrize(
    "file_content, read_size, expected_chunks",
    [
        # No content at all: nothing is expected from blockify.
        (b"", 1024, []),
        # Content shorter than read_size: a single data chunk.
        (b"data", 1024, [{"data": b"data", "allocation": CH_DATA, "size": 4}]),
        # Content longer than read_size: read_size-sized pieces plus a short remainder.
        (
            b"0123456789",
            4,
            [
                {"data": b"0123", "allocation": CH_DATA, "size": 4},
                {"data": b"4567", "allocation": CH_DATA, "size": 4},
                {"data": b"89", "allocation": CH_DATA, "size": 2},
            ],
        ),
        # Zero-filled blocks between data: expected as CH_ALLOC chunks without data.
        (
            b"data" + b"\0" * 8 + b"more",
            4,
            [
                {"data": b"data", "allocation": CH_DATA, "size": 4},
                {"data": None, "allocation": CH_ALLOC, "size": 4},
                {"data": None, "allocation": CH_ALLOC, "size": 4},
                {"data": b"more", "allocation": CH_DATA, "size": 4},
            ],
        ),
    ],
)
def test_filefmapreader_basic(file_content, read_size, expected_chunks):
    """Check the chunks FileFMAPReader.blockify yields for in-memory content.

    The reader is created with sparse=False and without an fmap; every yielded
    chunk is compared against the expected data, allocation type, and size.
    """
    source = BytesIO(file_content)
    reader = FileFMAPReader(fd=source, fh=-1, read_size=read_size, sparse=False, fmap=None)

    chunks = list(reader.blockify())

    # The counts must agree first, so the pairwise comparison below covers every chunk.
    assert len(chunks) == len(expected_chunks)

    for chunk, expected in zip(chunks, expected_chunks):
        assert chunk.data == expected["data"]
        assert chunk.meta["allocation"] == expected["allocation"]
        assert chunk.meta["size"] == expected["size"]
@pytest.mark.parametrize(
    "file_content, fmap, read_size, expected_chunks",
    [
        # fmap mixing data ranges with a hole range in the middle
        (
            b"dataXXXXmore",
            [(0, 4, True), (4, 4, False), (8, 4, True)],
            4,
            [
                {"data": b"data", "allocation": CH_DATA, "size": 4},
                {"data": None, "allocation": CH_HOLE, "size": 4},
                {"data": b"more", "allocation": CH_DATA, "size": 4},
            ],
        ),
        # fmap that declares the whole content a hole
        (
            b"\0\0\0\0\0\0\0\0",
            [(0, 8, False)],
            4,
            [
                {"data": None, "allocation": CH_HOLE, "size": 4},
                {"data": None, "allocation": CH_HOLE, "size": 4},
            ],
        ),
        # fmap that declares the whole content data
        (
            b"datadata",
            [(0, 8, True)],
            4,
            [
                {"data": b"data", "allocation": CH_DATA, "size": 4},
                {"data": b"data", "allocation": CH_DATA, "size": 4},
            ],
        ),
        # fmap covering only the second half: the first 12 bytes must not show up
        (
            b"skipthispartreadthispart",
            [(12, 12, True)],
            4,
            [
                {"data": b"read", "allocation": CH_DATA, "size": 4},
                {"data": b"this", "allocation": CH_DATA, "size": 4},
                {"data": b"part", "allocation": CH_DATA, "size": 4},
            ],
        ),
    ],
)
def test_filefmapreader_with_fmap(file_content, fmap, read_size, expected_chunks):
    """Check FileFMAPReader.blockify output when the caller supplies the fmap.

    Each fmap entry is a (start, size, is_data) tuple; the yielded chunks are
    compared against the expected data, allocation type, and size.
    """
    source = BytesIO(file_content)
    reader = FileFMAPReader(fd=source, fh=-1, read_size=read_size, sparse=False, fmap=fmap)

    chunks = list(reader.blockify())

    # The counts must agree first, so the pairwise comparison below covers every chunk.
    assert len(chunks) == len(expected_chunks)

    for chunk, expected in zip(chunks, expected_chunks):
        assert chunk.data == expected["data"]
        assert chunk.meta["allocation"] == expected["allocation"]
        assert chunk.meta["size"] == expected["size"]
@pytest.mark.parametrize(
    "zeros_length, read_size, expected_allocation",
    [
        (4, 4, CH_ALLOC),  # Small block of zeros
        (8192, 4096, CH_ALLOC),  # Large block of zeros
    ],
)
def test_filefmapreader_allocation_types(zeros_length, read_size, expected_allocation):
    """Test that FileFMAPReader reports all-zero content as data-less chunks.

    The reader is created with sparse=False and no fmap, over a buffer made
    only of zero bytes. Every yielded chunk must carry the expected allocation
    type and have ``data`` set to None.
    """
    # Create a file with all zeros
    file_content = b"\0" * zeros_length
    reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=False, fmap=None)

    # Collect all chunks from blockify
    chunks = list(reader.blockify())

    # Without these checks the loop below would pass vacuously if blockify
    # yielded nothing or dropped part of the input.
    assert chunks
    assert sum(chunk.meta["size"] for chunk in chunks) == zeros_length

    # Check that all chunks are of the expected allocation type
    for chunk in chunks:
        assert chunk.meta["allocation"] == expected_allocation
        assert chunk.data is None  # All-zero data should be None
@pytest.mark.skipif(not fs_supports_sparse(), reason="fs does not support sparse files")
def test_filefmapreader_with_real_sparse_file(tmpdir):
    """Test FileFMAPReader with a real sparse file, with and without sparse handling.

    The file is laid out as one data block, two hole blocks, and one data block
    (each BS bytes) and is read back with read_size=BS, once with sparse=True
    and once with sparse=False.
    """
    # Create a sparse file
    fn = str(tmpdir / "sparse_file")
    sparse_map = [(0, BS, True), (BS, 2 * BS, False), (3 * BS, BS, True)]
    make_sparsefile(fn, sparse_map)

    # Expected chunks when reading with sparse=True: hole ranges are reported as holes.
    expected_chunks_sparse = [
        {"data_type": bytes, "allocation": CH_DATA, "size": BS},
        {"data_type": type(None), "allocation": CH_HOLE, "size": BS},
        {"data_type": type(None), "allocation": CH_HOLE, "size": BS},
        {"data_type": bytes, "allocation": CH_DATA, "size": BS},
    ]

    # Expected chunks when reading with sparse=False: the reader does not look for
    # holes, but the hole ranges read back as zeros, and all-zero blocks are
    # reported as CH_ALLOC chunks without data (same behavior the in-memory
    # tests in this module assert for sparse=False, fmap=None).
    # NOTE(review): relies on make_sparsefile leaving real holes for is_data=False ranges.
    expected_chunks_non_sparse = [
        {"data_type": bytes, "allocation": CH_DATA, "size": BS},
        {"data_type": type(None), "allocation": CH_ALLOC, "size": BS},
        {"data_type": type(None), "allocation": CH_ALLOC, "size": BS},
        {"data_type": bytes, "allocation": CH_DATA, "size": BS},
    ]

    cases = ((True, expected_chunks_sparse), (False, expected_chunks_non_sparse))
    for sparse, expected_chunks in cases:
        # Reopen the file for every mode so each reader starts at offset 0.
        with open(fn, "rb") as fd:
            reader = FileFMAPReader(fd=fd, fh=-1, read_size=BS, sparse=sparse, fmap=None)
            chunks = list(reader.blockify())

        assert len(chunks) == len(expected_chunks)
        for chunk, expected in zip(chunks, expected_chunks):
            assert isinstance(chunk.data, expected["data_type"])
            assert chunk.meta["allocation"] == expected["allocation"]
            assert chunk.meta["size"] == expected["size"]
def test_filefmapreader_build_fmap():
    """Check the file map returned by _build_fmap for a reader with sparse=False.

    The result is expected to be a single range starting at offset 0, with a
    size of 2**62, flagged as data.
    """
    source = BytesIO(b"data")
    reader = FileFMAPReader(fd=source, fh=-1, read_size=4, sparse=False, fmap=None)

    fmap = reader._build_fmap()

    # Exactly one range is expected.
    assert len(fmap) == 1

    only_range = fmap[0]
    assert only_range[0] == 0  # start
    assert only_range[1] == 2**62  # size
    assert only_range[2] is True  # is_data