Merge pull request #9589 from ThomasWaldmann/chunkerfixed-sparse-handling-tests-master
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (Haiku, false, haiku, r1beta5) (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OmniOS, false, omnios, r151056) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.8) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run

ChunkerFixed sparse handling tests (master)
This commit is contained in:
TW 2026-05-10 08:49:36 +02:00 committed by GitHub
commit bae103637d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 102 additions and 13 deletions

View file

@ -137,8 +137,11 @@ class FileFMAPReader:
if self.try_sparse:
try:
fmap = list(sparsemap(self.fd, self.fh))
except OSError as err:
# seeking did not work
except (OSError, ValueError) as err:
# Building a sparse map failed:
# - OSError: low-level lseek with SEEK_HOLE/SEEK_DATA not supported by FS/OS.
# - ValueError: high-level file objects (e.g. io.BytesIO or some fd wrappers)
# don't accept SEEK_HOLE/SEEK_DATA as a valid "whence" and raise ValueError.
pass
if fmap is None:
@ -170,6 +173,9 @@ class FileFMAPReader:
# read block from the range
data = dread(offset, wanted, self.fd, self.fh)
got = len(data)
# Detect zero-filled blocks regardless of sparse mode.
# Zero detection is important to avoid reading/storing allocated zeros
# even when we are not using sparse file handling based on SEEK_HOLE/SEEK_DATA.
if zeros.startswith(data):
data = None
allocation = CH_ALLOC
@ -321,7 +327,12 @@ class FileReader:
# Determine the allocation type of the resulting chunk
if has_data:
# If any chunk was CH_DATA, the result is CH_DATA
# If any chunk was CH_DATA, check if the result is all zeros.
# This can happen when a large CH_DATA block (read at read_size granularity)
# contains both real data and zero-filled regions, and we are slicing out
# a zero-filled portion at the block_size granularity.
if zeros.startswith(result):
return Chunk(None, size=bytes_read, allocation=CH_ALLOC)
return Chunk(bytes(result), size=bytes_read, allocation=CH_DATA)
elif has_hole:
# If any chunk was CH_HOLE (and none were CH_DATA), the result is CH_HOLE

View file

@ -77,14 +77,38 @@ def fs_supports_sparse():
BS = 4096 # filesystem block size
# Some sparse files. X = content blocks, _ = sparse blocks.
# Block size must always be BS.
# Each map entry is a (start, size, is_data) tuple: is_data is True for a
# content block (X) and False for a sparse block (_).
# NOTE(review): each map_* name below is bound twice - first in a coalesced
# form (one entry per run of equal blocks), then again with one entry per
# BS-sized block. The second binding is the one that remains in effect.
# X__XXX____
map_sparse1 = [(0 * BS, 1 * BS, True), (1 * BS, 2 * BS, False), (3 * BS, 3 * BS, True), (6 * BS, 4 * BS, False)]
map_sparse1 = [
(0, BS, True),
(1 * BS, BS, False),
(2 * BS, BS, False),
(3 * BS, BS, True),
(4 * BS, BS, True),
(5 * BS, BS, True),
(6 * BS, BS, False),
(7 * BS, BS, False),
(8 * BS, BS, False),
(9 * BS, BS, False),
]
# _XX___XXXX
map_sparse2 = [(0 * BS, 1 * BS, False), (1 * BS, 2 * BS, True), (3 * BS, 3 * BS, False), (6 * BS, 4 * BS, True)]
map_sparse2 = [
(0, BS, False),
(1 * BS, BS, True),
(2 * BS, BS, True),
(3 * BS, BS, False),
(4 * BS, BS, False),
(5 * BS, BS, False),
(6 * BS, BS, True),
(7 * BS, BS, True),
(8 * BS, BS, True),
(9 * BS, BS, True),
]
# XXX
map_notsparse = [(0 * BS, 3 * BS, True)]
map_notsparse = [(0, BS, True), (BS, BS, True), (2 * BS, BS, True)]
# ___
map_onlysparse = [(0 * BS, 3 * BS, False)]
map_onlysparse = [(0, BS, False), (BS, BS, False), (2 * BS, BS, False)]

View file

@ -4,13 +4,42 @@ import random
import pytest
from . import cf, cf_expand, make_sparsefile, make_content, fs_supports_sparse
from . import cf, cf_expand, make_sparsefile, make_content
from . import BS, map_sparse1, map_sparse2, map_onlysparse, map_notsparse
from ...chunkers import ChunkerFixed
from ...constants import * # NOQA
@pytest.mark.skipif(not fs_supports_sparse(), reason="filesystem does not support sparse files")
def pretty_print(msg, items):
    """
    Pretty-print a sequence of chunk contents (e.g. the result of get_chunks).

    Prints msg, a dashed underline of the same length, then one line per element:
    - bytes consisting solely of b"H": "header(N)", N being the length.
    - bytes consisting solely of b"X": "body(N)".
    - bytes consisting solely of zero bytes: "zeros(N)".
    - empty bytes: "empty(0)".
    - any other bytes: "other(N)".
    - an int: "sparse(N)", N being the integer value (interpreted as a length).
    - anything else: "???(item)".
    """
    # (fill byte, label) pairs for content made of one repeated byte value
    fill_labels = ((b"H", "header"), (b"X", "body"), (b"\0", "zeros"))
    print(msg)
    print("-" * len(msg))
    for item in items:
        if isinstance(item, bytes):
            if not item:
                # An empty chunk trivially "consists only of" any byte value,
                # so name it explicitly rather than reporting it as a header.
                label = "empty"
            else:
                matching = (name for fill, name in fill_labels if item.count(fill) == len(item))
                # Fallback: mixed or unknown content.
                label = next(matching, "other")
            print(f"{label}({len(item)})")
        elif isinstance(item, int):
            print(f"sparse({item})")
        else:
            # Unexpected element type, just print a generic line.
            print(f"???({item})")
@pytest.mark.parametrize(
"fname, sparse_map, header_size, sparse",
[
@ -34,13 +63,19 @@ from ...constants import * # NOQA
)
def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
def get_chunks(fname, sparse, header_size):
chunker = ChunkerFixed(4096, header_size=header_size, sparse=sparse)
chunker = ChunkerFixed(BS, header_size=header_size, sparse=sparse)
with open(fname, "rb") as fd:
return cf(chunker.chunkify(fd))
# this only works if sparse map blocks are same size as fixed chunker blocks
fn = str(tmpdir / fname)
make_sparsefile(fn, sparse_map, header_size=header_size)
get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
expected_content = make_content(sparse_map, header_size=header_size)
got_chunks = get_chunks(fn, sparse=sparse, header_size=header_size)
print(f"sparse: {sparse}")
pretty_print("expected", expected_content)
pretty_print("got", got_chunks)
assert expected_content == got_chunks
@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")

View file

@ -9,6 +9,22 @@ from ...chunkers import sparsemap, FileReader, FileFMAPReader, Chunk
from ...constants import * # NOQA
def coalesce_sparse_map(sparse_map):
    """
    Coalesce adjacent ranges with the same is_data flag, as the OS would report them.

    sparse_map is a sequence of (start, size, is_data) tuples, ordered by start.
    Returns a new list of (start, size, is_data) tuples; an empty sparse_map gives [].

    Ranges are merged only if they share the same is_data flag and actually touch
    (start + size of one range equals the start of the next). Ranges separated by
    a gap are kept apart, because adding up their sizes would describe a range
    that is not present in the input.
    """
    if not sparse_map:
        return []
    result = []
    for start, size, is_data in sparse_map:
        if result:
            prev_start, prev_size, prev_is_data = result[-1]
            if prev_is_data == is_data and prev_start + prev_size == start:
                # Extend the current run instead of starting a new one.
                result[-1] = (prev_start, prev_size + size, prev_is_data)
                continue
        result.append((start, size, is_data))
    return result
@pytest.mark.skipif(not fs_supports_sparse(), reason="filesystem does not support sparse files")
@pytest.mark.parametrize(
"fname, sparse_map",
@ -28,8 +44,11 @@ def test_sparsemap(tmpdir, fname, sparse_map):
fn = str(tmpdir / fname)
make_sparsefile(fn, sparse_map)
assert get_sparsemap_fh(fn) == sparse_map
assert get_sparsemap_fd(fn) == sparse_map
# The OS coalesces adjacent ranges of the same type (data or hole),
# so we compare against the coalesced version of the expected map.
expected = coalesce_sparse_map(sparse_map)
assert get_sparsemap_fh(fn) == expected
assert get_sparsemap_fd(fn) == expected
@pytest.mark.parametrize(