mirror of
https://github.com/borgbackup/borg.git
synced 2026-05-23 10:36:32 -04:00
Merge pull request #9589 from ThomasWaldmann/chunkerfixed-sparse-handling-tests-master
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (Haiku, false, haiku, r1beta5) (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OmniOS, false, omnios, r151056) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.8) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run
Some checks are pending
Lint / lint (push) Waiting to run
CI / lint (push) Waiting to run
CI / security (push) Waiting to run
CI / asan_ubsan (push) Blocked by required conditions
CI / native_tests (push) Blocked by required conditions
CI / vm_tests (Haiku, false, haiku, r1beta5) (push) Blocked by required conditions
CI / vm_tests (NetBSD, false, netbsd, 10.1) (push) Blocked by required conditions
CI / vm_tests (OmniOS, false, omnios, r151056) (push) Blocked by required conditions
CI / vm_tests (OpenBSD, false, openbsd, 7.8) (push) Blocked by required conditions
CI / vm_tests (borg-freebsd-14-x86_64-gh, FreeBSD, true, freebsd, 14.3) (push) Blocked by required conditions
CI / windows_tests (push) Blocked by required conditions
CodeQL / Analyze (push) Waiting to run
ChunkerFixed sparse handling tests (master)
This commit is contained in:
commit
bae103637d
4 changed files with 102 additions and 13 deletions
|
|
@ -137,8 +137,11 @@ class FileFMAPReader:
|
|||
if self.try_sparse:
|
||||
try:
|
||||
fmap = list(sparsemap(self.fd, self.fh))
|
||||
except OSError as err:
|
||||
# seeking did not work
|
||||
except (OSError, ValueError) as err:
|
||||
# Building a sparse map failed:
|
||||
# - OSError: low-level lseek with SEEK_HOLE/SEEK_DATA not supported by FS/OS.
|
||||
# - ValueError: high-level file objects (e.g. io.BytesIO or some fd wrappers)
|
||||
# don't accept SEEK_HOLE/SEEK_DATA as a valid "whence" and raise ValueError.
|
||||
pass
|
||||
|
||||
if fmap is None:
|
||||
|
|
@ -170,6 +173,9 @@ class FileFMAPReader:
|
|||
# read block from the range
|
||||
data = dread(offset, wanted, self.fd, self.fh)
|
||||
got = len(data)
|
||||
# Detect zero-filled blocks regardless of sparse mode.
|
||||
# Zero detection is important to avoid reading/storing allocated zeros
|
||||
# even when we are not using sparse file handling based on SEEK_HOLE/SEEK_DATA.
|
||||
if zeros.startswith(data):
|
||||
data = None
|
||||
allocation = CH_ALLOC
|
||||
|
|
@ -321,7 +327,12 @@ class FileReader:
|
|||
|
||||
# Determine the allocation type of the resulting chunk
|
||||
if has_data:
|
||||
# If any chunk was CH_DATA, the result is CH_DATA
|
||||
# If any chunk was CH_DATA, check if the result is all zeros.
|
||||
# This can happen when a large CH_DATA block (read at read_size granularity)
|
||||
# contains both real data and zero-filled regions, and we are slicing out
|
||||
# a zero-filled portion at the block_size granularity.
|
||||
if zeros.startswith(result):
|
||||
return Chunk(None, size=bytes_read, allocation=CH_ALLOC)
|
||||
return Chunk(bytes(result), size=bytes_read, allocation=CH_DATA)
|
||||
elif has_hole:
|
||||
# If any chunk was CH_HOLE (and none were CH_DATA), the result is CH_HOLE
|
||||
|
|
|
|||
|
|
@ -77,14 +77,38 @@ def fs_supports_sparse():
|
|||
BS = 4096 # filesystem block size
|
||||
|
||||
# Some sparse files. X = content blocks, _ = sparse blocks.
|
||||
# Block size must always be BS.
|
||||
|
||||
# X__XXX____
|
||||
map_sparse1 = [(0 * BS, 1 * BS, True), (1 * BS, 2 * BS, False), (3 * BS, 3 * BS, True), (6 * BS, 4 * BS, False)]
|
||||
map_sparse1 = [
|
||||
(0, BS, True),
|
||||
(1 * BS, BS, False),
|
||||
(2 * BS, BS, False),
|
||||
(3 * BS, BS, True),
|
||||
(4 * BS, BS, True),
|
||||
(5 * BS, BS, True),
|
||||
(6 * BS, BS, False),
|
||||
(7 * BS, BS, False),
|
||||
(8 * BS, BS, False),
|
||||
(9 * BS, BS, False),
|
||||
]
|
||||
|
||||
# _XX___XXXX
|
||||
map_sparse2 = [(0 * BS, 1 * BS, False), (1 * BS, 2 * BS, True), (3 * BS, 3 * BS, False), (6 * BS, 4 * BS, True)]
|
||||
map_sparse2 = [
|
||||
(0, BS, False),
|
||||
(1 * BS, BS, True),
|
||||
(2 * BS, BS, True),
|
||||
(3 * BS, BS, False),
|
||||
(4 * BS, BS, False),
|
||||
(5 * BS, BS, False),
|
||||
(6 * BS, BS, True),
|
||||
(7 * BS, BS, True),
|
||||
(8 * BS, BS, True),
|
||||
(9 * BS, BS, True),
|
||||
]
|
||||
|
||||
# XXX
|
||||
map_notsparse = [(0 * BS, 3 * BS, True)]
|
||||
map_notsparse = [(0, BS, True), (BS, BS, True), (2 * BS, BS, True)]
|
||||
|
||||
# ___
|
||||
map_onlysparse = [(0 * BS, 3 * BS, False)]
|
||||
map_onlysparse = [(0, BS, False), (BS, BS, False), (2 * BS, BS, False)]
|
||||
|
|
|
|||
|
|
@ -4,13 +4,42 @@ import random
|
|||
|
||||
import pytest
|
||||
|
||||
from . import cf, cf_expand, make_sparsefile, make_content, fs_supports_sparse
|
||||
from . import cf, cf_expand, make_sparsefile, make_content
|
||||
from . import BS, map_sparse1, map_sparse2, map_onlysparse, map_notsparse
|
||||
from ...chunkers import ChunkerFixed
|
||||
from ...constants import * # NOQA
|
||||
|
||||
|
||||
@pytest.mark.skipif(not fs_supports_sparse(), reason="filesystem does not support sparse files")
|
||||
def pretty_print(msg, items):
    """
    Pretty-print a sequence of chunk results (e.g. the result of get_chunks) to stdout.

    First prints *msg* as a heading, underlined with dashes, then one line per element:

    - bytes consisting solely of b"H":  ``header(N)``, N being the length.
    - bytes consisting solely of b"X":  ``body(N)``
    - bytes consisting solely of b"\\0": ``zeros(N)``
    - any other bytes (mixed content or empty): ``other(N)``
    - an int (interpreted as the length of a sparse range): ``sparse(N)``
    - anything else: ``???(item)``

    :param msg: heading text to print above the items.
    :param items: iterable of bytes objects and/or ints.
    """
    # (fill byte, label) pairs, checked in this order.
    labels = ((b"H", "header"), (b"X", "body"), (b"\0", "zeros"))
    print(msg)
    print("-" * len(msg))
    for item in items:
        if isinstance(item, bytes):
            # Unknown or mixed content falls back to "other". Empty bytes also end
            # up here: they would vacuously match every fill byte, so we do not
            # try to classify them.
            label = "other"
            if item:
                for fill, name in labels:
                    # fill is a single byte, so count == len means "only this byte".
                    if item.count(fill) == len(item):
                        label = name
                        break
            print(f"{label}({len(item)})")
        elif isinstance(item, int):
            print(f"sparse({item})")
        else:
            # Unexpected element type, just print a generic line.
            print(f"???({item})")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fname, sparse_map, header_size, sparse",
|
||||
[
|
||||
|
|
@ -34,13 +63,19 @@ from ...constants import * # NOQA
|
|||
)
|
||||
def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
|
||||
def get_chunks(fname, sparse, header_size):
|
||||
chunker = ChunkerFixed(4096, header_size=header_size, sparse=sparse)
|
||||
chunker = ChunkerFixed(BS, header_size=header_size, sparse=sparse)
|
||||
with open(fname, "rb") as fd:
|
||||
return cf(chunker.chunkify(fd))
|
||||
|
||||
# this only works if sparse map blocks are same size as fixed chunker blocks
|
||||
fn = str(tmpdir / fname)
|
||||
make_sparsefile(fn, sparse_map, header_size=header_size)
|
||||
get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
|
||||
expected_content = make_content(sparse_map, header_size=header_size)
|
||||
got_chunks = get_chunks(fn, sparse=sparse, header_size=header_size)
|
||||
print(f"sparse: {sparse}")
|
||||
pretty_print("expected", expected_content)
|
||||
pretty_print("got", got_chunks)
|
||||
assert expected_content == got_chunks
|
||||
|
||||
|
||||
@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
|
||||
|
|
|
|||
|
|
@ -9,6 +9,22 @@ from ...chunkers import sparsemap, FileReader, FileFMAPReader, Chunk
|
|||
from ...constants import * # NOQA
|
||||
|
||||
|
||||
def coalesce_sparse_map(sparse_map):
    """
    Coalesce adjacent ranges with the same is_data flag, as the OS would report them.

    :param sparse_map: sequence of (start, size, is_data) tuples, ordered by start.
                       May be empty or None.
    :return: a new list of (start, size, is_data) tuples in which consecutive
             ranges are merged when they have the same is_data flag *and* are
             contiguous (previous start + size == next start). Ranges separated
             by a gap are kept apart, so every returned size matches its extent.
    """
    result = []
    for start, size, is_data in sparse_map or ():
        if result:
            prev_start, prev_size, prev_is_data = result[-1]
            # Only merge when the range directly continues the previous one.
            if prev_is_data == is_data and prev_start + prev_size == start:
                result[-1] = (prev_start, prev_size + size, prev_is_data)
                continue
        result.append((start, size, is_data))
    return result
|
||||
|
||||
|
||||
@pytest.mark.skipif(not fs_supports_sparse(), reason="filesystem does not support sparse files")
|
||||
@pytest.mark.parametrize(
|
||||
"fname, sparse_map",
|
||||
|
|
@ -28,8 +44,11 @@ def test_sparsemap(tmpdir, fname, sparse_map):
|
|||
|
||||
fn = str(tmpdir / fname)
|
||||
make_sparsefile(fn, sparse_map)
|
||||
assert get_sparsemap_fh(fn) == sparse_map
|
||||
assert get_sparsemap_fd(fn) == sparse_map
|
||||
# The OS coalesces adjacent ranges of the same type (data or hole),
|
||||
# so we compare against the coalesced version of the expected map.
|
||||
expected = coalesce_sparse_map(sparse_map)
|
||||
assert get_sparsemap_fh(fn) == expected
|
||||
assert get_sparsemap_fd(fn) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
|
|
|||
Loading…
Reference in a new issue