mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-10 17:32:13 -04:00
Merge pull request #8964 from ThomasWaldmann/issue-8963
fixing reader bug, fixes #8963
This commit is contained in:
commit
53c0d0a449
2 changed files with 75 additions and 4 deletions
|
|
@ -303,10 +303,11 @@ class FileReader:
|
|||
# For data chunks, add the actual data
|
||||
result.extend(data[self.offset:self.offset + to_read])
|
||||
else:
|
||||
# For non-data chunks, add zeros if we've seen a data chunk
|
||||
if has_data:
|
||||
result.extend(b'\0' * to_read)
|
||||
# Otherwise, we'll just track the size without adding data
|
||||
# For non-data chunks, always add zeros to the result.
|
||||
# We will only yield a CH_DATA chunk with the result bytes,
|
||||
# if there was at least one CH_DATA chunk contributing to the result,
|
||||
# otherwise we will yield a CH_HOLE or CH_ALLOC chunk.
|
||||
result.extend(b'\0' * to_read)
|
||||
|
||||
bytes_read += to_read
|
||||
|
||||
|
|
|
|||
70
src/borg/testsuite/chunkers/interaction_test.py
Normal file
70
src/borg/testsuite/chunkers/interaction_test.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import os
|
||||
import pytest
|
||||
from io import BytesIO
|
||||
|
||||
from ...chunkers import get_chunker
|
||||
from ...constants import * # NOQA
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "chunker_params",
    [
        (CH_FIXED, 1048576, 0),  # == reader_block_size
        (CH_FIXED, 1048576 // 2, 0),  # reader_block_size / N
        (CH_FIXED, 1048576 * 2, 0),  # N * reader_block_size
        (CH_FIXED, 1234567, 0),  # does not fit well, larger than reader_block_size
        (CH_FIXED, 123456, 0),  # does not fit well, smaller than reader_block_size
        (CH_BUZHASH, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE),
        (CH_BUZHASH64, CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE),
    ],
)
def test_reader_chunker_interaction(chunker_params):
    """
    Test that chunking random/zero data produces chunks that can be reassembled to match the original data.

    The input is one third random bytes, one third zero bytes and one third random bytes again,
    fed to the chunker from an in-memory file object. The test checks that:

    - at least one CH_DATA and at least one CH_ALLOC chunk is produced,
    - no CH_HOLE chunk is produced,
    - no chunk carries an allocation type other than these three,
    - concatenating the chunks (zeros for CH_HOLE / CH_ALLOC) reproduces the input exactly.

    If one of these fails, there is likely a problem with buffer management.

    :param chunker_params: positional arguments passed on to ``get_chunker``.
    """
    # Generate some data: random | zeros | random, each a third of the total.
    data_size = 6 * 12341234
    third = data_size // 3
    random_data = os.urandom(third) + b"\0" * third + os.urandom(third)

    # Chunk the data
    chunker = get_chunker(*chunker_params)
    data_file = BytesIO(random_data)
    chunks = list(chunker.chunkify(data_file))

    # Count the chunks per allocation type.
    data_chunks = 0
    hole_chunks = 0
    alloc_chunks = 0
    for chunk in chunks:
        allocation = chunk.meta["allocation"]
        if allocation == CH_DATA:
            data_chunks += 1
        elif allocation == CH_HOLE:
            hole_chunks += 1
        elif allocation == CH_ALLOC:
            alloc_chunks += 1
        else:
            # Fail right here instead of letting an unknown chunk type surface
            # later as an unexplained size or content mismatch.
            pytest.fail(f"Unexpected chunk allocation type: {allocation!r}")

    assert data_chunks > 0, "No data chunks found"
    assert alloc_chunks > 0, "No alloc chunks found"
    assert hole_chunks == 0, "Hole chunks found, this is not expected!"

    # Reassemble the chunks
    reassembled = BytesIO()
    for chunk in chunks:
        if chunk.meta["allocation"] == CH_DATA:
            # For data chunks, write the actual data
            reassembled.write(bytes(chunk.data))
        else:
            # Only CH_HOLE / CH_ALLOC can get here (checked above): write zeros
            reassembled.write(b"\0" * chunk.meta["size"])

    reassembled_data = reassembled.getvalue()

    # Check that the reassembled data has the correct size
    reassembled_size = len(reassembled_data)
    assert (
        reassembled_size == data_size
    ), f"Reassembled data size ({reassembled_size}) does not equal original data size ({data_size})"

    # Verify that the reassembled data matches the original data
    assert reassembled_data == random_data, "Reassembled data does not match original data"
|
||||
Loading…
Reference in a new issue