_chunker.c: fix some bugs

- check the return value of fd.read(n) more strictly and reject the result if it contains more bytes than requested.
- avoid giving len<=0 to posix_fadvise(), which could drop the rest of the file from cache.
- buzhash: check for len == 0 edge case
- correctly Py_DECREF on error paths
- check for malloc/calloc failures
This commit is contained in:
Thomas Waldmann 2026-04-20 03:37:28 +02:00
parent b055b713af
commit 37f66f11ab
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
2 changed files with 31 additions and 6 deletions

View file

@ -87,6 +87,9 @@ buzhash(const unsigned char *data, size_t len, const uint32_t *h)
{
uint32_t i;
uint32_t sum = 0, imod;
if (len == 0) {
return 0;
}
for(i = len - 1; i > 0; i--)
{
imod = i & 0x1f;
@ -118,12 +121,24 @@ static Chunker *
chunker_init(size_t window_size, uint32_t chunk_mask, size_t min_size, size_t max_size, uint32_t seed)
{
    /*
     * Build a new Chunker from the given parameters.
     *
     * Returns the new Chunker, or NULL when any of the three allocations
     * (the struct itself, the buzhash table, the data buffer) fails.
     * On failure everything allocated so far is released again.
     */

    /* calloc zero-fills, so fields not assigned below start out as 0. */
    Chunker *chunker = calloc(1, sizeof *chunker);
    if(chunker == NULL) {
        return NULL;
    }

    /* Plain scalar settings; none of these can fail. */
    chunker->window_size = window_size;
    chunker->chunk_mask = chunk_mask;
    chunker->min_size = min_size;
    chunker->buf_size = max_size;
    chunker->fh = -1;

    /* Table derived from the seed; released with free() on the error path. */
    chunker->table = buzhash_init_table(seed);
    if(chunker->table == NULL) {
        goto fail_chunker;
    }

    /* Data buffer of buf_size (== max_size) bytes. */
    chunker->data = malloc(chunker->buf_size);
    if(chunker->data == NULL) {
        goto fail_table;
    }

    return chunker;

fail_table:
    free(chunker->table);
fail_chunker:
    free(chunker);
    return NULL;
}
@ -219,7 +234,9 @@ chunker_fill(Chunker *c)
overshoot = 0;
}
posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED);
if (length - overshoot > 0 || length == 0) {
posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED);
}
#endif
PyEval_RestoreThread(thread_state);
@ -230,15 +247,21 @@ chunker_fill(Chunker *c)
if(!data) {
return 0;
}
n = PyBytes_Size(data);
ssize_t read_bytes = PyBytes_Size(data);
if(PyErr_Occurred()) {
// we wanted bytes(), but got something else
Py_DECREF(data);
return 0;
}
if(n) {
memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
c->remaining += n;
c->bytes_read += n;
if(read_bytes > n) {
Py_DECREF(data);
PyErr_SetString(PyExc_ValueError, "read() returned too many bytes");
return 0;
}
if(read_bytes) {
memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), read_bytes);
c->remaining += read_bytes;
c->bytes_read += read_bytes;
}
else {
c->eof = 1;

View file

@ -247,6 +247,8 @@ cdef class Chunker:
assert hash_window_size + min_size + 1 <= max_size, "too small max_size"
hash_mask = (1 << hash_mask_bits) - 1
self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff)
if not self.chunker:
raise MemoryError('chunker_init failed')
def chunkify(self, fd, fh=-1):
"""