Merge pull request #9586 from ThomasWaldmann/fix-chunker-1.4

chunker fixes 1.4
This commit is contained in:
TW 2026-05-04 21:57:34 +02:00 committed by GitHub
commit bb7bc62fe8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 31 additions and 6 deletions

View file

@ -87,6 +87,9 @@ buzhash(const unsigned char *data, size_t len, const uint32_t *h)
{
uint32_t i;
uint32_t sum = 0, imod;
if (len == 0) {
return 0;
}
for(i = len - 1; i > 0; i--)
{
imod = i & 0x1f;
@ -118,12 +121,24 @@ static Chunker *
chunker_init(size_t window_size, uint32_t chunk_mask, size_t min_size, size_t max_size, uint32_t seed)
{
    /*
     * Allocate and initialise a Chunker.
     *
     * window_size, chunk_mask and min_size are stored unchanged in the new
     * object; max_size becomes the size of the data buffer; seed is handed
     * to buzhash_init_table() to build the lookup table.
     *
     * Returns the new Chunker, or NULL if any of the three allocations
     * (the struct itself, the buzhash table, the data buffer) fails.
     * On failure everything allocated so far is released again, so the
     * caller has nothing to clean up.
     */
    Chunker *chunker = calloc(1, sizeof(*chunker));
    if(chunker == NULL) {
        return NULL;
    }

    chunker->window_size = window_size;
    chunker->chunk_mask = chunk_mask;
    chunker->min_size = min_size;

    chunker->table = buzhash_init_table(seed);
    if(chunker->table == NULL) {
        goto release_chunker;
    }

    /* The buffer is sized from the stored field, not from max_size directly. */
    chunker->buf_size = max_size;
    chunker->data = malloc(chunker->buf_size);
    if(chunker->data == NULL) {
        goto release_table;
    }

    /* NOTE(review): -1 presumably means "no file handle yet" - confirm against chunker users. */
    chunker->fh = -1;
    return chunker;

release_table:
    free(chunker->table);
release_chunker:
    free(chunker);
    return NULL;
}
@ -219,7 +234,9 @@ chunker_fill(Chunker *c)
overshoot = 0;
}
posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED);
if (length - overshoot > 0 || length == 0) {
posix_fadvise(c->fh, offset & ~pagemask, length - overshoot, POSIX_FADV_DONTNEED);
}
#endif
PyEval_RestoreThread(thread_state);
@ -230,15 +247,21 @@ chunker_fill(Chunker *c)
if(!data) {
return 0;
}
n = PyBytes_Size(data);
ssize_t read_bytes = PyBytes_Size(data);
if(PyErr_Occurred()) {
// we wanted bytes(), but got something else
Py_DECREF(data);
return 0;
}
if(n) {
memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), n);
c->remaining += n;
c->bytes_read += n;
if(read_bytes > n) {
Py_DECREF(data);
PyErr_SetString(PyExc_ValueError, "read() returned too many bytes");
return 0;
}
if(read_bytes) {
memcpy(c->data + c->position + c->remaining, PyBytes_AsString(data), read_bytes);
c->remaining += read_bytes;
c->bytes_read += read_bytes;
}
else {
c->eof = 1;

View file

@ -247,6 +247,8 @@ cdef class Chunker:
assert hash_window_size + min_size + 1 <= max_size, "too small max_size"
hash_mask = (1 << hash_mask_bits) - 1
self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff)
if not self.chunker:
raise MemoryError('chunker_init failed')
def chunkify(self, fd, fh=-1):
"""