mirror of
https://github.com/borgbackup/borg.git
synced 2026-06-11 01:41:57 -04:00
Merge pull request #1077 from ThomasWaldmann/do-not-chunk-small-files
chunker: speed up remainder <= min_size case
This commit is contained in:
commit
33f3a70cf6
3 changed files with 15 additions and 5 deletions
|
|
@@ -96,7 +96,7 @@ buzhash(const unsigned char *data, size_t len, const uint32_t *h)
|
|||
static uint32_t
|
||||
buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t len, const uint32_t *h)
|
||||
{
|
||||
uint32_t lenmod = len & 0x1f;
|
||||
uint32_t lenmod = len & 0x1f; /* Note: replace by constant to get small speedup */
|
||||
return BARREL_SHIFT(sum, 1) ^ BARREL_SHIFT(h[remove], lenmod) ^ h[add];
|
||||
}
|
||||
|
||||
|
|
@@ -249,11 +249,12 @@ chunker_process(Chunker *c)
|
|||
PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch");
|
||||
return NULL;
|
||||
}
|
||||
while(c->remaining <= window_size && !c->eof) {
|
||||
while(c->remaining < min_size + window_size + 1 && !c->eof) { /* see assert in Chunker init */
|
||||
if(!chunker_fill(c)) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
/* here we either are at eof ... */
|
||||
if(c->eof) {
|
||||
c->done = 1;
|
||||
if(c->remaining) {
|
||||
|
|
@@ -268,8 +269,15 @@ chunker_process(Chunker *c)
|
|||
return NULL;
|
||||
}
|
||||
}
|
||||
/* ... or we have at least min_size + window_size + 1 bytes remaining.
|
||||
* We do not want to "cut" a chunk smaller than min_size and the hash
|
||||
* window starts at the potential cutting place.
|
||||
*/
|
||||
c->position += min_size;
|
||||
c->remaining -= min_size;
|
||||
n += min_size;
|
||||
sum = buzhash(c->data + c->position, window_size, c->table);
|
||||
while(c->remaining > c->window_size && ((sum & chunk_mask) || n < min_size)) {
|
||||
while(c->remaining > c->window_size && (sum & chunk_mask)) {
|
||||
sum = buzhash_update(sum, c->data[c->position],
|
||||
c->data[c->position + window_size],
|
||||
window_size, c->table);
|
||||
|
|
|
|||
|
|
@@ -23,6 +23,8 @@ cdef class Chunker:
|
|||
def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
|
||||
min_size = 1 << chunk_min_exp
|
||||
max_size = 1 << chunk_max_exp
|
||||
# see chunker_process, first while loop condition, first term must be able to get True:
|
||||
assert hash_window_size + min_size + 1 <= max_size, "too small max_size"
|
||||
hash_mask = (1 << hash_mask_bits) - 1
|
||||
self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff)
|
||||
|
||||
|
|
|
|||
|
|
@@ -1515,9 +1515,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
|
|||
self.cmd('create', self.repository_location + '::test', 'input')
|
||||
archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
|
||||
with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()):
|
||||
self.cmd('recreate', '-pv', '--chunker-params', '10,12,11,4095', self.repository_location)
|
||||
self.cmd('recreate', '-pv', '--chunker-params', '10,13,11,4095', self.repository_location)
|
||||
assert 'test.recreate' in self.cmd('list', self.repository_location)
|
||||
output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,12,11,4095', self.repository_location)
|
||||
output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,13,11,4095', self.repository_location)
|
||||
assert 'Found test.recreate, will resume' in output
|
||||
assert 'Copied 1 chunks from a partially processed item' in output
|
||||
archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
|
||||
|
|
|
|||
Loading…
Reference in a new issue