diff --git a/borg/_hashindex.c b/borg/_hashindex.c
index 2eebd09d9..128ab5b2f 100644
--- a/borg/_hashindex.c
+++ b/borg/_hashindex.c
@@ -385,3 +385,33 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
     *total_unique_chunks = unique_chunks;
     *total_chunks = chunks;
 }
+
+/*
+ * Merge all entries of `other` into `index`.
+ *
+ * Keys missing from `index` are copied over together with their values.
+ * For keys present in both, only the first int32 value is summed (the
+ * count of a ChunkIndex (count, size, csize) entry); the other values
+ * stored in `index` are kept.
+ */
+static void
+hashindex_merge(HashIndex *index, HashIndex *other)
+{
+    int32_t key_size = index->key_size;
+    const int32_t *other_values;
+    int32_t *my_values;
+    int64_t count;
+    void *key = NULL;
+
+    while((key = hashindex_next_key(other, key))) {
+        other_values = key + key_size;
+        my_values = hashindex_get(index, key);
+        if(my_values == NULL) {
+            hashindex_set(index, key, other_values);
+        } else {
+            /* add in 64 bit and clamp: overflowing a signed int32 is undefined */
+            count = (int64_t)*my_values + *other_values;
+            *my_values = count > INT32_MAX ? INT32_MAX : (int32_t)count;
+        }
+    }
+}
diff --git a/borg/cache.py b/borg/cache.py
index d64cdfb14..f50b456eb 100644
--- a/borg/cache.py
+++ b/borg/cache.py
@@ -309,8 +309,7 @@ class Cache:
                 tf_in.extract(archive_id_hex, tmp_dir)
                 chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
                 archive_chunk_idx = ChunkIndex.read(chunk_idx_path)
-                for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems():
-                    add(chunk_idx, chunk_id, size, csize, incr=count)
+                chunk_idx.merge(archive_chunk_idx)
                 os.unlink(chunk_idx_path)
 
         self.begin_txn()
diff --git a/borg/hashindex.pyx b/borg/hashindex.pyx
index c44fe3947..83416bcdf 100644
--- a/borg/hashindex.pyx
+++ b/borg/hashindex.pyx
@@ -14,6 +14,7 @@ cdef extern from "_hashindex.c":
     void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
                              long long *unique_size, long long *unique_csize,
                              long long *total_unique_chunks, long long *total_chunks)
+    void hashindex_merge(HashIndex *index, HashIndex *other)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -190,6 +191,14 @@ cdef class ChunkIndex(IndexBase):
                             &total_unique_chunks, &total_chunks)
         return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
 
+    def merge(self, ChunkIndex other not None):
+        """Merge the entries of *other* into this index.
+
+        Keys missing here are copied; for keys present in both, the counts
+        are added up. Passing None raises TypeError.
+        """
+        hashindex_merge(self.index, other.index)
+
 
 cdef class ChunkKeyIterator:
     cdef ChunkIndex idx
diff --git a/borg/testsuite/hashindex.py b/borg/testsuite/hashindex.py
index 41c019d61..bbefeb05e 100644
--- a/borg/testsuite/hashindex.py
+++ b/borg/testsuite/hashindex.py
@@ -6,6 +6,11 @@ from ..hashindex import NSIndex, ChunkIndex
 from . import BaseTestCase
 
 
+def H(x):
+    # make a 32 byte long key that depends on x (x zero-padded to 32 digits)
+    return bytes('%-0.32d' % x, 'ascii')
+
+
 class HashIndexTestCase(BaseTestCase):
 
     def _generic_test(self, cls, make_value, sha):
@@ -78,3 +83,25 @@ class HashIndexTestCase(BaseTestCase):
         second_half = list(idx.iteritems(marker=all[49][0]))
         self.assert_equal(len(second_half), 50)
         self.assert_equal(second_half, all[50:])
+
+    def test_chunkindex_merge(self):
+        idx1 = ChunkIndex()
+        idx1[H(1)] = 1, 100, 100
+        idx1[H(2)] = 2, 200, 200
+        idx1[H(3)] = 3, 300, 300
+        # no H(4) entry
+        idx2 = ChunkIndex()
+        idx2[H(1)] = 4, 100, 100
+        idx2[H(2)] = 5, 200, 200
+        # no H(3) entry
+        idx2[H(4)] = 6, 400, 400
+        idx1.merge(idx2)
+        # shared keys: counts are summed, size/csize stay as they were
+        self.assert_equal(idx1[H(1)], (5, 100, 100))
+        self.assert_equal(idx1[H(2)], (7, 200, 200))
+        # key only in idx1: unchanged
+        self.assert_equal(idx1[H(3)], (3, 300, 300))
+        # key only in idx2: copied over
+        self.assert_equal(idx1[H(4)], (6, 400, 400))
+        # the merge source must not be modified
+        self.assert_equal(idx2[H(1)], (4, 100, 100))