mirror of
https://github.com/borgbackup/borg.git
synced 2026-04-07 10:08:57 -04:00
BUCKET_UPPER_LIMIT: a 90% load factor degrades hash table performance severely, so I lowered it to 75% (a usual value — Java uses 75%, Python uses 66%). I chose the higher of the two because we also should not consume too much memory, considering the RAM usage already is rather high. MIN_BUCKETS: I can't explain why, but benchmarks showed that choosing 2^N as the table size severely degrades performance (by 3 orders of magnitude!). A prime start value improves this a lot, even if we later still use the grow-by-2x algorithm. hashindex_resize: removed the hashindex_get() call, as we already know that the values come at the key + key_size address. hashindex_init: do not calloc X*Y elements of size 1, but rather X elements of size Y. Makes the code simpler; not sure if it affects performance. The tests needed fixing because the resulting hash table blob is, of course, different due to the above changes, so its SHA hash changed.
102 lines
3.6 KiB
Python
102 lines
3.6 KiB
Python
import hashlib
|
|
import os
|
|
import tempfile
|
|
|
|
from ..hashindex import NSIndex, ChunkIndex
|
|
from . import BaseTestCase
|
|
|
|
|
|
def H(x):
    """Return a 32-byte ASCII key derived from the integer *x*.

    The value is *x* rendered in decimal and zero-padded on the left
    to exactly 32 digits, encoded as ASCII bytes.
    """
    return ('%-0.32d' % x).encode('ascii')
|
|
|
|
|
|
class HashIndexTestCase(BaseTestCase):
    """Tests for the NSIndex and ChunkIndex hash table implementations."""

    def _generic_test(self, cls, make_value, sha):
        """Exercise set/get/update/delete and (de)serialization for *cls*.

        :param cls: index class under test (NSIndex or ChunkIndex)
        :param make_value: maps an int ordinal to the value tuple to store
        :param sha: expected SHA-256 hex digest of the serialized index file
        """
        idx = cls()
        self.assert_equal(len(idx), 0)
        # Test set
        for x in range(100):
            idx[bytes('%-32d' % x, 'ascii')] = make_value(x)
        self.assert_equal(len(idx), 100)
        for x in range(100):
            self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x))
        # Test update
        for x in range(100):
            idx[bytes('%-32d' % x, 'ascii')] = make_value(x * 2)
        self.assert_equal(len(idx), 100)
        for x in range(100):
            self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2))
        # Test delete
        for x in range(50):
            del idx[bytes('%-32d' % x, 'ascii')]
        self.assert_equal(len(idx), 50)
        # keep the NamedTemporaryFile object alive so the file persists on disk
        idx_name = tempfile.NamedTemporaryFile()
        idx.write(idx_name.name)
        del idx
        # Verify file contents: the serialized blob must be byte-identical
        # across runs, so its SHA-256 digest is a stable fingerprint.
        with open(idx_name.name, 'rb') as fd:
            self.assert_equal(hashlib.sha256(fd.read()).hexdigest(), sha)
        # Make sure we can open the file
        idx = cls.read(idx_name.name)
        self.assert_equal(len(idx), 50)
        for x in range(50, 100):
            self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2))
        idx.clear()
        self.assert_equal(len(idx), 0)
        idx.write(idx_name.name)
        del idx
        self.assert_equal(len(cls.read(idx_name.name)), 0)

    def test_nsindex(self):
        self._generic_test(NSIndex, lambda x: (x, x),
                           '80fba5b40f8cf12f1486f1ba33c9d852fb2b41a5b5961d3b9d1228cf2aa9c4c9')

    def test_chunkindex(self):
        self._generic_test(ChunkIndex, lambda x: (x, x, x),
                           '1d71865e72e3c3af18d3c7216b6fa7b014695eaa3ed7f14cf9cd02fba75d1c95')

    def test_resize(self):
        """Index file must grow when filled and shrink back to its initial
        size once all entries are deleted again."""
        n = 2000  # Must be >= MIN_BUCKETS
        idx_name = tempfile.NamedTemporaryFile()
        idx = NSIndex()
        idx.write(idx_name.name)
        initial_size = os.path.getsize(idx_name.name)
        self.assert_equal(len(idx), 0)
        for x in range(n):
            idx[bytes('%-32d' % x, 'ascii')] = x, x
        idx.write(idx_name.name)
        self.assert_true(initial_size < os.path.getsize(idx_name.name))
        for x in range(n):
            del idx[bytes('%-32d' % x, 'ascii')]
        self.assert_equal(len(idx), 0)
        idx.write(idx_name.name)
        self.assert_equal(initial_size, os.path.getsize(idx_name.name))

    def test_iteritems(self):
        """iteritems() must yield every entry; passing marker= resumes
        iteration after the given key."""
        idx = NSIndex()
        for x in range(100):
            idx[bytes('%-0.32d' % x, 'ascii')] = x, x
        # renamed from `all` to avoid shadowing the builtin of the same name
        items = list(idx.iteritems())
        self.assert_equal(len(items), 100)
        second_half = list(idx.iteritems(marker=items[49][0]))
        self.assert_equal(len(second_half), 50)
        self.assert_equal(second_half, items[50:])

    def test_chunkindex_merge(self):
        """merge() adds refcounts for shared keys and copies entries that
        exist on only one side."""
        idx1 = ChunkIndex()
        idx1[H(1)] = 1, 100, 100
        idx1[H(2)] = 2, 200, 200
        idx1[H(3)] = 3, 300, 300
        # no H(4) entry
        idx2 = ChunkIndex()
        idx2[H(1)] = 4, 100, 100
        idx2[H(2)] = 5, 200, 200
        # no H(3) entry
        idx2[H(4)] = 6, 400, 400
        idx1.merge(idx2)
        assert idx1[H(1)] == (5, 100, 100)
        assert idx1[H(2)] == (7, 200, 200)
        assert idx1[H(3)] == (3, 300, 300)
        assert idx1[H(4)] == (6, 400, 400)
|