From 2e6af653f6fa05c0706412fa6ac518a94081eb20 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 Feb 2026 22:40:22 +0100 Subject: [PATCH 1/3] hashindex: fixed iteritems segfaulting with non-existent marker, fixes #9368 Never happened in borg, because borg always gives existing markers to iteritems. Also: added test for this. --- src/borg/hashindex.pyx | 8 +++---- src/borg/testsuite/hashindex_pytest.py | 30 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 src/borg/testsuite/hashindex_pytest.py diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx index 916df65ed..44f1fcbe1 100644 --- a/src/borg/hashindex.pyx +++ b/src/borg/hashindex.pyx @@ -237,8 +237,8 @@ cdef class NSIndex(IndexBase): iter.index = self.index if marker: key = hashindex_get(self.index, marker) - if marker is None: - raise IndexError + if not key: + raise KeyError("marker not found") iter.key = key - self.key_size return iter @@ -354,8 +354,8 @@ cdef class ChunkIndex(IndexBase): iter.index = self.index if marker: key = hashindex_get(self.index, marker) - if marker is None: - raise IndexError + if not key: + raise KeyError("marker not found") iter.key = key - self.key_size return iter diff --git a/src/borg/testsuite/hashindex_pytest.py b/src/borg/testsuite/hashindex_pytest.py new file mode 100644 index 000000000..4a82775c7 --- /dev/null +++ b/src/borg/testsuite/hashindex_pytest.py @@ -0,0 +1,30 @@ +import pytest +from borg.hashindex import NSIndex, ChunkIndex + +def test_nsindex_iteritems_marker(): + nsindex = NSIndex() + nsindex[b'\xbb'*32] = (123, 456) + nsindex[b'\xaa'*32] = (234, 567) + + # marker exists + items = list(nsindex.iteritems(marker=b'\xbb'*32)) + assert len(items) == 1 + assert items[0][0] == b'\xaa'*32 + + # marker does not exist + with pytest.raises(KeyError, match="marker not found"): + list(nsindex.iteritems(marker=b'\xcc'*32)) + +def test_chunkindex_iteritems_marker(): + chunkindex = ChunkIndex() + chunkindex[b'\xbb'*32] = (1, 100, 50) + chunkindex[b'\xaa'*32] = (1, 200, 100) + + # marker exists + items = list(chunkindex.iteritems(marker=b'\xbb'*32)) + assert len(items) == 1 + assert items[0][0] == b'\xaa'*32 + + # marker does not exist + with pytest.raises(KeyError, match="marker not found"): + list(chunkindex.iteritems(marker=b'\xcc'*32)) From eb80e53fc2f1094d193976eac3208ee8dd110408 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 Feb 2026 22:52:01 +0100 Subject: [PATCH 2/3] hashindex: always check return value of hashindex_get in the same way --- src/borg/hashindex.pyx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/borg/hashindex.pyx b/src/borg/hashindex.pyx index 44f1fcbe1..9ce87abcd 100644 --- a/src/borg/hashindex.pyx +++ b/src/borg/hashindex.pyx @@ -205,7 +205,7 @@ cdef class NSIndex(IndexBase): def __getitem__(self, key): assert len(key) == self.key_size data = hashindex_get(self.index, key) - if not data: + if data == NULL: raise KeyError(key) cdef uint32_t segment = _le32toh(data[0]) assert segment <= _MAX_VALUE, "maximum number of segments reached" @@ -237,7 +237,7 @@ cdef class NSIndex(IndexBase): iter.index = self.index if marker: key = hashindex_get(self.index, marker) - if not key: + if key == NULL: raise KeyError("marker not found") iter.key = key - self.key_size return iter @@ -296,7 +296,7 @@ cdef class ChunkIndex(IndexBase): def __getitem__(self, key): assert len(key) == self.key_size data = hashindex_get(self.index, key) - if not data: + if data == NULL: raise KeyError(key) cdef uint32_t refcount = _le32toh(data[0]) assert refcount <= _MAX_VALUE, "invalid reference count" @@ -324,7 +324,7 @@ cdef class ChunkIndex(IndexBase): """Increase refcount for 'key', return (refcount, size, csize).""" assert len(key) == self.key_size data = hashindex_get(self.index, key) - if not data: + if data == NULL: raise KeyError(key) cdef uint32_t refcount = _le32toh(data[0]) assert refcount <= _MAX_VALUE, "invalid reference count" @@ -337,7 +337,7 @@ cdef class ChunkIndex(IndexBase): """Decrease refcount for 'key', return (refcount, size, csize).""" assert len(key) == self.key_size data = hashindex_get(self.index, key) - if not data: + if data == NULL: raise KeyError(key) cdef uint32_t refcount = _le32toh(data[0]) # Never decrease a reference count of zero @@ -354,7 +354,7 @@ cdef class ChunkIndex(IndexBase): iter.index = self.index if marker: key = hashindex_get(self.index, marker) - if not key: + if key == NULL: raise KeyError("marker not found") iter.key = key - self.key_size return iter @@ -406,7 +406,7 @@ cdef class ChunkIndex(IndexBase): break our_values = (key + self.key_size) master_values = hashindex_get(master, key) - if not master_values: + if master_values == NULL: raise ValueError('stats_against: key contained in self but not in master_index.') our_refcount = _le32toh(our_values[0]) chunk_size = _le32toh(master_values[1]) @@ -434,7 +434,7 @@ cdef class ChunkIndex(IndexBase): cdef _add(self, unsigned char *key, uint32_t *data): cdef uint64_t refcount1, refcount2, result64 values = hashindex_get(self.index, key) - if values: + if values != NULL: refcount1 = _le32toh(values[0]) refcount2 = _le32toh(data[0]) assert refcount1 <= _MAX_VALUE, "invalid reference count" From c5a7a3cda67886e909f6cd10059097842c322d28 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 18 Feb 2026 22:59:30 +0100 Subject: [PATCH 3/3] hashindex: moved the tests from hashindex_stress to hashindex_pytest --- src/borg/testsuite/hashindex_pytest.py | 37 ++++++++++++++++++++++++++ src/borg/testsuite/hashindex_stress.py | 37 -------------------------- 2 files changed, 37 insertions(+), 37 deletions(-) delete mode 100644 src/borg/testsuite/hashindex_stress.py diff --git a/src/borg/testsuite/hashindex_pytest.py b/src/borg/testsuite/hashindex_pytest.py index 4a82775c7..e81d3242c 100644 --- a/src/borg/testsuite/hashindex_pytest.py +++ b/src/borg/testsuite/hashindex_pytest.py @@ -1,6 +1,11 @@ +import os +import random + import pytest + from borg.hashindex import NSIndex, ChunkIndex + def test_nsindex_iteritems_marker(): nsindex = NSIndex() nsindex[b'\xbb'*32] = (123, 456) @@ -15,6 +20,7 @@ def test_nsindex_iteritems_marker(): with pytest.raises(KeyError, match="marker not found"): list(nsindex.iteritems(marker=b'\xcc'*32)) + def test_chunkindex_iteritems_marker(): chunkindex = ChunkIndex() chunkindex[b'\xbb'*32] = (1, 100, 50) @@ -28,3 +34,34 @@ def test_chunkindex_iteritems_marker(): # marker does not exist with pytest.raises(KeyError, match="marker not found"): list(chunkindex.iteritems(marker=b'\xcc'*32)) + + +@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1") +def test_hashindex_stress(): + """Check if the hash table behaves as expected + + This can be used in _hashindex.c before running this test to provoke more collisions (don't forget to compile): + #define HASH_MAX_LOAD .99 + #define HASH_MAX_EFF_LOAD .999 + """ + ENTRIES = 10000 + LOOPS = 1000 + idx = NSIndex() + kv = {} + for i in range(LOOPS): + # Put some entries + for j in range(ENTRIES): + k = random.randbytes(32) + v = random.randint(0, NSIndex.MAX_VALUE - 1) + idx[k] = (v, v) + kv[k] = v + # Check and delete a random number of entries + delete_keys = random.sample(list(kv), k=random.randint(0, len(kv))) + for k in delete_keys: + v = kv.pop(k) + assert idx.pop(k) == (v, v) + # Check whether the remaining entries are as expected + for k, v in kv.items(): + assert idx[k] == (v, v) + # Check entry count + assert len(kv) == len(idx) diff --git a/src/borg/testsuite/hashindex_stress.py b/src/borg/testsuite/hashindex_stress.py deleted file mode 100644 index b0a819863..000000000 --- a/src/borg/testsuite/hashindex_stress.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -import random - -import pytest - -from ..hashindex import NSIndex - - -@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1") -def test_hashindex_stress(): - """Check if the hash table behaves as expected - - This can be used in _hashindex.c before running this test to provoke more collisions (don't forget to compile): - #define HASH_MAX_LOAD .99 - #define HASH_MAX_EFF_LOAD .999 - """ - ENTRIES = 10000 - LOOPS = 1000 - idx = NSIndex() - kv = {} - for i in range(LOOPS): - # Put some entries - for j in range(ENTRIES): - k = random.randbytes(32) - v = random.randint(0, NSIndex.MAX_VALUE - 1) - idx[k] = (v, v) - kv[k] = v - # Check and delete a random number of entries - delete_keys = random.sample(list(kv), k=random.randint(0, len(kv))) - for k in delete_keys: - v = kv.pop(k) - assert idx.pop(k) == (v, v) - # Check whether the remaining entries are as expected - for k, v in kv.items(): - assert idx[k] == (v, v) - # Check entry count - assert len(kv) == len(idx)