remove the repository.flags call / feature

This heavily depended on having a repository index in which the flags are stored.

With borgstore, we do not have such an index.
This commit is contained in:
Thomas Waldmann 2024-08-18 17:55:41 +02:00
parent 60edc8255f
commit 6605f588cf
No known key found for this signature in database
GPG key ID: 243ACFA951F78E01
8 changed files with 27 additions and 214 deletions

View file

@ -196,7 +196,7 @@ NSIndexEntry = namedtuple('NSIndexEntry', 'segment offset size')
cdef class NSIndex(IndexBase):
value_size = 16
value_size = 12
def __getitem__(self, key):
assert len(key) == self.key_size
@ -209,13 +209,13 @@ cdef class NSIndex(IndexBase):
def __setitem__(self, key, value):
assert len(key) == self.key_size
cdef uint32_t[4] data
cdef uint32_t[3] data
assert len(value) == len(data)
cdef uint32_t segment = value[0]
assert segment <= _MAX_VALUE, "maximum number of segments reached"
data[0] = _htole32(segment)
data[1] = _htole32(value[1])
data[2] = _htole32(value[2])
data[3] = 0 # init flags to all cleared
if not hashindex_set(self.index, <unsigned char *>key, data):
raise Exception('hashindex_set failed')
@ -228,12 +228,10 @@ cdef class NSIndex(IndexBase):
assert segment <= _MAX_VALUE, "maximum number of segments reached"
return data != NULL
def iteritems(self, marker=None, mask=0, value=0):
def iteritems(self, marker=None):
"""iterate over all items or optionally only over items having specific flag values"""
cdef const unsigned char *key
assert isinstance(mask, int)
assert isinstance(value, int)
iter = NSKeyIterator(self.key_size, mask, value)
iter = NSKeyIterator(self.key_size)
iter.idx = self
iter.index = self.index
if marker:
@ -243,20 +241,6 @@ cdef class NSIndex(IndexBase):
iter.key = key - self.key_size
return iter
def flags(self, key, mask=0xFFFFFFFF, value=None):
"""query and optionally set flags"""
assert len(key) == self.key_size
assert isinstance(mask, int)
data = <uint32_t *>hashindex_get(self.index, <unsigned char *>key)
if not data:
raise KeyError(key)
flags = _le32toh(data[3])
if isinstance(value, int):
new_flags = flags & ~mask # clear masked bits
new_flags |= value & mask # set value bits
data[3] = _htole32(new_flags)
return flags & mask # always return previous flags value
cdef class NSKeyIterator:
cdef NSIndex idx
@ -264,15 +248,10 @@ cdef class NSKeyIterator:
cdef const unsigned char *key
cdef int key_size
cdef int exhausted
cdef unsigned int flag_mask
cdef unsigned int flag_value
def __cinit__(self, key_size, mask, value):
def __cinit__(self, key_size):
self.key = NULL
self.key_size = key_size
# note: mask and value both default to 0, so they will match all entries
self.flag_mask = _htole32(mask)
self.flag_value = _htole32(value)
self.exhausted = 0
def __iter__(self):
@ -282,16 +261,11 @@ cdef class NSKeyIterator:
cdef uint32_t *value
if self.exhausted:
raise StopIteration
while True:
self.key = hashindex_next_key(self.index, <unsigned char *>self.key)
if not self.key:
self.exhausted = 1
raise StopIteration
value = <uint32_t *> (self.key + self.key_size)
if value[3] & self.flag_mask == self.flag_value:
# we found a matching entry!
break
self.key = hashindex_next_key(self.index, <unsigned char *>self.key)
if not self.key:
self.exhausted = 1
raise StopIteration
value = <uint32_t *> (self.key + self.key_size)
cdef uint32_t segment = _le32toh(value[0])
assert segment <= _MAX_VALUE, "maximum number of segments reached"
return ((<char *>self.key)[:self.key_size],
@ -331,9 +305,8 @@ cdef class NSIndex1(IndexBase): # legacy borg 1.x
assert segment <= _MAX_VALUE, "maximum number of segments reached"
return data != NULL
def iteritems(self, marker=None, mask=0, value=0):
def iteritems(self, marker=None):
cdef const unsigned char *key
assert mask == 0 and value == 0, "using mask/value is not supported for old index"
iter = NSKeyIterator1(self.key_size)
iter.idx = self
iter.index = self.index
@ -344,9 +317,6 @@ cdef class NSIndex1(IndexBase): # legacy borg 1.x
iter.key = key - self.key_size
return iter
def flags(self, key, mask=0xFFFFFFFF, value=None):
raise NotImplemented("calling .flags() is not supported for old index")
cdef class NSKeyIterator1: # legacy borg 1.x
cdef NSIndex1 idx

View file

@ -141,8 +141,6 @@ class RepositoryServer: # pragma: no cover
"commit",
"delete",
"destroy",
"flags",
"flags_many",
"get",
"list",
"negotiate",
@ -984,20 +982,8 @@ class RemoteRepository:
def __len__(self):
"""actual remoting is done via self.call in the @api decorator"""
@api(
since=parse_version("1.0.0"),
mask={"since": parse_version("2.0.0b2"), "previously": 0},
value={"since": parse_version("2.0.0b2"), "previously": 0},
)
def list(self, limit=None, marker=None, mask=0, value=0):
"""actual remoting is done via self.call in the @api decorator"""
@api(since=parse_version("2.0.0b2"))
def flags(self, id, mask=0xFFFFFFFF, value=None):
"""actual remoting is done via self.call in the @api decorator"""
@api(since=parse_version("2.0.0b2"))
def flags_many(self, ids, mask=0xFFFFFFFF, value=None):
@api(since=parse_version("1.0.0"))
def list(self, limit=None, marker=None):
"""actual remoting is done via self.call in the @api decorator"""
def get(self, id, read_data=True):

View file

@ -143,8 +143,6 @@ class RepositoryServer: # pragma: no cover
"commit",
"delete",
"destroy",
"flags",
"flags_many",
"get",
"list",
"negotiate",
@ -1021,12 +1019,8 @@ class RemoteRepository3:
def __len__(self):
"""actual remoting is done via self.call in the @api decorator"""
@api(
since=parse_version("1.0.0"),
mask={"since": parse_version("2.0.0b2"), "previously": 0},
value={"since": parse_version("2.0.0b2"), "previously": 0},
)
def list(self, limit=None, marker=None, mask=0, value=0):
@api(since=parse_version("1.0.0"))
def list(self, limit=None, marker=None):
"""actual remoting is done via self.call in the @api decorator"""
def get(self, id, read_data=True):

View file

@ -1207,31 +1207,13 @@ class Repository:
self.index = self.open_index(self.get_transaction_id())
return id in self.index
def list(self, limit=None, marker=None, mask=0, value=0):
def list(self, limit=None, marker=None):
"""
list <limit> IDs starting from after id <marker> - in index (pseudo-random) order.
if mask and value are given, only return IDs where flags & mask == value (default: all IDs).
"""
if not self.index:
self.index = self.open_index(self.get_transaction_id())
return [id_ for id_, _ in islice(self.index.iteritems(marker=marker, mask=mask, value=value), limit)]
def flags(self, id, mask=0xFFFFFFFF, value=None):
"""
query and optionally set flags
:param id: id (key) of object
:param mask: bitmask for flags (default: operate on all 32 bits)
:param value: value to set masked bits to (default: do not change any flags)
:return: (previous) flags value (only masked bits)
"""
if not self.index:
self.index = self.open_index(self.get_transaction_id())
return self.index.flags(id, mask, value)
def flags_many(self, ids, mask=0xFFFFFFFF, value=None):
return [self.flags(id_, mask, value) for id_ in ids]
return [id_ for id_, _ in islice(self.index.iteritems(marker=marker), limit)]
def get(self, id, read_data=True):
if not self.index:

View file

@ -288,11 +288,9 @@ class Repository3:
def __contains__(self, id):
raise NotImplementedError
def list(self, limit=None, marker=None, mask=0, value=0):
def list(self, limit=None, marker=None):
"""
list <limit> IDs starting from after id <marker> - in index (pseudo-random) order.
if mask and value are given, only return IDs where flags & mask == value (default: all IDs).
list <limit> IDs starting from after id <marker>.
"""
self._lock_refresh()
infos = self.store.list("data") # XXX we can only get the full list from the store

View file

@ -33,7 +33,7 @@ SELFTEST_CASES = [
ChunkerTestCase,
]
SELFTEST_COUNT = 32
SELFTEST_COUNT = 30
class SelfTestResult(TestResult):

View file

@ -86,7 +86,7 @@ class HashIndexTestCase(BaseTestCase):
def test_nsindex(self):
self._generic_test(
NSIndex, lambda x: (x, x, x), "0d7880dbe02b64f03c471e60e193a1333879b4f23105768b10c9222accfeac5e"
NSIndex, lambda x: (x, x, x), "640b909cf07884cc11fdf5431ffc27dee399770ceadecce31dffecd130a311a3"
)
def test_chunkindex(self):
@ -102,7 +102,7 @@ class HashIndexTestCase(BaseTestCase):
initial_size = os.path.getsize(filepath)
self.assert_equal(len(idx), 0)
for x in range(n):
idx[H(x)] = x, x, x, x
idx[H(x)] = x, x, x
idx.write(filepath)
assert initial_size < os.path.getsize(filepath)
for x in range(n):
@ -114,7 +114,7 @@ class HashIndexTestCase(BaseTestCase):
def test_iteritems(self):
idx = NSIndex()
for x in range(100):
idx[H(x)] = x, x, x, x
idx[H(x)] = x, x, x
iterator = idx.iteritems()
all = list(iterator)
self.assert_equal(len(all), 100)
@ -141,70 +141,6 @@ class HashIndexTestCase(BaseTestCase):
assert idx1[H(3)] == (3, 300)
assert idx1[H(4)] == (6, 400)
def test_flags(self):
idx = NSIndex()
key = H(0)
self.assert_raises(KeyError, idx.flags, key, 0)
idx[key] = 0, 0, 0 # create entry
# check bit 0 and 1, should be both 0 after entry creation
self.assert_equal(idx.flags(key, mask=3), 0)
# set bit 0
idx.flags(key, mask=1, value=1)
self.assert_equal(idx.flags(key, mask=1), 1)
# set bit 1
idx.flags(key, mask=2, value=2)
self.assert_equal(idx.flags(key, mask=2), 2)
# check both bit 0 and 1, both should be set
self.assert_equal(idx.flags(key, mask=3), 3)
# clear bit 1
idx.flags(key, mask=2, value=0)
self.assert_equal(idx.flags(key, mask=2), 0)
# clear bit 0
idx.flags(key, mask=1, value=0)
self.assert_equal(idx.flags(key, mask=1), 0)
# check both bit 0 and 1, both should be cleared
self.assert_equal(idx.flags(key, mask=3), 0)
def test_flags_iteritems(self):
idx = NSIndex()
keys_flagged0 = {H(i) for i in (1, 2, 3, 42)}
keys_flagged1 = {H(i) for i in (11, 12, 13, 142)}
keys_flagged2 = {H(i) for i in (21, 22, 23, 242)}
keys_flagged3 = {H(i) for i in (31, 32, 33, 342)}
for key in keys_flagged0:
idx[key] = 0, 0, 0 # create entry
idx.flags(key, mask=3, value=0) # not really necessary, unflagged is default
for key in keys_flagged1:
idx[key] = 0, 0, 0 # create entry
idx.flags(key, mask=3, value=1)
for key in keys_flagged2:
idx[key] = 0, 0, 0 # create entry
idx.flags(key, mask=3, value=2)
for key in keys_flagged3:
idx[key] = 0, 0, 0 # create entry
idx.flags(key, mask=3, value=3)
# check if we can iterate over all items
k_all = {k for k, v in idx.iteritems()}
self.assert_equal(k_all, keys_flagged0 | keys_flagged1 | keys_flagged2 | keys_flagged3)
# check if we can iterate over the flagged0 items
k0 = {k for k, v in idx.iteritems(mask=3, value=0)}
self.assert_equal(k0, keys_flagged0)
# check if we can iterate over the flagged1 items
k1 = {k for k, v in idx.iteritems(mask=3, value=1)}
self.assert_equal(k1, keys_flagged1)
# check if we can iterate over the flagged2 items
k1 = {k for k, v in idx.iteritems(mask=3, value=2)}
self.assert_equal(k1, keys_flagged2)
# check if we can iterate over the flagged3 items
k1 = {k for k, v in idx.iteritems(mask=3, value=3)}
self.assert_equal(k1, keys_flagged3)
# check if we can iterate over the flagged1 + flagged3 items
k1 = {k for k, v in idx.iteritems(mask=1, value=1)}
self.assert_equal(k1, keys_flagged1 | keys_flagged3)
# check if we can iterate over the flagged0 + flagged2 items
k1 = {k for k, v in idx.iteritems(mask=1, value=0)}
self.assert_equal(k1, keys_flagged0 | keys_flagged2)
class HashIndexExtraTestCase(BaseTestCase):
"""These tests are separate because they should not become part of the selftest."""
@ -553,9 +489,9 @@ class NSIndexTestCase(BaseTestCase):
def test_nsindex_segment_limit(self):
idx = NSIndex()
with self.assert_raises(AssertionError):
idx[H(1)] = NSIndex.MAX_VALUE + 1, 0, 0, 0
idx[H(1)] = NSIndex.MAX_VALUE + 1, 0, 0
assert H(1) not in idx
idx[H(2)] = NSIndex.MAX_VALUE, 0, 0, 0
idx[H(2)] = NSIndex.MAX_VALUE, 0, 0
assert H(2) in idx
@ -583,7 +519,7 @@ class IndexCorruptionTestCase(BaseTestCase):
for y in range(700): # stay below max load not to trigger resize
idx[HH(0, y, 0)] = (0, y, 0)
assert idx.size() == 1024 + 1031 * 48 # header + 1031 buckets
assert idx.size() == 1024 + 1031 * 44 # header + 1031 buckets
# delete lots of the collisions, creating lots of tombstones
for y in range(400): # stay above min load not to trigger resize

View file

@ -231,59 +231,6 @@ def test_max_data_size(repo_fixtures, request):
repository.put(H(1), fchunk(max_data + b"x"))
def test_set_flags(repo_fixtures, request):
with get_repository_from_fixture(repo_fixtures, request) as repository:
id = H(0)
repository.put(id, fchunk(b""))
assert repository.flags(id) == 0x00000000 # init == all zero
repository.flags(id, mask=0x00000001, value=0x00000001)
assert repository.flags(id) == 0x00000001
repository.flags(id, mask=0x00000002, value=0x00000002)
assert repository.flags(id) == 0x00000003
repository.flags(id, mask=0x00000001, value=0x00000000)
assert repository.flags(id) == 0x00000002
repository.flags(id, mask=0x00000002, value=0x00000000)
assert repository.flags(id) == 0x00000000
def test_get_flags(repo_fixtures, request):
with get_repository_from_fixture(repo_fixtures, request) as repository:
id = H(0)
repository.put(id, fchunk(b""))
assert repository.flags(id) == 0x00000000 # init == all zero
repository.flags(id, mask=0xC0000003, value=0x80000001)
assert repository.flags(id, mask=0x00000001) == 0x00000001
assert repository.flags(id, mask=0x00000002) == 0x00000000
assert repository.flags(id, mask=0x40000008) == 0x00000000
assert repository.flags(id, mask=0x80000000) == 0x80000000
def test_flags_many(repo_fixtures, request):
with get_repository_from_fixture(repo_fixtures, request) as repository:
ids_flagged = [H(0), H(1)]
ids_default_flags = [H(2), H(3)]
[repository.put(id, fchunk(b"")) for id in ids_flagged + ids_default_flags]
repository.flags_many(ids_flagged, mask=0xFFFFFFFF, value=0xDEADBEEF)
assert list(repository.flags_many(ids_default_flags)) == [0x00000000, 0x00000000]
assert list(repository.flags_many(ids_flagged)) == [0xDEADBEEF, 0xDEADBEEF]
assert list(repository.flags_many(ids_flagged, mask=0xFFFF0000)) == [0xDEAD0000, 0xDEAD0000]
assert list(repository.flags_many(ids_flagged, mask=0x0000FFFF)) == [0x0000BEEF, 0x0000BEEF]
def test_flags_persistence(repo_fixtures, request):
with get_repository_from_fixture(repo_fixtures, request) as repository:
repository.put(H(0), fchunk(b"default"))
repository.put(H(1), fchunk(b"one one zero"))
# we do not set flags for H(0), so we can later check their default state.
repository.flags(H(1), mask=0x00000007, value=0x00000006)
repository.commit(compact=False)
with reopen(repository) as repository:
# we query all flags to check if the initial flags were all zero and
# only the ones we explicitly set to one are as expected.
assert repository.flags(H(0), mask=0xFFFFFFFF) == 0x00000000
assert repository.flags(H(1), mask=0xFFFFFFFF) == 0x00000006
def _assert_sparse(repository):
# the superseded 123456... PUT
assert repository.compact[0] == 41 + 8 + len(fchunk(b"123456789"))