diff --git a/lib/isc/Makefile.am b/lib/isc/Makefile.am index 4ff086b420..2d8976a363 100644 --- a/lib/isc/Makefile.am +++ b/lib/isc/Makefile.am @@ -37,6 +37,7 @@ libisc_la_HEADERS = \ include/isc/fuzz.h \ include/isc/glob.h \ include/isc/hash.h \ + include/isc/hashmap.h \ include/isc/heap.h \ include/isc/hex.h \ include/isc/hmac.h \ @@ -145,6 +146,7 @@ libisc_la_SOURCES = \ fsaccess_common_p.h \ glob.c \ hash.c \ + hashmap.c \ heap.c \ hex.c \ hmac.c \ diff --git a/lib/isc/hashmap.c b/lib/isc/hashmap.c new file mode 100644 index 0000000000..3526774cad --- /dev/null +++ b/lib/isc/hashmap.c @@ -0,0 +1,749 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/* + * This is an implementation of the Robin Hood hash table algorithm as + * described in [a] with simple linear searching, and backwards shift + * deletion algorithm as described in [b] and [c]. + * + * Further work: + * 1. Implement 4.1 Speeding up Searches - 4.4 Smart Search [a] + * 2. Implement A Fast Concurrent and Resizable Robin Hood Hash Table [b] + * + * a. https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf paper. + * b. https://dspace.mit.edu/bitstream/handle/1721.1/130693/1251799942-MIT.pdf + * c. 
+ * https://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define APPROX_99_PERCENT(x) (((x)*1013) >> 10) +#define APPROX_95_PERCENT(x) (((x)*972) >> 10) +#define APPROX_90_PERCENT(x) (((x)*921) >> 10) +#define APPROX_85_PERCENT(x) (((x)*870) >> 10) +#define APPROX_40_PERCENT(x) (((x)*409) >> 10) +#define APPROX_35_PERCENT(x) (((x)*359) >> 10) +#define APPROX_30_PERCENT(x) (((x)*308) >> 10) +#define APPROX_25_PERCENT(x) (((x)*256) >> 10) +#define APPROX_20_PERCENT(x) (((x)*205) >> 10) +#define APPROX_15_PERCENT(x) (((x)*154) >> 10) +#define APPROX_10_PERCENT(x) (((x)*103) >> 10) +#define APPROX_05_PERCENT(x) (((x)*52) >> 10) +#define APPROX_01_PERCENT(x) (((x)*11) >> 10) + +#define ISC_HASHMAP_MAGIC ISC_MAGIC('H', 'M', 'a', 'p') +#define ISC_HASHMAP_VALID(hashmap) ISC_MAGIC_VALID(hashmap, ISC_HASHMAP_MAGIC) + +/* We have two tables for incremental rehashing */ +#define HASHMAP_NUM_TABLES 2 + +#define HASHSIZE(bits) (UINT64_C(1) << (bits)) + +#define HASHMAP_NO_BITS 0U +#define HASHMAP_MIN_BITS 1U +#define HASHMAP_MAX_BITS 32U + +typedef struct hashmap_node { + const uint8_t *key; + void *value; + uint32_t hashval; + uint32_t psl; + uint16_t keysize; +} hashmap_node_t; + +typedef struct hashmap_table { + size_t size; + uint8_t hashbits; + uint32_t hashmask; + hashmap_node_t *table; +} hashmap_table_t; + +struct isc_hashmap { + unsigned int magic; + bool case_sensitive; + uint8_t hindex; + uint32_t hiter; /* rehashing iterator */ + isc_mem_t *mctx; + size_t count; + uint8_t hash_key[16]; + hashmap_table_t tables[HASHMAP_NUM_TABLES]; +}; + +struct isc_hashmap_iter { + isc_hashmap_t *hashmap; + size_t i; + uint8_t hindex; + hashmap_node_t *cur; +}; + +static isc_result_t +hashmap_add(isc_hashmap_t *hashmap, const uint32_t hashval, const uint8_t *key, + const uint32_t keysize, void *value, uint8_t idx); + 
+static void +hashmap_rehash_one(isc_hashmap_t *hashmap); +static void +hashmap_rehash_start_grow(isc_hashmap_t *hashmap); +static void +hashmap_rehash_start_shrink(isc_hashmap_t *hashmap); +static bool +over_threshold(isc_hashmap_t *hashmap); +static bool +under_threshold(isc_hashmap_t *hashmap); + +static uint8_t +hashmap_nexttable(uint8_t idx) { + return ((idx == 0) ? 1 : 0); +} + +static bool +rehashing_in_progress(const isc_hashmap_t *hashmap) { + return (hashmap->tables[hashmap_nexttable(hashmap->hindex)].table != + NULL); +} + +static bool +try_nexttable(const isc_hashmap_t *hashmap, uint8_t idx) { + return (idx == hashmap->hindex && rehashing_in_progress(hashmap)); +} + +static void +hashmap_node_init(hashmap_node_t *node, const uint32_t hashval, + const uint8_t *key, const uint32_t keysize, void *value) { + REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX); + + *node = (hashmap_node_t){ + .value = value, + .hashval = hashval, + .key = key, + .keysize = keysize, + .psl = 0, + }; +} + +static void __attribute__((__unused__)) +hashmap_dump_table(const isc_hashmap_t *hashmap, const uint8_t idx) { + fprintf(stderr, + "====== %" PRIu8 " (bits = %" PRIu8 ", size = %zu =====\n", idx, + hashmap->tables[idx].hashbits, hashmap->tables[idx].size); + for (size_t i = 0; i < hashmap->tables[idx].size; i++) { + hashmap_node_t *node = &hashmap->tables[idx].table[i]; + if (node->key != NULL) { + uint32_t hash = isc_hash_bits32( + node->hashval, hashmap->tables[idx].hashbits); + fprintf(stderr, + "%p: %zu -> %p" + ", value = %p" + ", hash = %" PRIu32 ", hashval = %" PRIu32 + ", psl = %" PRIu32 ", key = %s\n", + hashmap, i, node, node->value, hash, + node->hashval, node->psl, (char *)node->key); + } + } + fprintf(stderr, "================\n\n"); +} + +static void +hashmap_create_table(isc_hashmap_t *hashmap, const uint8_t idx, + const uint8_t bits) { + size_t size; + + REQUIRE(hashmap->tables[idx].hashbits == HASHMAP_NO_BITS); + REQUIRE(hashmap->tables[idx].table 
== NULL); + REQUIRE(bits >= HASHMAP_MIN_BITS); + REQUIRE(bits <= HASHMAP_MAX_BITS); + + hashmap->tables[idx] = (hashmap_table_t){ + .hashbits = bits, + .hashmask = HASHSIZE(bits) - 1, + .size = HASHSIZE(bits), + }; + + size = hashmap->tables[idx].size * + sizeof(hashmap->tables[idx].table[0]); + + hashmap->tables[idx].table = isc_mem_getx(hashmap->mctx, size, + ISC_MEM_ZERO); +} + +static void +hashmap_free_table(isc_hashmap_t *hashmap, const uint8_t idx, bool cleanup) { + size_t size; + + if (cleanup) { + for (size_t i = 0; i < hashmap->tables[idx].size; i++) { + hashmap_node_t *node = &hashmap->tables[idx].table[i]; + if (node->key != NULL) { + *node = (hashmap_node_t){ 0 }; + hashmap->count--; + } + } + } + + size = hashmap->tables[idx].size * + sizeof(hashmap->tables[idx].table[0]); + isc_mem_put(hashmap->mctx, hashmap->tables[idx].table, size); + + hashmap->tables[idx] = (hashmap_table_t){ + .hashbits = HASHMAP_NO_BITS, + }; +} + +void +isc_hashmap_create(isc_mem_t *mctx, uint8_t bits, unsigned int options, + isc_hashmap_t **hashmapp) { + isc_hashmap_t *hashmap = isc_mem_get(mctx, sizeof(*hashmap)); + bool case_sensitive = ((options & ISC_HASHMAP_CASE_INSENSITIVE) == 0); + + REQUIRE(hashmapp != NULL && *hashmapp == NULL); + REQUIRE(mctx != NULL); + REQUIRE(bits >= HASHMAP_MIN_BITS && bits <= HASHMAP_MAX_BITS); + + *hashmap = (isc_hashmap_t){ + .magic = ISC_HASHMAP_MAGIC, + .hash_key = { 0, 1 }, + .case_sensitive = case_sensitive, + }; + isc_mem_attach(mctx, &hashmap->mctx); + +#if !defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && !defined(UNIT_TESTING) + isc_entropy_get(hashmap->hash_key, sizeof(hashmap->hash_key)); +#endif /* if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION */ + + hashmap_create_table(hashmap, 0, bits); + + hashmap->magic = ISC_HASHMAP_MAGIC; + + *hashmapp = hashmap; +} + +void +isc_hashmap_destroy(isc_hashmap_t **hashmapp) { + isc_hashmap_t *hashmap; + + REQUIRE(hashmapp != NULL && *hashmapp != NULL); + REQUIRE(ISC_HASHMAP_VALID(*hashmapp)); 
+ + hashmap = *hashmapp; + *hashmapp = NULL; + + hashmap->magic = 0; + + for (size_t i = 0; i < HASHMAP_NUM_TABLES; i++) { + if (hashmap->tables[i].table != NULL) { + hashmap_free_table(hashmap, i, true); + } + } + INSIST(hashmap->count == 0); + + isc_mem_putanddetach(&hashmap->mctx, hashmap, sizeof(*hashmap)); +} + +static bool +hashmap_match(hashmap_node_t *node, const uint32_t hashval, const uint8_t *key, + const uint32_t keysize, const bool case_sensitive) { + return (node->hashval == hashval && node->keysize == keysize && + (case_sensitive + ? (memcmp(node->key, key, keysize) == 0) + : (isc_ascii_lowerequal(node->key, key, keysize)))); +} + +static hashmap_node_t * +hashmap_find(const isc_hashmap_t *hashmap, const uint32_t hashval, + const uint8_t *key, uint32_t keysize, uint32_t *pslp, + uint8_t *idxp) { + uint32_t hash; + uint32_t psl; + uint8_t idx = *idxp; + uint32_t pos; + +nexttable: + psl = 0; + hash = isc_hash_bits32(hashval, hashmap->tables[idx].hashbits); + + while (true) { + hashmap_node_t *node = NULL; + + pos = (hash + psl) & hashmap->tables[idx].hashmask; + + node = &hashmap->tables[idx].table[pos]; + + if (node->key == NULL || psl > node->psl) { + break; + } + + if (hashmap_match(node, hashval, key, keysize, + hashmap->case_sensitive)) { + *pslp = psl; + *idxp = idx; + return (node); + } + + psl++; + } + if (try_nexttable(hashmap, idx)) { + idx = hashmap_nexttable(idx); + goto nexttable; + } + + return (NULL); +} + +uint32_t +isc_hashmap_hash(const isc_hashmap_t *hashmap, const void *key, + uint32_t keysize) { + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX); + + uint32_t hashval; + + isc_halfsiphash24(hashmap->hash_key, key, keysize, + hashmap->case_sensitive, (uint8_t *)&hashval); + + return (hashval); +} + +isc_result_t +isc_hashmap_find(const isc_hashmap_t *hashmap, const uint32_t *hashvalp, + const void *key, uint32_t keysize, void **valuep) { + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + 
/*
 * Remove 'entry' from table 'idx' using backward-shift deletion.
 *
 * 'hashval' and 'psl' locate the entry: its slot is the home slot of
 * 'hashval' in this table plus 'psl'.  Every following node that is not in
 * its own home slot (psl > 0) is moved one slot back and its psl reduced by
 * one; the shift stops at the first empty slot or the first node with
 * psl == 0.  The last vacated slot is zeroed.  The element count of the
 * hashmap is decremented.
 */
static void
hashmap_delete_node(isc_hashmap_t *hashmap, hashmap_node_t *entry,
		    uint32_t hashval, uint32_t psl, const uint8_t idx) {
	uint32_t pos;
	uint32_t hash;

	hashmap->count--;

	hash = isc_hash_bits32(hashval, hashmap->tables[idx].hashbits);
	/* Not masked here; the mask is applied when stepping to pos + 1 */
	pos = hash + psl;

	while (true) {
		hashmap_node_t *node = NULL;

		/* Advance to the successor slot, wrapping around the table */
		pos = (pos + 1) & hashmap->tables[idx].hashmask;
		INSIST(pos < hashmap->tables[idx].size);

		node = &hashmap->tables[idx].table[pos];

		/* Nothing left to shift: empty slot or node in its home slot */
		if (node->key == NULL || node->psl == 0) {
			break;
		}

		/* Move the successor one slot back, closer to its home slot */
		node->psl--;
		*entry = *node;
		/* The successor's old slot is now the hole to fill or clear */
		entry = &hashmap->tables[idx].table[pos];
	}

	/* Clear the final hole; a NULL key marks the slot as empty */
	*entry = (hashmap_node_t){ 0 };
}
hashmap->hindex); + INSIST(result == ISC_R_SUCCESS); + + /* + * we don't increase the hiter here because the table has been reordered + * when we deleted the old node + */ +} + +static uint32_t +grow_bits(isc_hashmap_t *hashmap) { + uint32_t newbits = hashmap->tables[hashmap->hindex].hashbits + 1; + size_t newsize = HASHSIZE(newbits); + + while (hashmap->count > APPROX_40_PERCENT(newsize)) { + newbits += 1; + newsize = HASHSIZE(newbits); + } + if (newbits > HASHMAP_MAX_BITS) { + newbits = HASHMAP_MAX_BITS; + } + + return (newbits); +} + +static uint32_t +shrink_bits(isc_hashmap_t *hashmap) { + uint32_t newbits = hashmap->tables[hashmap->hindex].hashbits - 1; + + if (newbits <= HASHMAP_MIN_BITS) { + newbits = HASHMAP_MIN_BITS; + } + + return (newbits); +} + +static void +hashmap_rehash_start_grow(isc_hashmap_t *hashmap) { + uint32_t newbits; + uint8_t oldindex = hashmap->hindex; + uint32_t oldbits = hashmap->tables[oldindex].hashbits; + uint8_t newindex = hashmap_nexttable(oldindex); + + REQUIRE(!rehashing_in_progress(hashmap)); + + newbits = grow_bits(hashmap); + + if (newbits > oldbits) { + hashmap_create_table(hashmap, newindex, newbits); + hashmap->hindex = newindex; + } +} + +static void +hashmap_rehash_start_shrink(isc_hashmap_t *hashmap) { + uint32_t newbits; + uint8_t oldindex = hashmap->hindex; + uint32_t oldbits = hashmap->tables[oldindex].hashbits; + uint8_t newindex = hashmap_nexttable(oldindex); + + REQUIRE(!rehashing_in_progress(hashmap)); + + newbits = shrink_bits(hashmap); + + if (newbits < oldbits) { + hashmap_create_table(hashmap, newindex, newbits); + hashmap->hindex = newindex; + } +} + +isc_result_t +isc_hashmap_delete(isc_hashmap_t *hashmap, const uint32_t *hashvalp, + const void *key, uint32_t keysize) { + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX); + + hashmap_node_t *node; + isc_result_t result = ISC_R_NOTFOUND; + uint32_t psl = 0; + uint8_t idx; + uint32_t hashval = (hashvalp != NULL) 
+ ? *hashvalp + : isc_hashmap_hash(hashmap, key, keysize); + + if (rehashing_in_progress(hashmap)) { + hashmap_rehash_one(hashmap); + } else if (under_threshold(hashmap)) { + hashmap_rehash_start_shrink(hashmap); + hashmap_rehash_one(hashmap); + } + + /* Initialize idx after possible shrink start */ + idx = hashmap->hindex; + + node = hashmap_find(hashmap, hashval, key, keysize, &psl, &idx); + if (node != NULL) { + INSIST(node->key != NULL); + hashmap_delete_node(hashmap, node, hashval, psl, idx); + result = ISC_R_SUCCESS; + } + + return (result); +} + +static bool +over_threshold(isc_hashmap_t *hashmap) { + uint32_t bits = hashmap->tables[hashmap->hindex].hashbits; + if (bits == HASHMAP_MAX_BITS) { + return (false); + } + size_t threshold = APPROX_90_PERCENT(HASHSIZE(bits)); + return (hashmap->count > threshold); +} + +static bool +under_threshold(isc_hashmap_t *hashmap) { + uint32_t bits = hashmap->tables[hashmap->hindex].hashbits; + if (bits == HASHMAP_MIN_BITS) { + return (false); + } + size_t threshold = APPROX_20_PERCENT(HASHSIZE(bits)); + return (hashmap->count < threshold); +} + +static isc_result_t +hashmap_add(isc_hashmap_t *hashmap, const uint32_t hashval, const uint8_t *key, + const uint32_t keysize, void *value, uint8_t idx) { + uint32_t hash; + uint32_t psl = 0; + hashmap_node_t node; + hashmap_node_t *current = NULL; + uint32_t pos; + + hash = isc_hash_bits32(hashval, hashmap->tables[idx].hashbits); + + /* Initialize the node to be store to 'node' */ + hashmap_node_init(&node, hashval, key, keysize, value); + + psl = 0; + while (true) { + pos = (hash + psl) & hashmap->tables[idx].hashmask; + + current = &hashmap->tables[idx].table[pos]; + + /* Found an empty node */ + if (current->key == NULL) { + break; + } + + if (hashmap_match(current, hashval, key, keysize, + hashmap->case_sensitive)) { + return (ISC_R_EXISTS); + } + /* Found rich node */ + if (node.psl > current->psl) { + /* Swap the poor with the rich node */ + ISC_SWAP(*current, node); + } + + 
node.psl++; + psl++; + } + + /* + * Possible optimalization - start growing when the poor node is too far + */ +#if ISC_HASHMAP_GROW_FAST + if (psl > hashmap->hashbits[idx]) { + if (!rehashing_in_progress(hashmap)) { + hashmap_rehash_start_grow(hashmap); + } + } +#endif + + hashmap->count++; + + /* We found an empty place, store entry into current node */ + *current = node; + + return (ISC_R_SUCCESS); +} + +isc_result_t +isc_hashmap_add(isc_hashmap_t *hashmap, const uint32_t *hashvalp, + const void *key, uint32_t keysize, void *value) { + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX); + + isc_result_t result; + uint32_t hashval = (hashvalp != NULL) + ? *hashvalp + : isc_hashmap_hash(hashmap, key, keysize); + + if (rehashing_in_progress(hashmap)) { + hashmap_rehash_one(hashmap); + } else if (over_threshold(hashmap)) { + hashmap_rehash_start_grow(hashmap); + hashmap_rehash_one(hashmap); + } + + if (rehashing_in_progress(hashmap)) { + uint8_t fidx = hashmap_nexttable(hashmap->hindex); + uint32_t psl; + + /* Look for the value in the old table */ + if (hashmap_find(hashmap, hashval, key, keysize, &psl, &fidx)) { + return (ISC_R_EXISTS); + } + } + + result = hashmap_add(hashmap, hashval, key, keysize, value, + hashmap->hindex); + switch (result) { + case ISC_R_SUCCESS: + case ISC_R_EXISTS: + return (result); + default: + UNREACHABLE(); + } +} + +void +isc_hashmap_iter_create(isc_hashmap_t *hashmap, isc_hashmap_iter_t **iterp) { + isc_hashmap_iter_t *iter; + + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + REQUIRE(iterp != NULL && *iterp == NULL); + + iter = isc_mem_get(hashmap->mctx, sizeof(*iter)); + *iter = (isc_hashmap_iter_t){ + .hashmap = hashmap, + .hindex = hashmap->hindex, + }; + + *iterp = iter; +} + +void +isc_hashmap_iter_destroy(isc_hashmap_iter_t **iterp) { + isc_hashmap_iter_t *iter; + isc_hashmap_t *hashmap; + + REQUIRE(iterp != NULL && *iterp != NULL); + + iter = *iterp; + *iterp = NULL; + hashmap = 
iter->hashmap; + isc_mem_put(hashmap->mctx, iter, sizeof(*iter)); +} + +static isc_result_t +isc__hashmap_iter_next(isc_hashmap_iter_t *iter) { + isc_hashmap_t *hashmap = iter->hashmap; + + while (iter->i < hashmap->tables[iter->hindex].size && + hashmap->tables[iter->hindex].table[iter->i].key == NULL) + { + iter->i++; + } + + if (iter->i < hashmap->tables[iter->hindex].size) { + iter->cur = &hashmap->tables[iter->hindex].table[iter->i]; + + return (ISC_R_SUCCESS); + } + + if (try_nexttable(hashmap, iter->hindex)) { + iter->hindex = hashmap_nexttable(iter->hindex); + iter->i = 0; + return (isc__hashmap_iter_next(iter)); + } + + return (ISC_R_NOMORE); +} + +isc_result_t +isc_hashmap_iter_first(isc_hashmap_iter_t *iter) { + REQUIRE(iter != NULL); + + iter->hindex = iter->hashmap->hindex; + iter->i = 0; + + return (isc__hashmap_iter_next(iter)); +} + +isc_result_t +isc_hashmap_iter_next(isc_hashmap_iter_t *iter) { + REQUIRE(iter != NULL); + REQUIRE(iter->cur != NULL); + + iter->i++; + + return (isc__hashmap_iter_next(iter)); +} + +isc_result_t +isc_hashmap_iter_delcurrent_next(isc_hashmap_iter_t *iter) { + REQUIRE(iter != NULL); + REQUIRE(iter->cur != NULL); + + hashmap_node_t *node = + &iter->hashmap->tables[iter->hindex].table[iter->i]; + + hashmap_delete_node(iter->hashmap, node, node->hashval, node->psl, + iter->hindex); + + return (isc__hashmap_iter_next(iter)); +} + +void +isc_hashmap_iter_current(isc_hashmap_iter_t *it, void **valuep) { + REQUIRE(it != NULL); + REQUIRE(it->cur != NULL); + REQUIRE(valuep != NULL && *valuep == NULL); + + *valuep = it->cur->value; +} + +void +isc_hashmap_iter_currentkey(isc_hashmap_iter_t *it, const unsigned char **key, + size_t *keysize) { + REQUIRE(it != NULL); + REQUIRE(it->cur != NULL); + REQUIRE(key != NULL && *key == NULL); + + *key = it->cur->key; + *keysize = it->cur->keysize; +} + +unsigned int +isc_hashmap_count(isc_hashmap_t *hashmap) { + REQUIRE(ISC_HASHMAP_VALID(hashmap)); + + return (hashmap->count); +} diff --git 
/*%
 * Create hashmap at *hashmapp, using memory context and size of (1<<bits)
 *
 * Requires:
 * \li 'hashmapp' is not NULL and '*hashmapp' is NULL.
 * \li 'mctx' is not NULL.
 * \li 'bits' >=1 and 'bits' <=32
 *
 */
/*%
 * Find a node matching 'key'/'keysize' in hashmap 'hashmap';
 * if found, set '*valuep' to its value. (If 'valuep' is NULL,
 * then simply return SUCCESS or NOTFOUND to indicate whether the
 * key exists in the hashmap.)
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap
 * \li 'hashvalp' is either NULL or points to the precomputed hash value
 *     of 'key'; when it is NULL the hash is computed from 'key'
 * \li 'key' is a non-NULL key of size 'keysize', where 'keysize' is
 *     greater than 0 and at most UINT16_MAX
 *
 * Returns:
 * \li #ISC_R_SUCCESS -- success
 * \li #ISC_R_NOTFOUND -- key not found
 */
/*%
 * Set '*key' and '*keysize' to the key and key size of the entry
 * currently under the iterator
 *
 * Requires:
 * \li 'it' is non NULL.
 * \li 'key' is non NULL and '*key' is NULL.
 * \li 'keysize' is non NULL.
 */
+ * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#include +#include /* IWYU pragma: keep */ +#include +#include +#include +#include +#include +#include + +#define UNIT_TESTING +#include + +#include +#include +#include +#include +#include +#include + +#include + +/* INCLUDE LAST */ + +#define mctx __mctx +#include "hashmap.c" +#undef mctx + +typedef struct test_node { + uint32_t hashval; + char key[64]; +} test_node_t; + +static void +test_hashmap_full(uint8_t init_bits, uintptr_t count) { + isc_hashmap_t *hashmap = NULL; + isc_result_t result; + test_node_t *nodes, *long_nodes, *upper_nodes; + + nodes = isc_mem_get(mctx, count * sizeof(nodes[0])); + long_nodes = isc_mem_get(mctx, count * sizeof(nodes[0])); + upper_nodes = isc_mem_get(mctx, count * sizeof(nodes[0])); + + isc_hashmap_create(mctx, init_bits, ISC_HASHMAP_CASE_SENSITIVE, + &hashmap); + assert_non_null(hashmap); + + /* + * Note: snprintf() is followed with strlcat() + * to ensure we are always filling the 16 byte key. 
/* check if the short nodes were inserted */
isc_hashmap_delete(hashmap, &nodes[i].hashval, + nodes[i].key, 16); + assert_int_equal(result, ISC_R_SUCCESS); + result = isc_hashmap_find(hashmap, NULL, nodes[i].key, 16, &f); + assert_int_equal(result, ISC_R_NOTFOUND); + assert_null(f); + } + + for (size_t i = 0; i < count; i++) { + result = isc_hashmap_add(hashmap, NULL, upper_nodes[i].key, 16, + &upper_nodes[i]); + assert_int_equal(result, ISC_R_SUCCESS); + } + + for (size_t i = 0; i < count; i++) { + void *f = NULL; + result = isc_hashmap_delete( + hashmap, NULL, long_nodes[i].key, + strlen((const char *)long_nodes[i].key)); + assert_int_equal(result, ISC_R_SUCCESS); + result = isc_hashmap_find( + hashmap, NULL, long_nodes[i].key, + strlen((const char *)long_nodes[i].key), &f); + assert_int_equal(result, ISC_R_NOTFOUND); + assert_null(f); + } + + for (size_t i = 0; i < count; i++) { + void *f = NULL; + result = isc_hashmap_find(hashmap, NULL, upper_nodes[i].key, 16, + &f); + assert_int_equal(result, ISC_R_SUCCESS); + assert_ptr_equal(f, &upper_nodes[i]); + } + + for (size_t i = 0; i < count; i++) { + void *f = NULL; + result = isc_hashmap_find(hashmap, NULL, nodes[i].key, 16, &f); + assert_int_equal(result, ISC_R_NOTFOUND); + assert_null(f); + } + + isc_hashmap_destroy(&hashmap); + assert_null(hashmap); + + isc_mem_put(mctx, nodes, count * sizeof(nodes[0])); + isc_mem_put(mctx, long_nodes, count * sizeof(nodes[0])); + isc_mem_put(mctx, upper_nodes, count * sizeof(nodes[0])); +} + +static void +test_hashmap_iterator(void) { + isc_hashmap_t *hashmap = NULL; + isc_result_t result; + isc_hashmap_iter_t *iter = NULL; + size_t count = 7600; + uint32_t walked; + size_t tksize; + test_node_t *nodes; + + nodes = isc_mem_get(mctx, count * sizeof(nodes[0])); + + isc_hashmap_create(mctx, HASHMAP_MIN_BITS, ISC_HASHMAP_CASE_SENSITIVE, + &hashmap); + assert_non_null(hashmap); + + for (size_t i = 0; i < count; i++) { + /* short keys */ + snprintf(nodes[i].key, 16, "%u", (unsigned int)i); + strlcat(nodes[i].key, " key of a raw 
/* erase the entries with an even index (i % 2 == 0) */
- nodes; + snprintf(key, 16, "%u", (unsigned int)i); + strlcat(key, " key of a raw hashmap!!", 16); + assert_memory_equal(key, tkey, 16); + + if (i % 2 == 1) { + result = isc_hashmap_iter_delcurrent_next(iter); + } else { + result = isc_hashmap_iter_next(iter); + } + walked++; + } + assert_int_equal(result, ISC_R_NOMORE); + assert_int_equal(walked, count / 2); + + walked = 0; + for (result = isc_hashmap_iter_first(iter); result == ISC_R_SUCCESS; + result = isc_hashmap_iter_next(iter)) + { + walked++; + } + + assert_int_equal(result, ISC_R_NOMORE); + assert_int_equal(walked, 0); + + /* Iterator doesn't progress rehashing */ + assert_true(rehashing_in_progress(hashmap)); + + isc_hashmap_iter_destroy(&iter); + assert_null(iter); + + isc_hashmap_destroy(&hashmap); + assert_null(hashmap); + + isc_mem_put(mctx, nodes, count * sizeof(nodes[0])); +} + +/* 1 bit, 120 elements test, full rehashing */ +ISC_RUN_TEST_IMPL(isc_hashmap_1_120) { + test_hashmap_full(1, 120); + return; +} + +/* 6 bit, 1000 elements test, full rehashing */ +ISC_RUN_TEST_IMPL(isc_hashmap_6_1000) { + test_hashmap_full(6, 1000); + return; +} + +/* 24 bit, 200K elements test, no rehashing */ +ISC_RUN_TEST_IMPL(isc_hashmap_24_200000) { + test_hashmap_full(24, 200000); + return; +} + +/* 1 bit, 48K elements test, full rehashing */ +ISC_RUN_TEST_IMPL(isc_hashmap_1_48000) { + test_hashmap_full(1, 48000); + return; +} + +/* 8 bit, 20k elements test, partial rehashing */ +ISC_RUN_TEST_IMPL(isc_hashmap_8_20000) { + test_hashmap_full(8, 20000); + return; +} + +/* test hashmap iterator */ + +ISC_RUN_TEST_IMPL(isc_hashmap_iterator) { + test_hashmap_iterator(); + return; +} + +ISC_RUN_TEST_IMPL(isc_hashmap_case) { + isc_result_t result; + isc_hashmap_t *hashmap = NULL; + test_node_t lower = { .key = "isc_hashmap_case" }; + test_node_t upper = { .key = "ISC_HASHMAP_CASE" }; + test_node_t mixed = { .key = "IsC_hAsHmAp_CaSe" }; + test_node_t *value; + + isc_hashmap_create(mctx, 1, ISC_HASHMAP_CASE_SENSITIVE,
&hashmap); + + result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key), + &lower); + assert_int_equal(result, ISC_R_SUCCESS); + + result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key), + &lower); + assert_int_equal(result, ISC_R_EXISTS); + + result = isc_hashmap_add(hashmap, NULL, upper.key, strlen(upper.key), + &upper); + assert_int_equal(result, ISC_R_SUCCESS); + + result = isc_hashmap_find(hashmap, NULL, mixed.key, strlen(mixed.key), + (void *)&value); + assert_int_equal(result, ISC_R_NOTFOUND); + + isc_hashmap_destroy(&hashmap); + + isc_hashmap_create(mctx, 1, ISC_HASHMAP_CASE_INSENSITIVE, &hashmap); + + result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key), + &lower); + assert_int_equal(result, ISC_R_SUCCESS); + + result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key), + &lower); + assert_int_equal(result, ISC_R_EXISTS); + + result = isc_hashmap_add(hashmap, NULL, upper.key, strlen(upper.key), + &upper); + assert_int_equal(result, ISC_R_EXISTS); + + result = isc_hashmap_find(hashmap, NULL, mixed.key, strlen(mixed.key), + (void *)&value); + assert_int_equal(result, ISC_R_SUCCESS); + + isc_hashmap_destroy(&hashmap); +} + +ISC_TEST_LIST_START +ISC_TEST_ENTRY(isc_hashmap_case) +ISC_TEST_ENTRY(isc_hashmap_1_120) +ISC_TEST_ENTRY(isc_hashmap_6_1000) +ISC_TEST_ENTRY(isc_hashmap_24_200000) +ISC_TEST_ENTRY(isc_hashmap_1_48000) +ISC_TEST_ENTRY(isc_hashmap_8_20000) +ISC_TEST_ENTRY(isc_hashmap_iterator) +ISC_TEST_LIST_END + +ISC_TEST_MAIN diff --git a/tests/isc/ht_test.c b/tests/isc/ht_test.c index 152994e47d..eec36f53c0 100644 --- a/tests/isc/ht_test.c +++ b/tests/isc/ht_test.c @@ -39,7 +39,7 @@ #undef mctx static void -test_ht_full(uint8_t init_bits, uint8_t finish_bits, uintptr_t count) { +test_ht_full(uint8_t init_bits, uintptr_t count) { isc_ht_t *ht = NULL; isc_result_t result; uintptr_t i; @@ -174,8 +174,6 @@ test_ht_full(uint8_t init_bits, uint8_t finish_bits, uintptr_t count) { assert_null(f); } - 
assert_int_equal(ht->hashbits[ht->hindex], finish_bits); - isc_ht_destroy(&ht); assert_null(ht); } @@ -186,7 +184,7 @@ test_ht_iterator(void) { isc_result_t result; isc_ht_iter_t *iter = NULL; uintptr_t i; - uintptr_t count = 6300; + uintptr_t count = 7600; uint32_t walked; unsigned char key[16]; size_t tksize; @@ -296,22 +294,34 @@ test_ht_iterator(void) { assert_null(ht); } +/* 1 bit, 120 elements test, full rehashing */ +ISC_RUN_TEST_IMPL(isc_ht_1_120) { + test_ht_full(1, 120); + return; +} + +/* 6 bit, 1000 elements test, full rehashing */ +ISC_RUN_TEST_IMPL(isc_ht_6_1000) { + test_ht_full(6, 1000); + return; +} + /* 24 bit, 200K elements test, no rehashing */ -ISC_RUN_TEST_IMPL(isc_ht_24) { +ISC_RUN_TEST_IMPL(isc_ht_24_200000) { UNUSED(state); - test_ht_full(24, 24, 200000); + test_ht_full(24, 200000); } /* 15 bit, 45K elements test, full rehashing */ -ISC_RUN_TEST_IMPL(isc_ht_15) { +ISC_RUN_TEST_IMPL(isc_ht_1_48000) { UNUSED(state); - test_ht_full(1, 15, 48000); + test_ht_full(1, 48000); } /* 8 bit, 20k elements test, partial rehashing */ -ISC_RUN_TEST_IMPL(isc_ht_8) { +ISC_RUN_TEST_IMPL(isc_ht_8_20000) { UNUSED(state); - test_ht_full(8, 14, 20000); + test_ht_full(8, 20000); } /* test hashtable iterator */ @@ -322,9 +332,11 @@ ISC_RUN_TEST_IMPL(isc_ht_iterator) { } ISC_TEST_LIST_START -ISC_TEST_ENTRY(isc_ht_24) -ISC_TEST_ENTRY(isc_ht_15) -ISC_TEST_ENTRY(isc_ht_8) +ISC_TEST_ENTRY(isc_ht_1_120) +ISC_TEST_ENTRY(isc_ht_6_1000) +ISC_TEST_ENTRY(isc_ht_24_200000) +ISC_TEST_ENTRY(isc_ht_1_48000) +ISC_TEST_ENTRY(isc_ht_8_20000) ISC_TEST_ENTRY(isc_ht_iterator) ISC_TEST_LIST_END