Add isc_hashmap API that implements Robin Hood hashing

Add new isc_hashmap API that differs from the current isc_ht API in
several aspects:

1. It implements Robin Hood hashing, which is an open-addressing hash table
   algorithm (i.e. no linked lists)

2. No memory allocations - the array to store the nodes is made of
   isc_hashmap_node_t structures instead of just pointers, so there's
   only allocation on resize.

3. The key is not copied into the hashmap node and must also be stored
   externally, either as part of the stored value or in any other
   location that stays valid for as long as the value is stored in the
   hashmap.

This makes the isc_hashmap_t a little less universal because of the key
storage requirements, but the inserts and deletes are faster because
they don't require memory allocation on isc_hashmap_add() and memory
deallocation on isc_hashmap_delete().
This commit is contained in:
Ondřej Surý 2022-06-24 08:32:12 +02:00
parent 0744ebe220
commit f46ce447a6
No known key found for this signature in database
GPG key ID: 2820F37E873DEA41
6 changed files with 1388 additions and 13 deletions

View file

@ -37,6 +37,7 @@ libisc_la_HEADERS = \
include/isc/fuzz.h \
include/isc/glob.h \
include/isc/hash.h \
include/isc/hashmap.h \
include/isc/heap.h \
include/isc/hex.h \
include/isc/hmac.h \
@ -145,6 +146,7 @@ libisc_la_SOURCES = \
fsaccess_common_p.h \
glob.c \
hash.c \
hashmap.c \
heap.c \
hex.c \
hmac.c \

749
lib/isc/hashmap.c Normal file
View file

@ -0,0 +1,749 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
/*
* This is an implementation of the Robin Hood hash table algorithm as
* described in [a] with simple linear searching, and backwards shift
* deletion algorithm as described in [b] and [c].
*
* Further work:
* 1. Implement 4.1 Speeding up Searches - 4.4 Smart Search [a]
* 2. Implement A Fast Concurrent and Resizable Robin Hood Hash Table [b]
*
* a. https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf paper.
* b. https://dspace.mit.edu/bitstream/handle/1721.1/130693/1251799942-MIT.pdf
* c.
* https://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
*/
#include <ctype.h>
#include <inttypes.h>
#include <string.h>
#include <isc/ascii.h>
#include <isc/entropy.h>
#include <isc/hash.h>
#include <isc/hashmap.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/result.h>
#include <isc/siphash.h>
#include <isc/types.h>
#include <isc/util.h>
/*
 * Integer approximations of "N percent of x": each macro multiplies 'x'
 * by round(N * 1024 / 100) and shifts the product right by 10 bits
 * (i.e. divides by 1024), so no division or floating point is needed.
 */
#define APPROX_99_PERCENT(x) (((x)*1013) >> 10)
#define APPROX_95_PERCENT(x) (((x)*972) >> 10)
#define APPROX_90_PERCENT(x) (((x)*921) >> 10)
#define APPROX_85_PERCENT(x) (((x)*870) >> 10)
#define APPROX_40_PERCENT(x) (((x)*409) >> 10)
#define APPROX_35_PERCENT(x) (((x)*359) >> 10)
#define APPROX_30_PERCENT(x) (((x)*308) >> 10)
#define APPROX_25_PERCENT(x) (((x)*256) >> 10)
#define APPROX_20_PERCENT(x) (((x)*205) >> 10)
#define APPROX_15_PERCENT(x) (((x)*154) >> 10)
#define APPROX_10_PERCENT(x) (((x)*103) >> 10)
#define APPROX_05_PERCENT(x) (((x)*52) >> 10)
#define APPROX_01_PERCENT(x) (((x)*11) >> 10)
/* Magic value stored in isc_hashmap_t and checked by ISC_HASHMAP_VALID() */
#define ISC_HASHMAP_MAGIC ISC_MAGIC('H', 'M', 'a', 'p')
#define ISC_HASHMAP_VALID(hashmap) ISC_MAGIC_VALID(hashmap, ISC_HASHMAP_MAGIC)
/* We have two tables for incremental rehashing */
#define HASHMAP_NUM_TABLES 2
/* Number of slots in a table of 'bits' bits, computed as a 64-bit value */
#define HASHSIZE(bits) (UINT64_C(1) << (bits))
/* 'hashbits' value of a table entry that has no slot array allocated */
#define HASHMAP_NO_BITS 0U
/* Smallest and largest table size, in bits, accepted when creating a table */
#define HASHMAP_MIN_BITS 1U
#define HASHMAP_MAX_BITS 32U
/*
 * One slot of a hash table.  A slot whose 'key' is NULL is empty.
 */
typedef struct hashmap_node {
	const uint8_t *key; /* key bytes; the pointer is stored, not a copy */
	void *value;	    /* value pointer supplied by the caller */
	uint32_t hashval;   /* full 32-bit hash value of the key */
	uint32_t psl;	    /* probe sequence length: offset from the home slot */
	uint16_t keysize;   /* length of 'key' in bytes */
} hashmap_node_t;
/*
 * One of the (up to two) slot arrays of a hashmap.
 */
typedef struct hashmap_table {
	size_t size;	       /* number of slots: HASHSIZE(hashbits) */
	uint8_t hashbits;      /* size in bits; HASHMAP_NO_BITS if unallocated */
	uint32_t hashmask;     /* HASHSIZE(hashbits) - 1; wraps slot positions */
	hashmap_node_t *table; /* slot array, NULL if unallocated */
} hashmap_table_t;
/*
 * The hashmap itself.  While a rehash is in progress both entries of
 * 'tables' are allocated: tables[hindex] receives new nodes and the other
 * one is drained one node at a time.
 */
struct isc_hashmap {
	unsigned int magic;  /* ISC_HASHMAP_MAGIC while the object is valid */
	bool case_sensitive; /* false if keys are compared case-insensitively */
	uint8_t hindex;	     /* index of the table new nodes are added to */
	uint32_t hiter; /* rehashing iterator */
	isc_mem_t *mctx;      /* memory context used for all allocations */
	size_t count;	      /* number of nodes stored in both tables */
	uint8_t hash_key[16]; /* key passed to isc_halfsiphash24() */
	hashmap_table_t tables[HASHMAP_NUM_TABLES];
};
/*
 * Iterator state: a position ('hindex', 'i') inside one of the hashmap's
 * tables plus a pointer to the node found there.
 */
struct isc_hashmap_iter {
	isc_hashmap_t *hashmap; /* hashmap being iterated */
	size_t i;		/* slot index inside tables[hindex] */
	uint8_t hindex;		/* table currently being walked */
	hashmap_node_t *cur;	/* node at the current position */
};
/*
 * Forward declarations of helpers that are used before they are defined.
 */

/* Insert a node into tables[idx]; returns ISC_R_SUCCESS or ISC_R_EXISTS */
static isc_result_t
hashmap_add(isc_hashmap_t *hashmap, const uint32_t hashval, const uint8_t *key,
	    const uint32_t keysize, void *value, uint8_t idx);
/* Move a single node from the old table into the current one */
static void
hashmap_rehash_one(isc_hashmap_t *hashmap);
/* Allocate a larger table and make it the current one */
static void
hashmap_rehash_start_grow(isc_hashmap_t *hashmap);
/* Allocate a smaller table and make it the current one */
static void
hashmap_rehash_start_shrink(isc_hashmap_t *hashmap);
/* True when the node count exceeds the grow threshold of the current table */
static bool
over_threshold(isc_hashmap_t *hashmap);
/* True when the node count is below the shrink threshold of the current table */
static bool
under_threshold(isc_hashmap_t *hashmap);
/*
 * Return the index of the "other" table: 1 for index 0, and 0 for any
 * other index.
 */
static uint8_t
hashmap_nexttable(uint8_t idx) {
	if (idx == 0) {
		return (1);
	}
	return (0);
}
/*
 * A rehash is in progress exactly when the table that is not the current
 * one still has a slot array allocated.
 */
static bool
rehashing_in_progress(const isc_hashmap_t *hashmap) {
	const uint8_t other = hashmap_nexttable(hashmap->hindex);

	return (hashmap->tables[other].table != NULL);
}
/*
 * Decide whether a walk that has just finished table 'idx' should go on
 * to the other table: only when 'idx' is the current table and a rehash
 * is in progress.
 */
static bool
try_nexttable(const isc_hashmap_t *hashmap, uint8_t idx) {
	if (idx != hashmap->hindex) {
		return (false);
	}
	return (rehashing_in_progress(hashmap));
}
/*
 * Fill '*node' with a fresh entry for 'key'/'value' whose probe sequence
 * length is 0.  Only the key pointer is stored; the key bytes are not
 * copied.
 *
 * Requires:
 * \li 'key' is not NULL and 'keysize' is in the range 1..UINT16_MAX.
 */
static void
hashmap_node_init(hashmap_node_t *node, const uint32_t hashval,
		  const uint8_t *key, const uint32_t keysize, void *value) {
	REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX);

	hashmap_node_t fresh = {
		.key = key,
		.value = value,
		.hashval = hashval,
		.psl = 0,
		.keysize = keysize,
	};

	*node = fresh;
}
static void __attribute__((__unused__))
hashmap_dump_table(const isc_hashmap_t *hashmap, const uint8_t idx) {
fprintf(stderr,
"====== %" PRIu8 " (bits = %" PRIu8 ", size = %zu =====\n", idx,
hashmap->tables[idx].hashbits, hashmap->tables[idx].size);
for (size_t i = 0; i < hashmap->tables[idx].size; i++) {
hashmap_node_t *node = &hashmap->tables[idx].table[i];
if (node->key != NULL) {
uint32_t hash = isc_hash_bits32(
node->hashval, hashmap->tables[idx].hashbits);
fprintf(stderr,
"%p: %zu -> %p"
", value = %p"
", hash = %" PRIu32 ", hashval = %" PRIu32
", psl = %" PRIu32 ", key = %s\n",
hashmap, i, node, node->value, hash,
node->hashval, node->psl, (char *)node->key);
}
}
fprintf(stderr, "================\n\n");
}
/*
 * Allocate a zero-filled slot array of HASHSIZE(bits) nodes for
 * tables[idx] and record its size, bit count and mask.
 *
 * Requires:
 * \li tables[idx] is currently unallocated.
 * \li 'bits' is between HASHMAP_MIN_BITS and HASHMAP_MAX_BITS.
 */
static void
hashmap_create_table(isc_hashmap_t *hashmap, const uint8_t idx,
		     const uint8_t bits) {
	REQUIRE(hashmap->tables[idx].hashbits == HASHMAP_NO_BITS);
	REQUIRE(hashmap->tables[idx].table == NULL);
	REQUIRE(bits >= HASHMAP_MIN_BITS);
	REQUIRE(bits <= HASHMAP_MAX_BITS);

	hashmap_table_t *tbl = &hashmap->tables[idx];
	size_t bytes;

	*tbl = (hashmap_table_t){
		.size = HASHSIZE(bits),
		.hashbits = bits,
		.hashmask = HASHSIZE(bits) - 1,
	};

	bytes = tbl->size * sizeof(tbl->table[0]);
	tbl->table = isc_mem_getx(hashmap->mctx, bytes, ISC_MEM_ZERO);
}
/*
 * Release the slot array of tables[idx] and mark the table unallocated.
 *
 * With 'cleanup' set, every occupied slot is cleared first and the
 * hashmap's node count is decremented for each of them.  Keys and values
 * themselves are never freed here.
 */
static void
hashmap_free_table(isc_hashmap_t *hashmap, const uint8_t idx, bool cleanup) {
	hashmap_table_t *tbl = &hashmap->tables[idx];
	size_t bytes;

	if (cleanup) {
		for (size_t slot = 0; slot < tbl->size; slot++) {
			if (tbl->table[slot].key == NULL) {
				continue;
			}
			tbl->table[slot] = (hashmap_node_t){ 0 };
			hashmap->count--;
		}
	}

	bytes = tbl->size * sizeof(tbl->table[0]);
	isc_mem_put(hashmap->mctx, tbl->table, bytes);

	*tbl = (hashmap_table_t){
		.hashbits = HASHMAP_NO_BITS,
	};
}
/*
 * Create a hashmap with an initial table of (1 << bits) slots.
 *
 * The preconditions are checked before 'mctx' is used for the first
 * allocation.  Outside of fuzzing and unit-test builds the hash key is
 * filled from isc_entropy_get(); otherwise the fixed key { 0, 1 } is kept.
 *
 * Requires:
 * \li 'hashmapp' is not NULL and '*hashmapp' is NULL.
 * \li 'mctx' is not NULL.
 * \li 'bits' is between HASHMAP_MIN_BITS and HASHMAP_MAX_BITS.
 */
void
isc_hashmap_create(isc_mem_t *mctx, uint8_t bits, unsigned int options,
		   isc_hashmap_t **hashmapp) {
	REQUIRE(hashmapp != NULL && *hashmapp == NULL);
	REQUIRE(mctx != NULL);
	REQUIRE(bits >= HASHMAP_MIN_BITS && bits <= HASHMAP_MAX_BITS);

	isc_hashmap_t *hashmap = isc_mem_get(mctx, sizeof(*hashmap));
	bool case_sensitive = ((options & ISC_HASHMAP_CASE_INSENSITIVE) == 0);

	/* Everything not named here (tables, count, hiter, ...) is zeroed */
	*hashmap = (isc_hashmap_t){
		.magic = ISC_HASHMAP_MAGIC,
		.hash_key = { 0, 1 },
		.case_sensitive = case_sensitive,
	};
	isc_mem_attach(mctx, &hashmap->mctx);

#if !defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && !defined(UNIT_TESTING)
	isc_entropy_get(hashmap->hash_key, sizeof(hashmap->hash_key));
#endif /* !FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION && !UNIT_TESTING */

	hashmap_create_table(hashmap, 0, bits);

	*hashmapp = hashmap;
}
/*
 * Destroy the hashmap: clear '*hashmapp', invalidate the magic, free every
 * allocated table (clearing its nodes) and finally release the hashmap
 * structure and its reference to the memory context.
 *
 * Requires:
 * \li 'hashmapp' and '*hashmapp' are not NULL and '*hashmapp' is a valid
 *     hashmap.
 */
void
isc_hashmap_destroy(isc_hashmap_t **hashmapp) {
	REQUIRE(hashmapp != NULL && *hashmapp != NULL);
	REQUIRE(ISC_HASHMAP_VALID(*hashmapp));

	isc_hashmap_t *hashmap = *hashmapp;

	*hashmapp = NULL;
	hashmap->magic = 0;

	for (size_t i = 0; i < HASHMAP_NUM_TABLES; i++) {
		if (hashmap->tables[i].table == NULL) {
			continue;
		}
		hashmap_free_table(hashmap, i, true);
	}

	/* Every node must have been accounted for by the cleanup above */
	INSIST(hashmap->count == 0);

	isc_mem_putanddetach(&hashmap->mctx, hashmap, sizeof(*hashmap));
}
/*
 * Return true if 'node' holds the given key: the hash value and key size
 * must be equal, and the key bytes must compare equal either exactly or
 * via isc_ascii_lowerequal() when the map is case-insensitive.
 */
static bool
hashmap_match(hashmap_node_t *node, const uint32_t hashval, const uint8_t *key,
	      const uint32_t keysize, const bool case_sensitive) {
	/* Cheap checks first; the key bytes are only touched when they pass */
	if (node->hashval != hashval || node->keysize != keysize) {
		return (false);
	}

	if (case_sensitive) {
		return (memcmp(node->key, key, keysize) == 0);
	}

	return (isc_ascii_lowerequal(node->key, key, keysize));
}
static hashmap_node_t *
hashmap_find(const isc_hashmap_t *hashmap, const uint32_t hashval,
const uint8_t *key, uint32_t keysize, uint32_t *pslp,
uint8_t *idxp) {
uint32_t hash;
uint32_t psl;
uint8_t idx = *idxp;
uint32_t pos;
nexttable:
psl = 0;
hash = isc_hash_bits32(hashval, hashmap->tables[idx].hashbits);
while (true) {
hashmap_node_t *node = NULL;
pos = (hash + psl) & hashmap->tables[idx].hashmask;
node = &hashmap->tables[idx].table[pos];
if (node->key == NULL || psl > node->psl) {
break;
}
if (hashmap_match(node, hashval, key, keysize,
hashmap->case_sensitive)) {
*pslp = psl;
*idxp = idx;
return (node);
}
psl++;
}
if (try_nexttable(hashmap, idx)) {
idx = hashmap_nexttable(idx);
goto nexttable;
}
return (NULL);
}
/*
 * Compute the 32-bit hash of 'key' with isc_halfsiphash24(), using the
 * hashmap's own hash key and case-sensitivity setting.
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap.
 * \li 'key' is not NULL and 'keysize' is in the range 1..UINT16_MAX.
 */
uint32_t
isc_hashmap_hash(const isc_hashmap_t *hashmap, const void *key,
		 uint32_t keysize) {
	REQUIRE(ISC_HASHMAP_VALID(hashmap));
	REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX);

	uint32_t digest;
	uint8_t *out = (uint8_t *)&digest;

	isc_halfsiphash24(hashmap->hash_key, key, keysize,
			  hashmap->case_sensitive, out);

	return (digest);
}
/*
 * Find the node for 'key' and, if 'valuep' is not NULL, store its value
 * there.  When 'hashvalp' is NULL the hash is computed from the key.
 *
 * Returns ISC_R_SUCCESS if the key is present, ISC_R_NOTFOUND otherwise
 * (in which case '*valuep' is left untouched).
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap.
 * \li 'key' is not NULL and 'keysize' is in the range 1..UINT16_MAX.
 */
isc_result_t
isc_hashmap_find(const isc_hashmap_t *hashmap, const uint32_t *hashvalp,
		 const void *key, uint32_t keysize, void **valuep) {
	REQUIRE(ISC_HASHMAP_VALID(hashmap));
	REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX);

	hashmap_node_t *node;
	uint8_t idx = hashmap->hindex;
	uint32_t psl = 0; /* required by hashmap_find(), not used here */
	uint32_t hashval;

	if (hashvalp != NULL) {
		hashval = *hashvalp;
	} else {
		hashval = isc_hashmap_hash(hashmap, key, keysize);
	}

	node = hashmap_find(hashmap, hashval, key, keysize, &psl, &idx);
	if (node == NULL) {
		return (ISC_R_NOTFOUND);
	}

	INSIST(node->key != NULL);

	if (valuep != NULL) {
		*valuep = node->value;
	}

	return (ISC_R_SUCCESS);
}
/*
 * Remove 'entry' from tables[idx] using backward-shift deletion.
 *
 * 'hashval' and 'psl' must be the hash value and probe distance of
 * 'entry': the slot position is recomputed from them rather than from the
 * pointer.  Every following node that is not in its home slot is moved
 * one slot back (and its psl decremented) until an empty slot or a node
 * with psl == 0 is reached; the last vacated slot is then cleared.
 * The scan wraps around the end of the table through the hash mask.
 */
static void
hashmap_delete_node(isc_hashmap_t *hashmap, hashmap_node_t *entry,
		    uint32_t hashval, uint32_t psl, const uint8_t idx) {
	uint32_t pos;
	uint32_t hash;
	hashmap->count--;
	hash = isc_hash_bits32(hashval, hashmap->tables[idx].hashbits);
	/* Position of 'entry' (before masking); the loop starts one past it */
	pos = hash + psl;
	while (true) {
		hashmap_node_t *node = NULL;
		pos = (pos + 1) & hashmap->tables[idx].hashmask;
		INSIST(pos < hashmap->tables[idx].size);
		node = &hashmap->tables[idx].table[pos];
		/* Stop at a hole or at a node that already sits at home */
		if (node->key == NULL || node->psl == 0) {
			break;
		}
		/* Shift the node one slot back, towards its home slot */
		node->psl--;
		*entry = *node;
		entry = &hashmap->tables[idx].table[pos];
	}
	/* 'entry' now names the last slot that was vacated */
	*entry = (hashmap_node_t){ 0 };
}
/*
 * Perform one step of incremental rehashing: move the first remaining
 * node of the old table into the current table, or free the old table
 * once it has been drained.
 *
 * The scan position is tracked in a size_t.  The table size is a size_t
 * that can be 2^32 (HASHMAP_MAX_BITS), which does not fit into 32 bits;
 * comparing against a truncated size would report the rehash as complete
 * immediately and free a table that still holds nodes.
 */
static void
hashmap_rehash_one(isc_hashmap_t *hashmap) {
	uint8_t oldidx = hashmap_nexttable(hashmap->hindex);
	size_t oldsize = hashmap->tables[oldidx].size;
	hashmap_node_t *oldtable = hashmap->tables[oldidx].table;
	size_t pos = hashmap->hiter;
	hashmap_node_t node;
	isc_result_t result;

	/* Find first non-empty node */
	while (pos < oldsize && oldtable[pos].key == NULL) {
		pos++;
	}

	/* Rehashing complete */
	if (pos == oldsize) {
		hashmap_free_table(hashmap, oldidx, false);
		hashmap->hiter = 0;
		return;
	}

	/* pos < oldsize <= 2^32 here, so the value fits into 'hiter' */
	hashmap->hiter = (uint32_t)pos;

	/* Move the first non-empty node from old table to new table */
	node = oldtable[pos];

	hashmap_delete_node(hashmap, &oldtable[pos], node.hashval, node.psl,
			    oldidx);

	result = hashmap_add(hashmap, node.hashval, node.key, node.keysize,
			     node.value, hashmap->hindex);
	INSIST(result == ISC_R_SUCCESS);

	/*
	 * 'hiter' is not advanced here: deleting the node may have shifted
	 * its successor into the same slot.
	 */
}
/*
 * Pick the bit size for a grown table: at least one bit more than the
 * current table, and more as long as the node count exceeds roughly 40%
 * of the candidate size.  The result is capped at HASHMAP_MAX_BITS.
 */
static uint32_t
grow_bits(isc_hashmap_t *hashmap) {
	uint32_t bits = hashmap->tables[hashmap->hindex].hashbits + 1;

	for (size_t slots = HASHSIZE(bits);
	     hashmap->count > APPROX_40_PERCENT(slots);
	     slots = HASHSIZE(bits))
	{
		bits += 1;
	}

	return ((bits > HASHMAP_MAX_BITS) ? HASHMAP_MAX_BITS : bits);
}
/*
 * Pick the bit size for a shrunk table: one bit less than the current
 * table, but not less than HASHMAP_MIN_BITS.
 */
static uint32_t
shrink_bits(isc_hashmap_t *hashmap) {
	const uint32_t bits = hashmap->tables[hashmap->hindex].hashbits - 1;

	return ((bits <= HASHMAP_MIN_BITS) ? HASHMAP_MIN_BITS : bits);
}
/*
 * Start growing: allocate a larger table in the unused slot and make it
 * the current table.  Nothing happens if grow_bits() does not yield a
 * size larger than the current one.
 *
 * Requires:
 * \li no rehash is in progress.
 */
static void
hashmap_rehash_start_grow(isc_hashmap_t *hashmap) {
	REQUIRE(!rehashing_in_progress(hashmap));

	const uint8_t from = hashmap->hindex;
	const uint8_t to = hashmap_nexttable(from);
	const uint32_t have = hashmap->tables[from].hashbits;
	const uint32_t want = grow_bits(hashmap);

	if (want <= have) {
		return;
	}

	hashmap_create_table(hashmap, to, want);
	hashmap->hindex = to;
}
/*
 * Start shrinking: allocate a smaller table in the unused slot and make
 * it the current table.  Nothing happens if shrink_bits() does not yield
 * a size smaller than the current one.
 *
 * Requires:
 * \li no rehash is in progress.
 */
static void
hashmap_rehash_start_shrink(isc_hashmap_t *hashmap) {
	REQUIRE(!rehashing_in_progress(hashmap));

	const uint8_t from = hashmap->hindex;
	const uint8_t to = hashmap_nexttable(from);
	const uint32_t have = hashmap->tables[from].hashbits;
	const uint32_t want = shrink_bits(hashmap);

	if (want >= have) {
		return;
	}

	hashmap_create_table(hashmap, to, want);
	hashmap->hindex = to;
}
/*
 * Delete the node for 'key'.  Before the lookup one step of incremental
 * rehashing is performed if a rehash is in progress, or a shrink is
 * started (and its first step performed) if the map is under its shrink
 * threshold.
 *
 * Returns ISC_R_SUCCESS if the node was removed, ISC_R_NOTFOUND if the
 * key is not in the hashmap.
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap.
 * \li 'key' is not NULL and 'keysize' is in the range 1..UINT16_MAX.
 */
isc_result_t
isc_hashmap_delete(isc_hashmap_t *hashmap, const uint32_t *hashvalp,
		   const void *key, uint32_t keysize) {
	REQUIRE(ISC_HASHMAP_VALID(hashmap));
	REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX);

	hashmap_node_t *node;
	uint32_t psl = 0;
	uint8_t idx;
	uint32_t hashval;
	bool step;

	if (hashvalp != NULL) {
		hashval = *hashvalp;
	} else {
		hashval = isc_hashmap_hash(hashmap, key, keysize);
	}

	step = rehashing_in_progress(hashmap);
	if (!step && under_threshold(hashmap)) {
		hashmap_rehash_start_shrink(hashmap);
		step = true;
	}
	if (step) {
		hashmap_rehash_one(hashmap);
	}

	/* Read the current table only now: a shrink may just have changed it */
	idx = hashmap->hindex;

	node = hashmap_find(hashmap, hashval, key, keysize, &psl, &idx);
	if (node == NULL) {
		return (ISC_R_NOTFOUND);
	}

	INSIST(node->key != NULL);
	hashmap_delete_node(hashmap, node, hashval, psl, idx);

	return (ISC_R_SUCCESS);
}
/*
 * Return true when the node count exceeds roughly 90% of the current
 * table's size.  A table that already has HASHMAP_MAX_BITS bits is never
 * reported as over the threshold.
 */
static bool
over_threshold(isc_hashmap_t *hashmap) {
	const uint32_t bits = hashmap->tables[hashmap->hindex].hashbits;
	size_t limit;

	if (bits == HASHMAP_MAX_BITS) {
		return (false);
	}

	limit = APPROX_90_PERCENT(HASHSIZE(bits));

	return (limit < hashmap->count);
}
/*
 * Return true when the node count is below roughly 20% of the current
 * table's size.  A table that has only HASHMAP_MIN_BITS bits is never
 * reported as under the threshold.
 */
static bool
under_threshold(isc_hashmap_t *hashmap) {
	const uint32_t bits = hashmap->tables[hashmap->hindex].hashbits;
	size_t limit;

	if (bits == HASHMAP_MIN_BITS) {
		return (false);
	}

	limit = APPROX_20_PERCENT(HASHSIZE(bits));

	return (limit > hashmap->count);
}
/*
 * Insert 'key'/'value' into tables[idx] using Robin Hood probing.
 *
 * Starting at the key's home slot, the probe walks forward.  Whenever the
 * node being carried has travelled further than the resident of a slot,
 * the two are swapped and the walk continues with the displaced node.
 * The walk ends at the first empty slot, where the carried node is stored.
 *
 * Returns ISC_R_EXISTS, without storing anything, if a node matching
 * 'key' is met during the walk; ISC_R_SUCCESS otherwise.
 */
static isc_result_t
hashmap_add(isc_hashmap_t *hashmap, const uint32_t hashval, const uint8_t *key,
	    const uint32_t keysize, void *value, uint8_t idx) {
	uint32_t hash;
	uint32_t psl = 0;
	hashmap_node_t node;
	hashmap_node_t *current = NULL;
	uint32_t pos;

	hash = isc_hash_bits32(hashval, hashmap->tables[idx].hashbits);

	/* Initialize the node to be stored in 'node' */
	hashmap_node_init(&node, hashval, key, keysize, value);

	while (true) {
		pos = (hash + psl) & hashmap->tables[idx].hashmask;

		current = &hashmap->tables[idx].table[pos];

		/* Found an empty node */
		if (current->key == NULL) {
			break;
		}

		/*
		 * The comparison always uses the caller's key, even when
		 * 'node' has been swapped with a displaced resident.
		 */
		if (hashmap_match(current, hashval, key, keysize,
				  hashmap->case_sensitive))
		{
			return (ISC_R_EXISTS);
		}

		/* Found rich node */
		if (node.psl > current->psl) {
			/* Swap the poor with the rich node */
			ISC_SWAP(*current, node);
		}

		node.psl++;
		psl++;
	}

	/*
	 * Possible optimization - start growing when the poor node is too far
	 */
#if ISC_HASHMAP_GROW_FAST
	if (psl > hashmap->tables[idx].hashbits) {
		if (!rehashing_in_progress(hashmap)) {
			hashmap_rehash_start_grow(hashmap);
		}
	}
#endif

	hashmap->count++;

	/* We found an empty place, store entry into current node */
	*current = node;

	return (ISC_R_SUCCESS);
}
/*
 * Add 'key'/'value' to the hashmap.  Before the insertion one step of
 * incremental rehashing is performed if a rehash is in progress, or a
 * grow is started (and its first step performed) if the map is over its
 * grow threshold.  While a rehash is in progress the old table is checked
 * for the key as well, so that duplicates are detected in both tables.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_EXISTS if the key is already present.
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap.
 * \li 'key' is not NULL and 'keysize' is in the range 1..UINT16_MAX.
 */
isc_result_t
isc_hashmap_add(isc_hashmap_t *hashmap, const uint32_t *hashvalp,
		const void *key, uint32_t keysize, void *value) {
	REQUIRE(ISC_HASHMAP_VALID(hashmap));
	REQUIRE(key != NULL && keysize > 0 && keysize <= UINT16_MAX);

	isc_result_t result;
	uint32_t hashval;
	bool step;

	if (hashvalp != NULL) {
		hashval = *hashvalp;
	} else {
		hashval = isc_hashmap_hash(hashmap, key, keysize);
	}

	step = rehashing_in_progress(hashmap);
	if (!step && over_threshold(hashmap)) {
		hashmap_rehash_start_grow(hashmap);
		step = true;
	}
	if (step) {
		hashmap_rehash_one(hashmap);
	}

	if (rehashing_in_progress(hashmap)) {
		uint8_t fidx = hashmap_nexttable(hashmap->hindex);
		uint32_t psl;

		/* Look for the value in the old table */
		if (hashmap_find(hashmap, hashval, key, keysize, &psl,
				 &fidx) != NULL)
		{
			return (ISC_R_EXISTS);
		}
	}

	result = hashmap_add(hashmap, hashval, key, keysize, value,
			     hashmap->hindex);
	if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
		UNREACHABLE();
	}

	return (result);
}
/*
 * Allocate an iterator for 'hashmap' from the hashmap's memory context
 * and store it in '*iterp'.  The iterator starts out zeroed apart from
 * the hashmap pointer and the hashmap's current table index.
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap.
 * \li 'iterp' is not NULL and '*iterp' is NULL.
 */
void
isc_hashmap_iter_create(isc_hashmap_t *hashmap, isc_hashmap_iter_t **iterp) {
	REQUIRE(ISC_HASHMAP_VALID(hashmap));
	REQUIRE(iterp != NULL && *iterp == NULL);

	isc_hashmap_iter_t *iter = isc_mem_get(hashmap->mctx, sizeof(*iter));

	*iter = (isc_hashmap_iter_t){ 0 };
	iter->hashmap = hashmap;
	iter->hindex = hashmap->hindex;

	*iterp = iter;
}
/*
 * Free the iterator '*iterp' back to its hashmap's memory context and
 * set '*iterp' to NULL.
 *
 * Requires:
 * \li 'iterp' is not NULL and '*iterp' is not NULL.
 */
void
isc_hashmap_iter_destroy(isc_hashmap_iter_t **iterp) {
	REQUIRE(iterp != NULL && *iterp != NULL);

	isc_hashmap_iter_t *iter = *iterp;
	isc_hashmap_t *owner = iter->hashmap;

	*iterp = NULL;

	isc_mem_put(owner->mctx, iter, sizeof(*iter));
}
/*
 * Advance the iterator from its present position (inclusive) to the next
 * occupied slot.  When the table being walked is exhausted and
 * try_nexttable() allows it, the walk continues at slot 0 of the other
 * table.
 *
 * Returns ISC_R_SUCCESS with 'iter->cur' set to the node found, or
 * ISC_R_NOMORE when no occupied slot is left.
 */
static isc_result_t
isc__hashmap_iter_next(isc_hashmap_iter_t *iter) {
	isc_hashmap_t *hashmap = iter->hashmap;

	for (;;) {
		const hashmap_table_t *tbl = &hashmap->tables[iter->hindex];

		/* Skip empty slots */
		while (iter->i < tbl->size && tbl->table[iter->i].key == NULL) {
			iter->i++;
		}

		if (iter->i < tbl->size) {
			iter->cur = &tbl->table[iter->i];
			return (ISC_R_SUCCESS);
		}

		if (!try_nexttable(hashmap, iter->hindex)) {
			return (ISC_R_NOMORE);
		}

		iter->hindex = hashmap_nexttable(iter->hindex);
		iter->i = 0;
	}
}
/*
 * Rewind the iterator to slot 0 of the hashmap's current table and move
 * it to the first occupied slot.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMORE if there is no node to visit.
 *
 * Requires:
 * \li 'iter' is not NULL.
 */
isc_result_t
isc_hashmap_iter_first(isc_hashmap_iter_t *iter) {
	REQUIRE(iter != NULL);

	iter->i = 0;
	iter->hindex = iter->hashmap->hindex;

	isc_result_t result = isc__hashmap_iter_next(iter);

	return (result);
}
/*
 * Step past the current slot and move the iterator to the next occupied
 * one.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMORE when the end has been reached.
 *
 * Requires:
 * \li 'iter' is not NULL and has a current node.
 */
isc_result_t
isc_hashmap_iter_next(isc_hashmap_iter_t *iter) {
	REQUIRE(iter != NULL);
	REQUIRE(iter->cur != NULL);

	iter->i += 1;

	isc_result_t result = isc__hashmap_iter_next(iter);

	return (result);
}
/*
 * Delete the node at the iterator's position and move on to the next
 * occupied slot.
 *
 * The slot index is deliberately not advanced: the backward shift done by
 * hashmap_delete_node() may have moved the following node into the slot
 * that was just vacated.
 *
 * NOTE(review): that shift wraps around the end of the table, so a node
 * from the start of the table could end up in a slot the iterator has yet
 * to reach and be reported a second time - confirm whether callers can
 * tolerate this.
 *
 * Requires:
 * \li 'iter' is not NULL and has a current node.
 */
isc_result_t
isc_hashmap_iter_delcurrent_next(isc_hashmap_iter_t *iter) {
	REQUIRE(iter != NULL);
	REQUIRE(iter->cur != NULL);

	isc_hashmap_t *owner = iter->hashmap;
	hashmap_node_t *victim = &owner->tables[iter->hindex].table[iter->i];

	hashmap_delete_node(owner, victim, victim->hashval, victim->psl,
			    iter->hindex);

	return (isc__hashmap_iter_next(iter));
}
/*
 * Store the value of the iterator's current node in '*valuep'.
 *
 * Requires:
 * \li 'it' is not NULL and has a current node.
 * \li 'valuep' is not NULL and '*valuep' is NULL.
 */
void
isc_hashmap_iter_current(isc_hashmap_iter_t *it, void **valuep) {
	REQUIRE(it != NULL);
	REQUIRE(it->cur != NULL);
	REQUIRE(valuep != NULL && *valuep == NULL);

	const hashmap_node_t *cur = it->cur;

	*valuep = cur->value;
}
/*
 * Store the key pointer and key size of the iterator's current node in
 * '*key' and '*keysize'.  The key is not copied; the pointer refers to
 * the key storage that was passed in when the node was added.
 *
 * Requires:
 * \li 'it' is not NULL and has a current node.
 * \li 'key' is not NULL and '*key' is NULL.
 * \li 'keysize' is not NULL.
 */
void
isc_hashmap_iter_currentkey(isc_hashmap_iter_t *it, const unsigned char **key,
			    size_t *keysize) {
	REQUIRE(it != NULL);
	REQUIRE(it->cur != NULL);
	REQUIRE(key != NULL && *key == NULL);
	REQUIRE(keysize != NULL);

	*key = it->cur->key;
	*keysize = it->cur->keysize;
}
/*
 * Return the number of nodes stored in the hashmap, converted to
 * unsigned int.
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap.
 */
unsigned int
isc_hashmap_count(isc_hashmap_t *hashmap) {
	REQUIRE(ISC_HASHMAP_VALID(hashmap));

	const size_t stored = hashmap->count;

	return ((unsigned int)stored);
}

View file

@ -0,0 +1,198 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
/*! \file */
#pragma once
#include <inttypes.h>
#include <string.h>
#include <isc/result.h>
#include <isc/types.h>
/*% Opaque hashmap handle. */
typedef struct isc_hashmap isc_hashmap_t;
/*% Opaque hashmap iterator handle. */
typedef struct isc_hashmap_iter isc_hashmap_iter_t;
/*%
 * Values for the 'options' argument of isc_hashmap_create(); they select
 * whether keys are matched case-sensitively (the default) or
 * case-insensitively.
 */
enum { ISC_HASHMAP_CASE_SENSITIVE = 0x00, ISC_HASHMAP_CASE_INSENSITIVE = 0x01 };
/*%
 * Create a hashmap at '*hashmapp', using memory context 'mctx' and an
 * initial size of (1<<bits) slots.
 *
 * 'options' is ISC_HASHMAP_CASE_SENSITIVE or ISC_HASHMAP_CASE_INSENSITIVE.
 *
 * Requires:
 * \li 'hashmapp' is not NULL and '*hashmapp' is NULL.
 * \li 'mctx' is a valid memory context.
 * \li 'bits' >=1 and 'bits' <=32
 *
 */
void
isc_hashmap_create(isc_mem_t *mctx, uint8_t bits, unsigned int options,
		   isc_hashmap_t **hashmapp);
/*%
 * Destroy the hashmap, freeing all memory allocated by it, and set
 * '*hashmapp' to NULL.  Keys and values are owned by the caller and are
 * not freed.
 *
 * Requires:
 * \li 'hashmapp' is not NULL and '*hashmapp' is a valid hashmap
 */
void
isc_hashmap_destroy(isc_hashmap_t **hashmapp);
/*%
 * Return the hash value of 'key' of size 'keysize' as computed for
 * 'hashmap'.  The result can be passed as '*hashvalp' to
 * isc_hashmap_add(), isc_hashmap_find() and isc_hashmap_delete() on the
 * same hashmap.
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap
 * \li 'key' is non-null and 'keysize' is between 1 and UINT16_MAX
 */
uint32_t
isc_hashmap_hash(const isc_hashmap_t *hashmap, const void *key,
		 uint32_t keysize);
/*%
 * Add a node to hashmap, pointed by binary key 'key' of size 'keysize';
 * set its value to 'value'.
 *
 * The key is not copied: 'key' must stay valid and unchanged for as long
 * as the node is stored in the hashmap.
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap
 * \li 'hashvalp' is either NULL or points to the precomputed hash value
 *     of 'key' (see isc_hashmap_hash())
 * \li 'key' is non-null key of size 'keysize'
 * \li 'keysize' is between 1 and UINT16_MAX
 *
 * Returns:
 * \li #ISC_R_EXISTS -- node of the same key already exists
 * \li #ISC_R_SUCCESS -- all is well.
 */
isc_result_t
isc_hashmap_add(isc_hashmap_t *hashmap, const uint32_t *hashvalp,
		const void *key, uint32_t keysize, void *value);
/*%
 * Find a node matching 'key'/'keysize' in hashmap 'hashmap';
 * if found, set '*valuep' to its value. (If 'valuep' is NULL,
 * then simply return SUCCESS or NOTFOUND to indicate whether the
 * key exists in the hashmap.)
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap
 * \li 'hashvalp' is either NULL or points to the precomputed hash value
 *     of 'key' (see isc_hashmap_hash())
 * \li 'key' is non-null key of size 'keysize'
 * \li 'keysize' is between 1 and UINT16_MAX
 *
 * Returns:
 * \li #ISC_R_SUCCESS -- success
 * \li #ISC_R_NOTFOUND -- key not found
 */
isc_result_t
isc_hashmap_find(const isc_hashmap_t *hashmap, const uint32_t *hashvalp,
		 const void *key, uint32_t keysize, void **valuep);
/*%
 * Delete the node matching 'key'/'keysize' from the hashmap.  The key and
 * the value themselves are not freed.
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap
 * \li 'hashvalp' is either NULL or points to the precomputed hash value
 *     of 'key' (see isc_hashmap_hash())
 * \li 'key' is non-null key of size 'keysize'
 * \li 'keysize' is between 1 and UINT16_MAX
 *
 * Returns:
 * \li #ISC_R_NOTFOUND -- key not found
 * \li #ISC_R_SUCCESS -- all is well
 */
isc_result_t
isc_hashmap_delete(isc_hashmap_t *hashmap, const uint32_t *hashvalp,
		   const void *key, uint32_t keysize);
/*%
 * Create an iterator for the hashmap; point '*itp' to it.  The iterator
 * must be positioned with isc_hashmap_iter_first() before it is used.
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap
 * \li 'itp' is non NULL and '*itp' is NULL.
 */
void
isc_hashmap_iter_create(isc_hashmap_t *hashmap, isc_hashmap_iter_t **itp);
/*%
 * Destroy the iterator '*itp' and set '*itp' to NULL.  The hashmap itself
 * is not modified.
 *
 * Requires:
 * \li 'itp' is non NULL and '*itp' is non NULL.
 */
void
isc_hashmap_iter_destroy(isc_hashmap_iter_t **itp);
/*%
 * Set an iterator to the first entry.  May be called again at any time to
 * restart the iteration.
 *
 * Requires:
 * \li 'it' is non NULL.
 *
 * Returns:
 * \li #ISC_R_SUCCESS -- success
 * \li #ISC_R_NOMORE -- no data in the hashmap
 */
isc_result_t
isc_hashmap_iter_first(isc_hashmap_iter_t *it);
/*%
 * Set an iterator to the next entry.
 *
 * Requires:
 * \li 'it' is non NULL.
 * \li 'it' has a current entry, i.e. an earlier positioning call returned
 *     #ISC_R_SUCCESS.
 *
 * Returns:
 * \li #ISC_R_SUCCESS -- success
 * \li #ISC_R_NOMORE -- end of hashmap reached
 */
isc_result_t
isc_hashmap_iter_next(isc_hashmap_iter_t *it);
/*%
 * Delete current entry and set an iterator to the next entry.  The key
 * and the value of the deleted entry are not freed.
 *
 * Requires:
 * \li 'it' is non NULL.
 * \li 'it' has a current entry, i.e. an earlier positioning call returned
 *     #ISC_R_SUCCESS.
 *
 * Returns:
 * \li #ISC_R_SUCCESS -- success
 * \li #ISC_R_NOMORE -- end of hashmap reached
 */
isc_result_t
isc_hashmap_iter_delcurrent_next(isc_hashmap_iter_t *it);
/*%
 * Set '*valuep' to the current value under the iterator
 *
 * Requires:
 * \li 'it' is non NULL and has a current entry.
 * \li 'valuep' is non NULL and '*valuep' is NULL.
 */
void
isc_hashmap_iter_current(isc_hashmap_iter_t *it, void **valuep);
/*%
 * Set '*key' and '*keysize' to the current key and keysize for the value
 * under the iterator.  The key is not copied; '*key' points at the key
 * storage supplied when the entry was added.
 *
 * Requires:
 * \li 'it' is non NULL and has a current entry.
 * \li 'key' is non NULL and '*key' is NULL.
 * \li 'keysize' is non NULL.
 */
void
isc_hashmap_iter_currentkey(isc_hashmap_iter_t *it, const unsigned char **key,
			    size_t *keysize);
/*%
 * Returns the number of items currently stored in the hashmap.
 *
 * Requires:
 * \li 'hashmap' is a valid hashmap
 */
unsigned int
isc_hashmap_count(isc_hashmap_t *hashmap);

View file

@ -19,6 +19,7 @@ check_PROGRAMS = \
errno_test \
file_test \
hash_test \
hashmap_test \
heap_test \
hmac_test \
ht_test \

413
tests/isc/hashmap_test.c Normal file
View file

@ -0,0 +1,413 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#include <inttypes.h>
#include <sched.h> /* IWYU pragma: keep */
#include <setjmp.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define UNIT_TESTING
#include <cmocka.h>
#include <isc/hash.h>
#include <isc/hashmap.h>
#include <isc/mem.h>
#include <isc/print.h>
#include <isc/string.h>
#include <isc/util.h>
#include <tests/isc.h>
/* INCLUDE LAST */
#define mctx __mctx
#include "hashmap.c"
#undef mctx
/*
 * Test payload: the node is stored as the hashmap value and also provides
 * the external storage for its own key.
 */
typedef struct test_node {
	uint32_t hashval; /* precomputed hash of 'key', when a test uses one */
	char key[64];	  /* key bytes */
} test_node_t;
/*
 * Exercise add/find/delete on a case-sensitive hashmap created with
 * 'init_bits' bits, using 'count' nodes in each of three key sets:
 * short 16-byte keys, long keys, and short keys with an upper-case "KEY".
 */
static void
test_hashmap_full(uint8_t init_bits, uintptr_t count) {
	isc_hashmap_t *hashmap = NULL;
	isc_result_t result;
	test_node_t *nodes, *long_nodes, *upper_nodes;
	nodes = isc_mem_get(mctx, count * sizeof(nodes[0]));
	long_nodes = isc_mem_get(mctx, count * sizeof(nodes[0]));
	upper_nodes = isc_mem_get(mctx, count * sizeof(nodes[0]));
	isc_hashmap_create(mctx, init_bits, ISC_HASHMAP_CASE_SENSITIVE,
			   &hashmap);
	assert_non_null(hashmap);
	/*
	 * Note: snprintf() is followed with strlcat()
	 * to ensure we are always filling the 16 byte key.
	 */
	for (size_t i = 0; i < count; i++) {
		/* short keys */
		snprintf(nodes[i].key, 16, "%u", (unsigned int)i);
		strlcat(nodes[i].key, " key of a raw hashmap!!", 16);
		/* long keys */
		snprintf(long_nodes[i].key, sizeof(long_nodes[i].key), "%u",
			 (unsigned int)i);
		strlcat(long_nodes[i].key, " key of a raw hashmap!!",
			sizeof(long_nodes[i].key));
		/* (some) uppercase keys */
		snprintf(upper_nodes[i].key, 16, "%u", (unsigned int)i);
		strlcat(upper_nodes[i].key, " KEY of a raw hashmap!!", 16);
	}
	/* insert short nodes */
	for (size_t i = 0; i < count; i++) {
		nodes[i].hashval = isc_hashmap_hash(hashmap, nodes[i].key, 16);
		result = isc_hashmap_add(hashmap, &(nodes[i]).hashval,
					 nodes[i].key, 16, &nodes[i]);
		assert_int_equal(result, ISC_R_SUCCESS);
	}
	/* check if the short nodes were inserted */
	for (size_t i = 0; i < count; i++) {
		void *f = NULL;
		result = isc_hashmap_find(hashmap, &(nodes[i]).hashval,
					  nodes[i].key, 16, &f);
		assert_int_equal(result, ISC_R_SUCCESS);
		assert_ptr_equal(&nodes[i], f);
	}
	/* check for double inserts */
	for (size_t i = 0; i < count; i++) {
		result = isc_hashmap_add(hashmap, NULL, nodes[i].key, 16,
					 &nodes[i]);
		assert_int_equal(result, ISC_R_EXISTS);
	}
	/* insert long nodes (hash computed by the hashmap) */
	for (size_t i = 0; i < count; i++) {
		result =
			isc_hashmap_add(hashmap, NULL, long_nodes[i].key,
					strlen((const char *)long_nodes[i].key),
					&long_nodes[i]);
		assert_int_equal(result, ISC_R_SUCCESS);
	}
	/* uppercase keys must not match in a case-sensitive hashmap */
	for (size_t i = 0; i < count; i++) {
		void *f = NULL;
		result = isc_hashmap_find(hashmap, NULL, upper_nodes[i].key, 16,
					  &f);
		assert_int_equal(result, ISC_R_NOTFOUND);
		assert_null(f);
	}
	/* check if the long nodes were inserted */
	for (size_t i = 0; i < count; i++) {
		void *f = NULL;
		result = isc_hashmap_find(
			hashmap, NULL, long_nodes[i].key,
			strlen((const char *)long_nodes[i].key), &f);
		assert_int_equal(result, ISC_R_SUCCESS);
		assert_ptr_equal(f, &long_nodes[i]);
	}
	/* delete the short nodes; each must be gone right after its delete */
	for (size_t i = 0; i < count; i++) {
		void *f = NULL;
		result = isc_hashmap_delete(hashmap, &nodes[i].hashval,
					    nodes[i].key, 16);
		assert_int_equal(result, ISC_R_SUCCESS);
		result = isc_hashmap_find(hashmap, NULL, nodes[i].key, 16, &f);
		assert_int_equal(result, ISC_R_NOTFOUND);
		assert_null(f);
	}
	/* insert uppercase nodes */
	for (size_t i = 0; i < count; i++) {
		result = isc_hashmap_add(hashmap, NULL, upper_nodes[i].key, 16,
					 &upper_nodes[i]);
		assert_int_equal(result, ISC_R_SUCCESS);
	}
	/* delete the long nodes; each must be gone right after its delete */
	for (size_t i = 0; i < count; i++) {
		void *f = NULL;
		result = isc_hashmap_delete(
			hashmap, NULL, long_nodes[i].key,
			strlen((const char *)long_nodes[i].key));
		assert_int_equal(result, ISC_R_SUCCESS);
		result = isc_hashmap_find(
			hashmap, NULL, long_nodes[i].key,
			strlen((const char *)long_nodes[i].key), &f);
		assert_int_equal(result, ISC_R_NOTFOUND);
		assert_null(f);
	}
	/* the uppercase nodes must still be there */
	for (size_t i = 0; i < count; i++) {
		void *f = NULL;
		result = isc_hashmap_find(hashmap, NULL, upper_nodes[i].key, 16,
					  &f);
		assert_int_equal(result, ISC_R_SUCCESS);
		assert_ptr_equal(f, &upper_nodes[i]);
	}
	/* the short (lowercase) nodes must still be absent */
	for (size_t i = 0; i < count; i++) {
		void *f = NULL;
		result = isc_hashmap_find(hashmap, NULL, nodes[i].key, 16, &f);
		assert_int_equal(result, ISC_R_NOTFOUND);
		assert_null(f);
	}
	isc_hashmap_destroy(&hashmap);
	assert_null(hashmap);
	isc_mem_put(mctx, nodes, count * sizeof(nodes[0]));
	isc_mem_put(mctx, long_nodes, count * sizeof(nodes[0]));
	isc_mem_put(mctx, upper_nodes, count * sizeof(nodes[0]));
}
/*
 * Exercise the iterator while a rehash is in progress: walk all nodes,
 * then delete the even-indexed nodes and finally the odd-indexed nodes
 * through isc_hashmap_iter_delcurrent_next(), checking the number of
 * nodes visited in every pass.
 */
static void
test_hashmap_iterator(void) {
	isc_hashmap_t *hashmap = NULL;
	isc_result_t result;
	isc_hashmap_iter_t *iter = NULL;
	size_t count = 7600;
	uint32_t walked;
	size_t tksize;
	test_node_t *nodes;
	nodes = isc_mem_get(mctx, count * sizeof(nodes[0]));
	isc_hashmap_create(mctx, HASHMAP_MIN_BITS, ISC_HASHMAP_CASE_SENSITIVE,
			   &hashmap);
	assert_non_null(hashmap);
	for (size_t i = 0; i < count; i++) {
		/* short keys */
		snprintf(nodes[i].key, 16, "%u", (unsigned int)i);
		strlcat(nodes[i].key, " key of a raw hashmap!!", 16);
	}
	for (size_t i = 0; i < count; i++) {
		result = isc_hashmap_add(hashmap, NULL, nodes[i].key, 16,
					 &nodes[i]);
		assert_int_equal(result, ISC_R_SUCCESS);
	}
	/* We want to iterate while rehashing is in progress */
	assert_true(rehashing_in_progress(hashmap));
	/* plain walk: every node must be visited with its own key */
	walked = 0;
	isc_hashmap_iter_create(hashmap, &iter);
	for (result = isc_hashmap_iter_first(iter); result == ISC_R_SUCCESS;
	     result = isc_hashmap_iter_next(iter))
	{
		char key[16] = { 0 };
		ptrdiff_t i;
		const uint8_t *tkey = NULL;
		test_node_t *v = NULL;
		isc_hashmap_iter_current(iter, (void *)&v);
		isc_hashmap_iter_currentkey(iter, &tkey, &tksize);
		assert_int_equal(tksize, 16);
		/* recover the node index from the value pointer */
		i = v - &nodes[0];
		snprintf(key, 16, "%u", (unsigned int)i);
		strlcat(key, " key of a raw hashmap!!", 16);
		assert_memory_equal(key, tkey, 16);
		walked++;
	}
	assert_int_equal(walked, count);
	assert_int_equal(result, ISC_R_NOMORE);
	/* erase the nodes with an even index */
	walked = 0;
	result = isc_hashmap_iter_first(iter);
	while (result == ISC_R_SUCCESS) {
		char key[16] = { 0 };
		ptrdiff_t i;
		const uint8_t *tkey = NULL;
		test_node_t *v = NULL;
		isc_hashmap_iter_current(iter, (void *)&v);
		isc_hashmap_iter_currentkey(iter, &tkey, &tksize);
		assert_int_equal(tksize, 16);
		i = v - nodes;
		snprintf(key, 16, "%u", (unsigned int)i);
		strlcat(key, " key of a raw hashmap!!", 16);
		assert_memory_equal(key, tkey, 16);
		if (i % 2 == 0) {
			result = isc_hashmap_iter_delcurrent_next(iter);
		} else {
			result = isc_hashmap_iter_next(iter);
		}
		walked++;
	}
	assert_int_equal(result, ISC_R_NOMORE);
	assert_int_equal(walked, count);
	/* erase the remaining nodes, which all have an odd index */
	walked = 0;
	result = isc_hashmap_iter_first(iter);
	while (result == ISC_R_SUCCESS) {
		char key[16] = { 0 };
		ptrdiff_t i;
		const uint8_t *tkey = NULL;
		test_node_t *v = NULL;
		isc_hashmap_iter_current(iter, (void *)&v);
		isc_hashmap_iter_currentkey(iter, &tkey, &tksize);
		assert_int_equal(tksize, 16);
		i = v - nodes;
		snprintf(key, 16, "%u", (unsigned int)i);
		strlcat(key, " key of a raw hashmap!!", 16);
		assert_memory_equal(key, tkey, 16);
		if (i % 2 == 1) {
			result = isc_hashmap_iter_delcurrent_next(iter);
		} else {
			result = isc_hashmap_iter_next(iter);
		}
		walked++;
	}
	assert_int_equal(result, ISC_R_NOMORE);
	assert_int_equal(walked, count / 2);
	/* the hashmap must be empty now */
	walked = 0;
	for (result = isc_hashmap_iter_first(iter); result == ISC_R_SUCCESS;
	     result = isc_hashmap_iter_next(iter))
	{
		walked++;
	}
	assert_int_equal(result, ISC_R_NOMORE);
	assert_int_equal(walked, 0);
	/* Iterator doesn't progress rehashing */
	assert_true(rehashing_in_progress(hashmap));
	isc_hashmap_iter_destroy(&iter);
	assert_null(iter);
	isc_hashmap_destroy(&hashmap);
	assert_null(hashmap);
	isc_mem_put(mctx, nodes, count * sizeof(nodes[0]));
}
/* Full hashmap test: 1 bit, 120 elements (full rehashing) */
ISC_RUN_TEST_IMPL(isc_hashmap_1_120) {
	test_hashmap_full(1, 120);
}
/* Full hashmap test: 6 bit, 1000 elements (full rehashing) */
ISC_RUN_TEST_IMPL(isc_hashmap_6_1000) {
	test_hashmap_full(6, 1000);
}
/* Full hashmap test: 24 bit, 200K elements (no rehashing) */
ISC_RUN_TEST_IMPL(isc_hashmap_24_200000) {
	test_hashmap_full(24, 200000);
}
/*
 * Full hashmap test: 1 bit, 48K elements (full rehashing).
 *
 * The numbers here mirror the test name and the arguments passed to
 * test_hashmap_full() below, matching how the sibling tests are labelled.
 */
ISC_RUN_TEST_IMPL(isc_hashmap_1_48000) {
	test_hashmap_full(1, 48000);
}
/* Full hashmap test: 8 bit, 20K elements (partial rehashing) */
ISC_RUN_TEST_IMPL(isc_hashmap_8_20000) {
	test_hashmap_full(8, 20000);
}
/* Exercise the hashmap iterator by running test_hashmap_iterator() */
ISC_RUN_TEST_IMPL(isc_hashmap_iterator) {
	test_hashmap_iterator();
}
/*
 * Check how key letter case is treated in both hashmap modes.
 *
 * Three keys that differ only in letter case are used: 'lower' and
 * 'upper' are inserted, 'mixed' is only ever looked up.  The expected
 * results are:
 *
 *   - ISC_HASHMAP_CASE_SENSITIVE: 'lower' and 'upper' are distinct keys
 *     and 'mixed' is not found;
 *   - ISC_HASHMAP_CASE_INSENSITIVE: adding 'upper' after 'lower' reports
 *     ISC_R_EXISTS and 'mixed' is found.
 */
ISC_RUN_TEST_IMPL(isc_hashmap_case) {
	isc_result_t result;
	isc_hashmap_t *hashmap = NULL;
	test_node_t lower = { .key = "isc_hashmap_case" };
	test_node_t upper = { .key = "ISC_HASHMAP_CASE" };
	test_node_t mixed = { .key = "IsC_hAsHmAp_CaSe" };
	/*
	 * Start from NULL so that isc_hashmap_find() is never handed the
	 * address of an indeterminate pointer.
	 */
	test_node_t *value = NULL;

	/* Case-sensitive map: keys differing only in case are distinct. */
	isc_hashmap_create(mctx, 1, ISC_HASHMAP_CASE_SENSITIVE, &hashmap);

	result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key),
				 &lower);
	assert_int_equal(result, ISC_R_SUCCESS);

	/* Adding the very same key a second time must be rejected. */
	result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key),
				 &lower);
	assert_int_equal(result, ISC_R_EXISTS);

	result = isc_hashmap_add(hashmap, NULL, upper.key, strlen(upper.key),
				 &upper);
	assert_int_equal(result, ISC_R_SUCCESS);

	/* Neither stored key matches the mixed-case spelling. */
	result = isc_hashmap_find(hashmap, NULL, mixed.key, strlen(mixed.key),
				  (void *)&value);
	assert_int_equal(result, ISC_R_NOTFOUND);

	isc_hashmap_destroy(&hashmap);

	/* Case-insensitive map: all three spellings are the same key. */
	isc_hashmap_create(mctx, 1, ISC_HASHMAP_CASE_INSENSITIVE, &hashmap);

	result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key),
				 &lower);
	assert_int_equal(result, ISC_R_SUCCESS);

	result = isc_hashmap_add(hashmap, NULL, lower.key, strlen(lower.key),
				 &lower);
	assert_int_equal(result, ISC_R_EXISTS);

	/* The upper-case spelling collides with the entry added above. */
	result = isc_hashmap_add(hashmap, NULL, upper.key, strlen(upper.key),
				 &upper);
	assert_int_equal(result, ISC_R_EXISTS);

	result = isc_hashmap_find(hashmap, NULL, mixed.key, strlen(mixed.key),
				  (void *)&value);
	assert_int_equal(result, ISC_R_SUCCESS);

	isc_hashmap_destroy(&hashmap);
}
/*
 * Test registry: every entry names a test defined with
 * ISC_RUN_TEST_IMPL() earlier in this file.
 *
 * NOTE(review): ISC_TEST_MAIN presumably provides the program entry
 * point that runs the listed tests; the macro is defined outside this
 * file -- confirm.
 */
ISC_TEST_LIST_START
ISC_TEST_ENTRY(isc_hashmap_case)
ISC_TEST_ENTRY(isc_hashmap_1_120)
ISC_TEST_ENTRY(isc_hashmap_6_1000)
ISC_TEST_ENTRY(isc_hashmap_24_200000)
ISC_TEST_ENTRY(isc_hashmap_1_48000)
ISC_TEST_ENTRY(isc_hashmap_8_20000)
ISC_TEST_ENTRY(isc_hashmap_iterator)
ISC_TEST_LIST_END
ISC_TEST_MAIN

View file

@ -39,7 +39,7 @@
#undef mctx
static void
test_ht_full(uint8_t init_bits, uint8_t finish_bits, uintptr_t count) {
test_ht_full(uint8_t init_bits, uintptr_t count) {
isc_ht_t *ht = NULL;
isc_result_t result;
uintptr_t i;
@ -174,8 +174,6 @@ test_ht_full(uint8_t init_bits, uint8_t finish_bits, uintptr_t count) {
assert_null(f);
}
assert_int_equal(ht->hashbits[ht->hindex], finish_bits);
isc_ht_destroy(&ht);
assert_null(ht);
}
@ -186,7 +184,7 @@ test_ht_iterator(void) {
isc_result_t result;
isc_ht_iter_t *iter = NULL;
uintptr_t i;
uintptr_t count = 6300;
uintptr_t count = 7600;
uint32_t walked;
unsigned char key[16];
size_t tksize;
@ -296,22 +294,34 @@ test_ht_iterator(void) {
assert_null(ht);
}
/* 1 bit, 120 elements test, full rehashing */
ISC_RUN_TEST_IMPL(isc_ht_1_120) {
test_ht_full(1, 120);
return;
}
/* 6 bit, 1000 elements test, full rehashing */
ISC_RUN_TEST_IMPL(isc_ht_6_1000) {
test_ht_full(6, 1000);
return;
}
/* 24 bit, 200K elements test, no rehashing */
ISC_RUN_TEST_IMPL(isc_ht_24) {
ISC_RUN_TEST_IMPL(isc_ht_24_200000) {
UNUSED(state);
test_ht_full(24, 24, 200000);
test_ht_full(24, 200000);
}
/* 15 bit, 45K elements test, full rehashing */
ISC_RUN_TEST_IMPL(isc_ht_15) {
ISC_RUN_TEST_IMPL(isc_ht_1_48000) {
UNUSED(state);
test_ht_full(1, 15, 48000);
test_ht_full(1, 48000);
}
/* 8 bit, 20k elements test, partial rehashing */
ISC_RUN_TEST_IMPL(isc_ht_8) {
ISC_RUN_TEST_IMPL(isc_ht_8_20000) {
UNUSED(state);
test_ht_full(8, 14, 20000);
test_ht_full(8, 20000);
}
/* test hashtable iterator */
@ -322,9 +332,11 @@ ISC_RUN_TEST_IMPL(isc_ht_iterator) {
}
ISC_TEST_LIST_START
ISC_TEST_ENTRY(isc_ht_24)
ISC_TEST_ENTRY(isc_ht_15)
ISC_TEST_ENTRY(isc_ht_8)
ISC_TEST_ENTRY(isc_ht_1_120)
ISC_TEST_ENTRY(isc_ht_6_1000)
ISC_TEST_ENTRY(isc_ht_24_200000)
ISC_TEST_ENTRY(isc_ht_1_48000)
ISC_TEST_ENTRY(isc_ht_8_20000)
ISC_TEST_ENTRY(isc_ht_iterator)
ISC_TEST_LIST_END