chg: dev: Adaptive memory allocation strategy for qp-tries

qp-tries allocate their nodes (twigs) in chunks to reduce allocator
pressure and improve memory locality. The choice of chunk size presents
a tradeoff: larger chunks benefit qp-tries with many values (as seen
in large zones and resolvers) but waste memory in smaller use cases.

Previously, our fixed chunk size of 2^10 twigs meant that even an
empty qp-trie would consume 12KB of memory, while reducing this size
would negatively impact resolver performance.

This commit implements an adaptive chunking strategy that:
 - Tracks the size of the most recently allocated chunk.
 - Doubles the chunk size for each new allocation until reaching a
   predefined maximum.

This approach keeps small tries memory-efficient while preserving the
performance benefits of larger chunk sizes for bigger data structures.

Merge branch 'alessio/qp-small-alloc' into 'main'

See merge request isc-projects/bind9!10245
This commit is contained in:
Alessio Podda 2025-05-22 22:53:48 +00:00
commit dc3a1bde65
6 changed files with 143 additions and 49 deletions

View file

@ -89,6 +89,12 @@
#include <dns/name.h>
#include <dns/types.h>
/*%
* How many bytes a qp-trie might allocate as part of an insert. Needed for
* overmem checks.
*
* The value is 2^12 * 12 = 49152 bytes. NOTE(review): this looks like the
* largest chunk (1 << QP_CHUNK_LOG_MAX nodes) times a 12-byte node size;
* a STATIC_ASSERT next to the QP_CHUNK_BYTES definition requires
* QP_SAFETY_MARGIN >= QP_CHUNK_BYTES, so keep the two in step.
*/
#define QP_SAFETY_MARGIN ((1ul << 12ul) * 12)
/*%
* A `dns_qp_t` supports single-threaded read/write access.
*/
@ -306,7 +312,6 @@ typedef struct dns_qp_memusage {
size_t hold; /*%< nodes retained for readers */
size_t free; /*%< nodes to be reclaimed */
size_t node_size; /*%< in bytes */
size_t chunk_size; /*%< nodes per chunk */
size_t chunk_count; /*%< allocated chunks */
size_t bytes; /*%< total memory in chunks and metadata */
bool fragmented; /*%< trie needs compaction */

View file

@ -96,6 +96,19 @@ static atomic_uint_fast64_t rollback_time;
#define TRACE(...)
#endif
/*
 * Reference counting for dns_qpmulti_t. With DNS_QPMULTI_TRACE enabled the
 * tracing variant is declared and the ref/unref/attach/detach names become
 * wrappers that pass the caller's __func__, __FILE__ and __LINE__;
 * otherwise the plain static declarations are used.
 */
#if DNS_QPMULTI_TRACE
ISC_REFCOUNT_STATIC_TRACE_DECL(dns_qpmulti);
#define dns_qpmulti_ref(ptr) dns_qpmulti__ref(ptr, __func__, __FILE__, __LINE__)
#define dns_qpmulti_unref(ptr) \
dns_qpmulti__unref(ptr, __func__, __FILE__, __LINE__)
#define dns_qpmulti_attach(ptr, ptrp) \
dns_qpmulti__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
#define dns_qpmulti_detach(ptrp) \
dns_qpmulti__detach(ptrp, __func__, __FILE__, __LINE__)
#else
ISC_REFCOUNT_STATIC_DECL(dns_qpmulti);
#endif
/***********************************************************************
*
* converting DNS names to trie keys
@ -425,7 +438,7 @@ write_protect(dns_qp_t *qp, dns_qpchunk_t chunk) {
#else
#define chunk_get_raw(qp) isc_mem_allocate(qp->mctx, QP_CHUNK_BYTES)
#define chunk_get_raw(qp, size) isc_mem_allocate(qp->mctx, size)
#define chunk_free_raw(qp, ptr) isc_mem_free(qp->mctx, ptr)
#define chunk_shrink_raw(qp, ptr, size) isc_mem_reallocate(qp->mctx, ptr, size)
@ -454,6 +467,22 @@ cells_immutable(dns_qp_t *qp, dns_qpref_t ref) {
}
}
/*
 * Choose the capacity, in nodes, for a newly allocated chunk: the smallest
 * power of two that is at least as large as both `size` and
 * `prev_capacity`, clamped to the range
 * [1 << QP_CHUNK_LOG_MIN, 1 << QP_CHUNK_LOG_MAX].
 */
static dns_qpcell_t
next_capacity(uint32_t prev_capacity, uint32_t size) {
	/*
	 * __builtin_clz() is undefined for 0, so the value being rounded
	 * up is never allowed below 2; that keeps `wanted - 1` nonzero.
	 */
	uint32_t wanted = ISC_MAX3(size, prev_capacity, 2u);

	/* The bit width of `wanted - 1` is ceil(log2(wanted)). */
	uint32_t shift = 32u - __builtin_clz(wanted - 1u);

	if (shift < QP_CHUNK_LOG_MIN) {
		shift = QP_CHUNK_LOG_MIN;
	} else if (shift > QP_CHUNK_LOG_MAX) {
		shift = QP_CHUNK_LOG_MAX;
	}

	return 1U << shift;
}
/*
* Create a fresh bump chunk and allocate some twigs from it.
*/
@ -462,9 +491,15 @@ chunk_alloc(dns_qp_t *qp, dns_qpchunk_t chunk, dns_qpweight_t size) {
INSIST(qp->base->ptr[chunk] == NULL);
INSIST(qp->usage[chunk].used == 0);
INSIST(qp->usage[chunk].free == 0);
INSIST(qp->chunk_capacity <= QP_CHUNK_SIZE);
qp->base->ptr[chunk] = chunk_get_raw(qp);
qp->usage[chunk] = (qp_usage_t){ .exists = true, .used = size };
qp->chunk_capacity = next_capacity(qp->chunk_capacity * 2u, size);
qp->base->ptr[chunk] =
chunk_get_raw(qp, qp->chunk_capacity * sizeof(dns_qpnode_t));
qp->usage[chunk] = (qp_usage_t){ .exists = true,
.used = size,
.capacity = qp->chunk_capacity };
qp->used_count += size;
qp->bump = chunk;
qp->fender = 0;
@ -544,7 +579,7 @@ alloc_twigs(dns_qp_t *qp, dns_qpweight_t size) {
dns_qpchunk_t chunk = qp->bump;
dns_qpcell_t cell = qp->usage[chunk].used;
if (cell + size <= QP_CHUNK_SIZE) {
if (cell + size <= qp->usage[chunk].capacity) {
qp->usage[chunk].used += size;
qp->used_count += size;
return make_ref(chunk, cell);
@ -697,38 +732,47 @@ reclaim_chunks_cb(struct rcu_head *arg) {
REQUIRE(QPMULTI_VALID(multi));
LOCK(&multi->mutex);
dns_qp_t *qp = &multi->writer;
REQUIRE(QP_VALID(qp));
unsigned int free = 0;
isc_nanosecs_t start = isc_time_monotonic();
/*
* If chunk_max is zero, chunks have already been freed.
*/
if (qp->chunk_max != 0) {
unsigned int free = 0;
isc_nanosecs_t start = isc_time_monotonic();
for (unsigned int i = 0; i < rcuctx->count; i++) {
dns_qpchunk_t chunk = rcuctx->chunk[i];
if (qp->usage[chunk].snapshot) {
/* cleanup when snapshot is destroyed */
qp->usage[chunk].snapfree = true;
} else {
chunk_free(qp, chunk);
free++;
INSIST(QP_VALID(qp));
for (unsigned int i = 0; i < rcuctx->count; i++) {
dns_qpchunk_t chunk = rcuctx->chunk[i];
if (qp->usage[chunk].snapshot) {
/* clean up when snapshot is destroyed */
qp->usage[chunk].snapfree = true;
} else {
chunk_free(qp, chunk);
free++;
}
}
isc_nanosecs_t time = isc_time_monotonic() - start;
recycle_time += time;
if (free > 0) {
LOG_STATS("qp reclaim" PRItime "free %u chunks", time,
free);
LOG_STATS(
"qp reclaim leaf %u live %u used %u free %u "
"hold %u",
qp->leaf_count, qp->used_count - qp->free_count,
qp->used_count, qp->free_count, qp->hold_count);
}
}
UNLOCK(&multi->mutex);
dns_qpmulti_detach(&multi);
isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
isc_nanosecs_t time = isc_time_monotonic() - start;
recycle_time += time;
if (free > 0) {
LOG_STATS("qp reclaim" PRItime "free %u chunks", time, free);
LOG_STATS("qp reclaim leaf %u live %u used %u free %u hold %u",
qp->leaf_count, qp->used_count - qp->free_count,
qp->used_count, qp->free_count, qp->hold_count);
}
UNLOCK(&multi->mutex);
}
/*
@ -773,6 +817,11 @@ reclaim_chunks(dns_qpmulti_t *multi) {
}
}
/*
* Reference the qpmulti object to keep it from being
* freed until reclaim_chunks_cb() runs.
*/
dns_qpmulti_ref(multi);
call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb);
LOG_STATS("qp will reclaim %u chunks", count);
@ -1023,12 +1072,13 @@ dns_qp_memusage(dns_qp_t *qp) {
.hold = qp->hold_count,
.free = qp->free_count,
.node_size = sizeof(dns_qpnode_t),
.chunk_size = QP_CHUNK_SIZE,
.fragmented = QP_NEEDGC(qp),
};
size_t chunk_usage_bytes = 0;
for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (qp->base->ptr[chunk] != NULL) {
chunk_usage_bytes += qp->usage[chunk].capacity;
memusage.chunk_count += 1;
}
}
@ -1037,7 +1087,7 @@ dns_qp_memusage(dns_qp_t *qp) {
* XXXFANF does not subtract chunks that have been shrunk,
* and does not count unreclaimed dns_qpbase_t objects
*/
memusage.bytes = memusage.chunk_count * QP_CHUNK_BYTES +
memusage.bytes = chunk_usage_bytes +
qp->chunk_max * sizeof(qp->base->ptr[0]) +
qp->chunk_max * sizeof(qp->usage[0]);
@ -1055,7 +1105,7 @@ dns_qpmulti_memusage(dns_qpmulti_t *multi) {
dns_qp_memusage_t memusage = dns_qp_memusage(qp);
if (qp->transaction_mode == QP_UPDATE) {
memusage.bytes -= QP_CHUNK_BYTES;
memusage.bytes -= qp->usage[qp->bump].capacity;
memusage.bytes += qp->usage[qp->bump].used *
sizeof(dns_qpnode_t);
}
@ -1440,12 +1490,12 @@ dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx,
REQUIRE(qpmp != NULL && *qpmp == NULL);
dns_qpmulti_t *multi = isc_mem_get(mctx, sizeof(*multi));
*multi = (dns_qpmulti_t){
.magic = QPMULTI_MAGIC,
.reader_ref = INVALID_REF,
};
*multi = (dns_qpmulti_t){ .magic = QPMULTI_MAGIC,
.reader_ref = INVALID_REF,
.references = ISC_REFCOUNT_INITIALIZER(1) };
isc_mutex_init(&multi->mutex);
ISC_LIST_INIT(multi->snapshots);
/*
* Do not waste effort allocating a bump chunk that will be thrown
* away when a transaction is opened. dns_qpmulti_update() always
@ -1465,11 +1515,13 @@ destroy_guts(dns_qp_t *qp) {
if (qp->chunk_max == 0) {
return;
}
for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (qp->base->ptr[chunk] != NULL) {
chunk_free(qp, chunk);
}
}
qp->chunk_max = 0;
ENSURE(qp->used_count == 0);
ENSURE(qp->free_count == 0);
ENSURE(isc_refcount_current(&qp->base->refcount) == 1);
@ -1495,7 +1547,26 @@ dns_qp_destroy(dns_qp_t **qptp) {
}
/*
 * Release a dns_qpmulti_t: destroy its mutex and hand the structure back
 * through isc_mem_putanddetach(), using the memory context pointer stored
 * in the embedded writer (`multi->writer.mctx`).
 */
static void
qpmulti_destroy_cb(struct rcu_head *arg) {
qpmulti_free_mem(dns_qpmulti_t *multi) {
REQUIRE(QPMULTI_VALID(multi));
/* reassure thread sanitizer */
LOCK(&multi->mutex);
/* `qp` points into `multi` itself; it is only used to reach `mctx` */
dns_qp_t *qp = &multi->writer;
UNLOCK(&multi->mutex);
isc_mutex_destroy(&multi->mutex);
/* returns sizeof(*multi) bytes at `multi` to the context in qp->mctx */
isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi));
}
/*
 * Generate the dns_qpmulti reference-counting functions, passing
 * qpmulti_free_mem() as the second macro argument (presumably the callback
 * run when the last reference is dropped). The condition must be the same
 * one that guards the matching *_DECL macros (DNS_QPMULTI_TRACE), so that
 * the tracing declarations are always paired with the tracing
 * implementation.
 */
#if DNS_QPMULTI_TRACE
ISC_REFCOUNT_STATIC_TRACE_IMPL(dns_qpmulti, qpmulti_free_mem)
#else
ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem)
#endif
static void
qpmulti_destroy_guts_cb(struct rcu_head *arg) {
qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head);
REQUIRE(QPRCU_VALID(rcuctx));
/* only nonzero for reclaim_chunks_cb() */
@ -1514,10 +1585,9 @@ qpmulti_destroy_cb(struct rcu_head *arg) {
UNLOCK(&multi->mutex);
isc_mutex_destroy(&multi->mutex);
dns_qpmulti_detach(&multi);
isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi));
}
void
@ -1543,7 +1613,7 @@ dns_qpmulti_destroy(dns_qpmulti_t **qpmp) {
.multi = multi,
};
isc_mem_attach(qp->mctx, &rcuctx->mctx);
call_rcu(&rcuctx->rcu_head, qpmulti_destroy_cb);
call_rcu(&rcuctx->rcu_head, qpmulti_destroy_guts_cb);
}
/***********************************************************************

View file

@ -19,6 +19,8 @@
#pragma once
#include <isc/refcount.h>
#include <dns/qp.h>
/***********************************************************************
@ -141,22 +143,29 @@ enum {
* size to make the allocator work harder.
*/
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#define QP_CHUNK_LOG 7
#define QP_CHUNK_LOG_MIN 7
#define QP_CHUNK_LOG_MAX 7
#else
#define QP_CHUNK_LOG 10
#define QP_CHUNK_LOG_MIN 3
#define QP_CHUNK_LOG_MAX 12
#endif
STATIC_ASSERT(6 <= QP_CHUNK_LOG && QP_CHUNK_LOG <= 20,
"qp-trie chunk size is unreasonable");
STATIC_ASSERT(2 <= QP_CHUNK_LOG_MIN && QP_CHUNK_LOG_MIN <= QP_CHUNK_LOG_MAX,
"qp-trie min chunk size is unreasonable");
STATIC_ASSERT(6 <= QP_CHUNK_LOG_MAX && QP_CHUNK_LOG_MAX <= 20,
"qp-trie max chunk size is unreasonable");
#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG)
#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG_MAX)
#define QP_CHUNK_BYTES (QP_CHUNK_SIZE * sizeof(dns_qpnode_t))
STATIC_ASSERT(QP_SAFETY_MARGIN >= QP_CHUNK_BYTES,
"qp-trie safety margin too small");
/*
* We need a bitfield this big to count how much of a chunk is in use:
* it needs to count from 0 up to and including `1 << QP_CHUNK_LOG`.
* it needs to count from 0 up to and including `1 << QP_CHUNK_LOG_MAX`.
*/
#define QP_USAGE_BITS (QP_CHUNK_LOG + 1)
#define QP_USAGE_BITS (QP_CHUNK_LOG_MAX + 1)
/*
* A chunk needs to be compacted if it is less full than this threshold.
@ -268,6 +277,8 @@ ref_cell(dns_qpref_t ref) {
typedef struct qp_usage {
/*% the allocation point, increases monotonically */
dns_qpcell_t used : QP_USAGE_BITS;
/*% the actual size of the allocation */
dns_qpcell_t capacity : QP_USAGE_BITS;
/*% count of nodes no longer needed, also monotonic */
dns_qpcell_t free : QP_USAGE_BITS;
/*% qp->base->ptr[chunk] != NULL */
@ -322,6 +333,7 @@ typedef struct qp_rcuctx {
struct rcu_head rcu_head;
isc_mem_t *mctx;
dns_qpmulti_t *multi;
ISC_LINK(struct qp_rcuctx) link;
dns_qpchunk_t count;
dns_qpchunk_t chunk[];
} qp_rcuctx_t;
@ -479,6 +491,8 @@ struct dns_qp {
dns_qpcell_t used_count, free_count;
/*% free cells that cannot be recovered right now */
dns_qpcell_t hold_count;
/*% capacity of last allocated chunk, for exponential chunk growth */
dns_qpcell_t chunk_capacity;
/*% what kind of transaction was most recently started [MT] */
enum { QP_NONE, QP_WRITE, QP_UPDATE } transaction_mode : 2;
/*% compact the entire trie [MT] */
@ -523,6 +537,8 @@ struct dns_qpmulti {
dns_qp_t *rollback;
/*% all snapshots of this trie */
ISC_LIST(dns_qpsnap_t) snapshots;
/*% refcount for memory reclamation */
isc_refcount_t references;
};
/***********************************************************************

View file

@ -519,7 +519,7 @@ qpcache_miss(qpcache_t *qpdb, dns_slabheader_t *newheader,
size_t purgesize =
2 * (sizeof(qpcnode_t) +
dns_name_size(&HEADERNODE(newheader)->name)) +
rdataset_size(newheader) + 12288;
rdataset_size(newheader) + QP_SAFETY_MARGIN;
expire_lru_headers(qpdb, idx, purgesize, nlocktypep,
tlocktypep DNS__DB_FLARG_PASS);

View file

@ -69,6 +69,8 @@
/*%
 * Clamp `v` to the range [x, y] (assumes x <= y). The arguments may be
 * evaluated more than once, so they should not have side effects.
 */
#define ISC_CLAMP(v, x, y) ((v) < (x) ? (x) : ((v) > (y) ? (y) : (v)))
/*% Largest of three values, built from two nested ISC_MAX() calls. */
#define ISC_MAX3(a, b, c) ISC_MAX(ISC_MAX((a), (b)), (c))
/*%
* The UNCONST() macro can be used to omit warnings produced by certain
* compilers when operating with pointers declared with the const type qual-

View file

@ -434,9 +434,10 @@ isc__mem_initialize(void) {
void
isc__mem_shutdown(void) {
/* should be called after an rcu_barrier() */
bool empty;
rcu_barrier();
isc__mem_checkdestroyed();
LOCK(&contextslock);