mirror of
https://github.com/isc-projects/bind9.git
synced 2026-06-12 19:20:00 -04:00
chg: dev: Adaptive memory allocation strategy for qp-tries
qp-tries allocate their nodes (twigs) in chunks to reduce allocator pressure and improve memory locality. The choice of chunk size presents a tradeoff: larger chunks benefit qp-tries with many values (as seen in large zones and resolvers) but waste memory in smaller use cases. Previously, our fixed chunk size of 2^10 twigs meant that even an empty qp-trie would consume 12KB of memory, while reducing this size would negatively impact resolver performance. This commit implements an adaptive chunking strategy that: - Tracks the size of the most recently allocated chunk. - Doubles the chunk size for each new allocation until reaching a predefined maximum. This approach effectively balances memory efficiency for small tries while maintaining the performance benefits of larger chunk sizes for bigger data structures. Merge branch 'alessio/qp-small-alloc' into 'main' See merge request isc-projects/bind9!10245
This commit is contained in:
commit
dc3a1bde65
6 changed files with 143 additions and 49 deletions
|
|
@ -89,6 +89,12 @@
|
|||
#include <dns/name.h>
|
||||
#include <dns/types.h>
|
||||
|
||||
/*%
|
||||
* How many bytes a qp-trie might allocate as part of an insert. Needed for
|
||||
* overmem checks.
|
||||
*/
|
||||
#define QP_SAFETY_MARGIN ((1ul << 12ul) * 12)
|
||||
|
||||
/*%
|
||||
* A `dns_qp_t` supports single-threaded read/write access.
|
||||
*/
|
||||
|
|
@ -306,7 +312,6 @@ typedef struct dns_qp_memusage {
|
|||
size_t hold; /*%< nodes retained for readers */
|
||||
size_t free; /*%< nodes to be reclaimed */
|
||||
size_t node_size; /*%< in bytes */
|
||||
size_t chunk_size; /*%< nodes per chunk */
|
||||
size_t chunk_count; /*%< allocated chunks */
|
||||
size_t bytes; /*%< total memory in chunks and metadata */
|
||||
bool fragmented; /*%< trie needs compaction */
|
||||
|
|
|
|||
148
lib/dns/qp.c
148
lib/dns/qp.c
|
|
@ -96,6 +96,19 @@ static atomic_uint_fast64_t rollback_time;
|
|||
#define TRACE(...)
|
||||
#endif
|
||||
|
||||
#if DNS_QPMULTI_TRACE
|
||||
ISC_REFCOUNT_STATIC_TRACE_DECL(dns_qpmulti);
|
||||
#define dns_qpmulti_ref(ptr) dns_qpmulti__ref(ptr, __func__, __FILE__, __LINE__)
|
||||
#define dns_qpmulti_unref(ptr) \
|
||||
dns_qpmulti__unref(ptr, __func__, __FILE__, __LINE__)
|
||||
#define dns_qpmulti_attach(ptr, ptrp) \
|
||||
dns_qpmulti__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
|
||||
#define dns_qpmulti_detach(ptrp) \
|
||||
dns_qpmulti__detach(ptrp, __func__, __FILE__, __LINE__)
|
||||
#else
|
||||
ISC_REFCOUNT_STATIC_DECL(dns_qpmulti);
|
||||
#endif
|
||||
|
||||
/***********************************************************************
|
||||
*
|
||||
* converting DNS names to trie keys
|
||||
|
|
@ -425,7 +438,7 @@ write_protect(dns_qp_t *qp, dns_qpchunk_t chunk) {
|
|||
|
||||
#else
|
||||
|
||||
#define chunk_get_raw(qp) isc_mem_allocate(qp->mctx, QP_CHUNK_BYTES)
|
||||
#define chunk_get_raw(qp, size) isc_mem_allocate(qp->mctx, size)
|
||||
#define chunk_free_raw(qp, ptr) isc_mem_free(qp->mctx, ptr)
|
||||
|
||||
#define chunk_shrink_raw(qp, ptr, size) isc_mem_reallocate(qp->mctx, ptr, size)
|
||||
|
|
@ -454,6 +467,22 @@ cells_immutable(dns_qp_t *qp, dns_qpref_t ref) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the next power that is both bigger than size and prev_capacity,
|
||||
* but still within the chunk min and max sizes.
|
||||
*/
|
||||
static dns_qpcell_t
|
||||
next_capacity(uint32_t prev_capacity, uint32_t size) {
|
||||
/*
|
||||
* Unfortunately builtin_clz is undefined for 0. We work around this
|
||||
* issue by flooring the request size at 2.
|
||||
*/
|
||||
size = ISC_MAX3(size, prev_capacity, 2u);
|
||||
uint32_t log2 = 32u - __builtin_clz(size - 1u);
|
||||
|
||||
return 1U << ISC_CLAMP(log2, QP_CHUNK_LOG_MIN, QP_CHUNK_LOG_MAX);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a fresh bump chunk and allocate some twigs from it.
|
||||
*/
|
||||
|
|
@ -462,9 +491,15 @@ chunk_alloc(dns_qp_t *qp, dns_qpchunk_t chunk, dns_qpweight_t size) {
|
|||
INSIST(qp->base->ptr[chunk] == NULL);
|
||||
INSIST(qp->usage[chunk].used == 0);
|
||||
INSIST(qp->usage[chunk].free == 0);
|
||||
INSIST(qp->chunk_capacity <= QP_CHUNK_SIZE);
|
||||
|
||||
qp->base->ptr[chunk] = chunk_get_raw(qp);
|
||||
qp->usage[chunk] = (qp_usage_t){ .exists = true, .used = size };
|
||||
qp->chunk_capacity = next_capacity(qp->chunk_capacity * 2u, size);
|
||||
qp->base->ptr[chunk] =
|
||||
chunk_get_raw(qp, qp->chunk_capacity * sizeof(dns_qpnode_t));
|
||||
|
||||
qp->usage[chunk] = (qp_usage_t){ .exists = true,
|
||||
.used = size,
|
||||
.capacity = qp->chunk_capacity };
|
||||
qp->used_count += size;
|
||||
qp->bump = chunk;
|
||||
qp->fender = 0;
|
||||
|
|
@ -544,7 +579,7 @@ alloc_twigs(dns_qp_t *qp, dns_qpweight_t size) {
|
|||
dns_qpchunk_t chunk = qp->bump;
|
||||
dns_qpcell_t cell = qp->usage[chunk].used;
|
||||
|
||||
if (cell + size <= QP_CHUNK_SIZE) {
|
||||
if (cell + size <= qp->usage[chunk].capacity) {
|
||||
qp->usage[chunk].used += size;
|
||||
qp->used_count += size;
|
||||
return make_ref(chunk, cell);
|
||||
|
|
@ -697,38 +732,47 @@ reclaim_chunks_cb(struct rcu_head *arg) {
|
|||
REQUIRE(QPMULTI_VALID(multi));
|
||||
|
||||
LOCK(&multi->mutex);
|
||||
|
||||
dns_qp_t *qp = &multi->writer;
|
||||
REQUIRE(QP_VALID(qp));
|
||||
|
||||
unsigned int free = 0;
|
||||
isc_nanosecs_t start = isc_time_monotonic();
|
||||
/*
|
||||
* If chunk_max is zero, chunks have already been freed.
|
||||
*/
|
||||
if (qp->chunk_max != 0) {
|
||||
unsigned int free = 0;
|
||||
isc_nanosecs_t start = isc_time_monotonic();
|
||||
|
||||
for (unsigned int i = 0; i < rcuctx->count; i++) {
|
||||
dns_qpchunk_t chunk = rcuctx->chunk[i];
|
||||
if (qp->usage[chunk].snapshot) {
|
||||
/* cleanup when snapshot is destroyed */
|
||||
qp->usage[chunk].snapfree = true;
|
||||
} else {
|
||||
chunk_free(qp, chunk);
|
||||
free++;
|
||||
INSIST(QP_VALID(qp));
|
||||
|
||||
for (unsigned int i = 0; i < rcuctx->count; i++) {
|
||||
dns_qpchunk_t chunk = rcuctx->chunk[i];
|
||||
if (qp->usage[chunk].snapshot) {
|
||||
/* clean up when snapshot is destroyed */
|
||||
qp->usage[chunk].snapfree = true;
|
||||
} else {
|
||||
chunk_free(qp, chunk);
|
||||
free++;
|
||||
}
|
||||
}
|
||||
|
||||
isc_nanosecs_t time = isc_time_monotonic() - start;
|
||||
recycle_time += time;
|
||||
|
||||
if (free > 0) {
|
||||
LOG_STATS("qp reclaim" PRItime "free %u chunks", time,
|
||||
free);
|
||||
LOG_STATS(
|
||||
"qp reclaim leaf %u live %u used %u free %u "
|
||||
"hold %u",
|
||||
qp->leaf_count, qp->used_count - qp->free_count,
|
||||
qp->used_count, qp->free_count, qp->hold_count);
|
||||
}
|
||||
}
|
||||
|
||||
UNLOCK(&multi->mutex);
|
||||
|
||||
dns_qpmulti_detach(&multi);
|
||||
isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
|
||||
STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
|
||||
|
||||
isc_nanosecs_t time = isc_time_monotonic() - start;
|
||||
recycle_time += time;
|
||||
|
||||
if (free > 0) {
|
||||
LOG_STATS("qp reclaim" PRItime "free %u chunks", time, free);
|
||||
LOG_STATS("qp reclaim leaf %u live %u used %u free %u hold %u",
|
||||
qp->leaf_count, qp->used_count - qp->free_count,
|
||||
qp->used_count, qp->free_count, qp->hold_count);
|
||||
}
|
||||
|
||||
UNLOCK(&multi->mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -773,6 +817,11 @@ reclaim_chunks(dns_qpmulti_t *multi) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Reference the qpmulti object to keep it from being
|
||||
* freed until reclaim_chunks_cb() runs.
|
||||
*/
|
||||
dns_qpmulti_ref(multi);
|
||||
call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb);
|
||||
|
||||
LOG_STATS("qp will reclaim %u chunks", count);
|
||||
|
|
@ -1023,12 +1072,13 @@ dns_qp_memusage(dns_qp_t *qp) {
|
|||
.hold = qp->hold_count,
|
||||
.free = qp->free_count,
|
||||
.node_size = sizeof(dns_qpnode_t),
|
||||
.chunk_size = QP_CHUNK_SIZE,
|
||||
.fragmented = QP_NEEDGC(qp),
|
||||
};
|
||||
|
||||
size_t chunk_usage_bytes = 0;
|
||||
for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
|
||||
if (qp->base->ptr[chunk] != NULL) {
|
||||
chunk_usage_bytes += qp->usage[chunk].capacity;
|
||||
memusage.chunk_count += 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -1037,7 +1087,7 @@ dns_qp_memusage(dns_qp_t *qp) {
|
|||
* XXXFANF does not subtract chunks that have been shrunk,
|
||||
* and does not count unreclaimed dns_qpbase_t objects
|
||||
*/
|
||||
memusage.bytes = memusage.chunk_count * QP_CHUNK_BYTES +
|
||||
memusage.bytes = chunk_usage_bytes +
|
||||
qp->chunk_max * sizeof(qp->base->ptr[0]) +
|
||||
qp->chunk_max * sizeof(qp->usage[0]);
|
||||
|
||||
|
|
@ -1055,7 +1105,7 @@ dns_qpmulti_memusage(dns_qpmulti_t *multi) {
|
|||
dns_qp_memusage_t memusage = dns_qp_memusage(qp);
|
||||
|
||||
if (qp->transaction_mode == QP_UPDATE) {
|
||||
memusage.bytes -= QP_CHUNK_BYTES;
|
||||
memusage.bytes -= qp->usage[qp->bump].capacity;
|
||||
memusage.bytes += qp->usage[qp->bump].used *
|
||||
sizeof(dns_qpnode_t);
|
||||
}
|
||||
|
|
@ -1440,12 +1490,12 @@ dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx,
|
|||
REQUIRE(qpmp != NULL && *qpmp == NULL);
|
||||
|
||||
dns_qpmulti_t *multi = isc_mem_get(mctx, sizeof(*multi));
|
||||
*multi = (dns_qpmulti_t){
|
||||
.magic = QPMULTI_MAGIC,
|
||||
.reader_ref = INVALID_REF,
|
||||
};
|
||||
*multi = (dns_qpmulti_t){ .magic = QPMULTI_MAGIC,
|
||||
.reader_ref = INVALID_REF,
|
||||
.references = ISC_REFCOUNT_INITIALIZER(1) };
|
||||
isc_mutex_init(&multi->mutex);
|
||||
ISC_LIST_INIT(multi->snapshots);
|
||||
|
||||
/*
|
||||
* Do not waste effort allocating a bump chunk that will be thrown
|
||||
* away when a transaction is opened. dns_qpmulti_update() always
|
||||
|
|
@ -1465,11 +1515,13 @@ destroy_guts(dns_qp_t *qp) {
|
|||
if (qp->chunk_max == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
|
||||
if (qp->base->ptr[chunk] != NULL) {
|
||||
chunk_free(qp, chunk);
|
||||
}
|
||||
}
|
||||
qp->chunk_max = 0;
|
||||
ENSURE(qp->used_count == 0);
|
||||
ENSURE(qp->free_count == 0);
|
||||
ENSURE(isc_refcount_current(&qp->base->refcount) == 1);
|
||||
|
|
@ -1495,7 +1547,26 @@ dns_qp_destroy(dns_qp_t **qptp) {
|
|||
}
|
||||
|
||||
static void
|
||||
qpmulti_destroy_cb(struct rcu_head *arg) {
|
||||
qpmulti_free_mem(dns_qpmulti_t *multi) {
|
||||
REQUIRE(QPMULTI_VALID(multi));
|
||||
|
||||
/* reassure thread sanitizer */
|
||||
LOCK(&multi->mutex);
|
||||
dns_qp_t *qp = &multi->writer;
|
||||
UNLOCK(&multi->mutex);
|
||||
|
||||
isc_mutex_destroy(&multi->mutex);
|
||||
isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi));
|
||||
}
|
||||
|
||||
#if QPMULTI_TRACE
|
||||
ISC_REFCOUNT_STATIC_TRACE_IMPL(dns_qpmulti, qpmulti_free_mem)
|
||||
#else
|
||||
ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem)
|
||||
#endif
|
||||
|
||||
static void
|
||||
qpmulti_destroy_guts_cb(struct rcu_head *arg) {
|
||||
qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head);
|
||||
REQUIRE(QPRCU_VALID(rcuctx));
|
||||
/* only nonzero for reclaim_chunks_cb() */
|
||||
|
|
@ -1514,10 +1585,9 @@ qpmulti_destroy_cb(struct rcu_head *arg) {
|
|||
|
||||
UNLOCK(&multi->mutex);
|
||||
|
||||
isc_mutex_destroy(&multi->mutex);
|
||||
dns_qpmulti_detach(&multi);
|
||||
isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
|
||||
STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
|
||||
isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi));
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1543,7 +1613,7 @@ dns_qpmulti_destroy(dns_qpmulti_t **qpmp) {
|
|||
.multi = multi,
|
||||
};
|
||||
isc_mem_attach(qp->mctx, &rcuctx->mctx);
|
||||
call_rcu(&rcuctx->rcu_head, qpmulti_destroy_cb);
|
||||
call_rcu(&rcuctx->rcu_head, qpmulti_destroy_guts_cb);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <isc/refcount.h>
|
||||
|
||||
#include <dns/qp.h>
|
||||
|
||||
/***********************************************************************
|
||||
|
|
@ -141,22 +143,29 @@ enum {
|
|||
* size to make the allocator work harder.
|
||||
*/
|
||||
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
#define QP_CHUNK_LOG 7
|
||||
#define QP_CHUNK_LOG_MIN 7
|
||||
#define QP_CHUNK_LOG_MAX 7
|
||||
#else
|
||||
#define QP_CHUNK_LOG 10
|
||||
#define QP_CHUNK_LOG_MIN 3
|
||||
#define QP_CHUNK_LOG_MAX 12
|
||||
#endif
|
||||
|
||||
STATIC_ASSERT(6 <= QP_CHUNK_LOG && QP_CHUNK_LOG <= 20,
|
||||
"qp-trie chunk size is unreasonable");
|
||||
STATIC_ASSERT(2 <= QP_CHUNK_LOG_MIN && QP_CHUNK_LOG_MIN <= QP_CHUNK_LOG_MAX,
|
||||
"qp-trie min chunk size is unreasonable");
|
||||
STATIC_ASSERT(6 <= QP_CHUNK_LOG_MAX && QP_CHUNK_LOG_MAX <= 20,
|
||||
"qp-trie max chunk size is unreasonable");
|
||||
|
||||
#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG)
|
||||
#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG_MAX)
|
||||
#define QP_CHUNK_BYTES (QP_CHUNK_SIZE * sizeof(dns_qpnode_t))
|
||||
|
||||
STATIC_ASSERT(QP_SAFETY_MARGIN >= QP_CHUNK_BYTES,
|
||||
"qp-trie safety margin too small");
|
||||
|
||||
/*
|
||||
* We need a bitfield this big to count how much of a chunk is in use:
|
||||
* it needs to count from 0 up to and including `1 << QP_CHUNK_LOG`.
|
||||
* it needs to count from 0 up to and including `1 << QP_CHUNK_LOG_MAX`.
|
||||
*/
|
||||
#define QP_USAGE_BITS (QP_CHUNK_LOG + 1)
|
||||
#define QP_USAGE_BITS (QP_CHUNK_LOG_MAX + 1)
|
||||
|
||||
/*
|
||||
* A chunk needs to be compacted if it is less full than this threshold.
|
||||
|
|
@ -268,6 +277,8 @@ ref_cell(dns_qpref_t ref) {
|
|||
typedef struct qp_usage {
|
||||
/*% the allocation point, increases monotonically */
|
||||
dns_qpcell_t used : QP_USAGE_BITS;
|
||||
/*% the actual size of the allocation */
|
||||
dns_qpcell_t capacity : QP_USAGE_BITS;
|
||||
/*% count of nodes no longer needed, also monotonic */
|
||||
dns_qpcell_t free : QP_USAGE_BITS;
|
||||
/*% qp->base->ptr[chunk] != NULL */
|
||||
|
|
@ -322,6 +333,7 @@ typedef struct qp_rcuctx {
|
|||
struct rcu_head rcu_head;
|
||||
isc_mem_t *mctx;
|
||||
dns_qpmulti_t *multi;
|
||||
ISC_LINK(struct qp_rcuctx) link;
|
||||
dns_qpchunk_t count;
|
||||
dns_qpchunk_t chunk[];
|
||||
} qp_rcuctx_t;
|
||||
|
|
@ -479,6 +491,8 @@ struct dns_qp {
|
|||
dns_qpcell_t used_count, free_count;
|
||||
/*% free cells that cannot be recovered right now */
|
||||
dns_qpcell_t hold_count;
|
||||
/*% capacity of last allocated chunk, for exponential chunk growth */
|
||||
dns_qpcell_t chunk_capacity;
|
||||
/*% what kind of transaction was most recently started [MT] */
|
||||
enum { QP_NONE, QP_WRITE, QP_UPDATE } transaction_mode : 2;
|
||||
/*% compact the entire trie [MT] */
|
||||
|
|
@ -523,6 +537,8 @@ struct dns_qpmulti {
|
|||
dns_qp_t *rollback;
|
||||
/*% all snapshots of this trie */
|
||||
ISC_LIST(dns_qpsnap_t) snapshots;
|
||||
/*% refcount for memory reclamation */
|
||||
isc_refcount_t references;
|
||||
};
|
||||
|
||||
/***********************************************************************
|
||||
|
|
|
|||
|
|
@ -519,7 +519,7 @@ qpcache_miss(qpcache_t *qpdb, dns_slabheader_t *newheader,
|
|||
size_t purgesize =
|
||||
2 * (sizeof(qpcnode_t) +
|
||||
dns_name_size(&HEADERNODE(newheader)->name)) +
|
||||
rdataset_size(newheader) + 12288;
|
||||
rdataset_size(newheader) + QP_SAFETY_MARGIN;
|
||||
|
||||
expire_lru_headers(qpdb, idx, purgesize, nlocktypep,
|
||||
tlocktypep DNS__DB_FLARG_PASS);
|
||||
|
|
|
|||
|
|
@ -69,6 +69,8 @@
|
|||
|
||||
#define ISC_CLAMP(v, x, y) ((v) < (x) ? (x) : ((v) > (y) ? (y) : (v)))
|
||||
|
||||
#define ISC_MAX3(a, b, c) ISC_MAX(ISC_MAX((a), (b)), (c))
|
||||
|
||||
/*%
|
||||
* The UNCONST() macro can be used to omit warnings produced by certain
|
||||
* compilers when operating with pointers declared with the const type qual-
|
||||
|
|
|
|||
|
|
@ -434,9 +434,10 @@ isc__mem_initialize(void) {
|
|||
|
||||
void
|
||||
isc__mem_shutdown(void) {
|
||||
/* should be called after an rcu_barrier() */
|
||||
bool empty;
|
||||
|
||||
rcu_barrier();
|
||||
|
||||
isc__mem_checkdestroyed();
|
||||
|
||||
LOCK(&contextslock);
|
||||
|
|
|
|||
Loading…
Reference in a new issue