diff --git a/lib/dns/include/dns/qp.h b/lib/dns/include/dns/qp.h index 16eca977d9..d72730c80a 100644 --- a/lib/dns/include/dns/qp.h +++ b/lib/dns/include/dns/qp.h @@ -89,6 +89,12 @@ #include #include +/*% + * How many bytes a qp-trie might allocate as part of an insert. Needed for + * overmem checks. + */ +#define QP_SAFETY_MARGIN ((1ul << 12ul) * 12) + /*% * A `dns_qp_t` supports single-threaded read/write access. */ @@ -306,7 +312,6 @@ typedef struct dns_qp_memusage { size_t hold; /*%< nodes retained for readers */ size_t free; /*%< nodes to be reclaimed */ size_t node_size; /*%< in bytes */ - size_t chunk_size; /*%< nodes per chunk */ size_t chunk_count; /*%< allocated chunks */ size_t bytes; /*%< total memory in chunks and metadata */ bool fragmented; /*%< trie needs compaction */ diff --git a/lib/dns/qp.c b/lib/dns/qp.c index 79a0b8e6ee..afed2a7fcf 100644 --- a/lib/dns/qp.c +++ b/lib/dns/qp.c @@ -96,6 +96,19 @@ static atomic_uint_fast64_t rollback_time; #define TRACE(...) #endif +#if DNS_QPMULTI_TRACE +ISC_REFCOUNT_STATIC_TRACE_DECL(dns_qpmulti); +#define dns_qpmulti_ref(ptr) dns_qpmulti__ref(ptr, __func__, __FILE__, __LINE__) +#define dns_qpmulti_unref(ptr) \ + dns_qpmulti__unref(ptr, __func__, __FILE__, __LINE__) +#define dns_qpmulti_attach(ptr, ptrp) \ + dns_qpmulti__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define dns_qpmulti_detach(ptrp) \ + dns_qpmulti__detach(ptrp, __func__, __FILE__, __LINE__) +#else +ISC_REFCOUNT_STATIC_DECL(dns_qpmulti); +#endif + /*********************************************************************** * * converting DNS names to trie keys @@ -425,7 +438,7 @@ write_protect(dns_qp_t *qp, dns_qpchunk_t chunk) { #else -#define chunk_get_raw(qp) isc_mem_allocate(qp->mctx, QP_CHUNK_BYTES) +#define chunk_get_raw(qp, size) isc_mem_allocate(qp->mctx, size) #define chunk_free_raw(qp, ptr) isc_mem_free(qp->mctx, ptr) #define chunk_shrink_raw(qp, ptr, size) isc_mem_reallocate(qp->mctx, ptr, size) @@ -454,6 +467,22 @@ cells_immutable(dns_qp_t *qp, dns_qpref_t ref) { } } +/* + * Find the next power that is both bigger than size and prev_capacity, + * but still within the chunk min and max sizes. + */ +static dns_qpcell_t +next_capacity(uint32_t prev_capacity, uint32_t size) { + /* + * Unfortunately builtin_clz is undefined for 0. We work around this + * issue by flooring the request size at 2. + */ + size = ISC_MAX3(size, prev_capacity, 2u); + uint32_t log2 = 32u - __builtin_clz(size - 1u); + + return 1U << ISC_CLAMP(log2, QP_CHUNK_LOG_MIN, QP_CHUNK_LOG_MAX); +} + /* * Create a fresh bump chunk and allocate some twigs from it. */ @@ -462,9 +491,15 @@ chunk_alloc(dns_qp_t *qp, dns_qpchunk_t chunk, dns_qpweight_t size) { INSIST(qp->base->ptr[chunk] == NULL); INSIST(qp->usage[chunk].used == 0); INSIST(qp->usage[chunk].free == 0); + INSIST(qp->chunk_capacity <= QP_CHUNK_SIZE); - qp->base->ptr[chunk] = chunk_get_raw(qp); - qp->usage[chunk] = (qp_usage_t){ .exists = true, .used = size }; + qp->chunk_capacity = next_capacity(qp->chunk_capacity * 2u, size); + qp->base->ptr[chunk] = + chunk_get_raw(qp, qp->chunk_capacity * sizeof(dns_qpnode_t)); + + qp->usage[chunk] = (qp_usage_t){ .exists = true, + .used = size, + .capacity = qp->chunk_capacity }; qp->used_count += size; qp->bump = chunk; qp->fender = 0; @@ -544,7 +579,7 @@ alloc_twigs(dns_qp_t *qp, dns_qpweight_t size) { dns_qpchunk_t chunk = qp->bump; dns_qpcell_t cell = qp->usage[chunk].used; - if (cell + size <= QP_CHUNK_SIZE) { + if (cell + size <= qp->usage[chunk].capacity) { qp->usage[chunk].used += size; qp->used_count += size; return make_ref(chunk, cell); @@ -697,38 +732,47 @@ reclaim_chunks_cb(struct rcu_head *arg) { REQUIRE(QPMULTI_VALID(multi)); LOCK(&multi->mutex); - dns_qp_t *qp = &multi->writer; - REQUIRE(QP_VALID(qp)); - unsigned int free = 0; - isc_nanosecs_t start = isc_time_monotonic(); + /* + * If chunk_max is zero, chunks have already been freed. + */ + if (qp->chunk_max != 0) { + unsigned int free = 0; + isc_nanosecs_t start = isc_time_monotonic(); - for (unsigned int i = 0; i < rcuctx->count; i++) { - dns_qpchunk_t chunk = rcuctx->chunk[i]; - if (qp->usage[chunk].snapshot) { - /* cleanup when snapshot is destroyed */ - qp->usage[chunk].snapfree = true; - } else { - chunk_free(qp, chunk); - free++; + INSIST(QP_VALID(qp)); + + for (unsigned int i = 0; i < rcuctx->count; i++) { + dns_qpchunk_t chunk = rcuctx->chunk[i]; + if (qp->usage[chunk].snapshot) { + /* clean up when snapshot is destroyed */ + qp->usage[chunk].snapfree = true; + } else { + chunk_free(qp, chunk); + free++; + } + } + + isc_nanosecs_t time = isc_time_monotonic() - start; + recycle_time += time; + + if (free > 0) { + LOG_STATS("qp reclaim" PRItime "free %u chunks", time, + free); + LOG_STATS( + "qp reclaim leaf %u live %u used %u free %u " + "hold %u", + qp->leaf_count, qp->used_count - qp->free_count, + qp->used_count, qp->free_count, qp->hold_count); } } + UNLOCK(&multi->mutex); + + dns_qpmulti_detach(&multi); isc_mem_putanddetach(&rcuctx->mctx, rcuctx, STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); - - isc_nanosecs_t time = isc_time_monotonic() - start; - recycle_time += time; - - if (free > 0) { - LOG_STATS("qp reclaim" PRItime "free %u chunks", time, free); - LOG_STATS("qp reclaim leaf %u live %u used %u free %u hold %u", - qp->leaf_count, qp->used_count - qp->free_count, - qp->used_count, qp->free_count, qp->hold_count); - } - - UNLOCK(&multi->mutex); } /* @@ -773,6 +817,11 @@ reclaim_chunks(dns_qpmulti_t *multi) { } } + /* + * Reference the qpmulti object to keep it from being + * freed until reclaim_chunks_cb() runs. + */ + dns_qpmulti_ref(multi); call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb); LOG_STATS("qp will reclaim %u chunks", count); @@ -1023,12 +1072,13 @@ dns_qp_memusage(dns_qp_t *qp) { .hold = qp->hold_count, .free = qp->free_count, .node_size = sizeof(dns_qpnode_t), - .chunk_size = QP_CHUNK_SIZE, .fragmented = QP_NEEDGC(qp), }; + size_t chunk_usage_bytes = 0; for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { if (qp->base->ptr[chunk] != NULL) { + chunk_usage_bytes += qp->usage[chunk].capacity; memusage.chunk_count += 1; } } @@ -1037,7 +1087,7 @@ dns_qp_memusage(dns_qp_t *qp) { * XXXFANF does not subtract chunks that have been shrunk, * and does not count unreclaimed dns_qpbase_t objects */ - memusage.bytes = memusage.chunk_count * QP_CHUNK_BYTES + + memusage.bytes = chunk_usage_bytes + qp->chunk_max * sizeof(qp->base->ptr[0]) + qp->chunk_max * sizeof(qp->usage[0]); @@ -1055,7 +1105,7 @@ dns_qpmulti_memusage(dns_qpmulti_t *multi) { dns_qp_memusage_t memusage = dns_qp_memusage(qp); if (qp->transaction_mode == QP_UPDATE) { - memusage.bytes -= QP_CHUNK_BYTES; + memusage.bytes -= qp->usage[qp->bump].capacity; memusage.bytes += qp->usage[qp->bump].used * sizeof(dns_qpnode_t); } @@ -1440,12 +1490,12 @@ dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx, REQUIRE(qpmp != NULL && *qpmp == NULL); dns_qpmulti_t *multi = isc_mem_get(mctx, sizeof(*multi)); - *multi = (dns_qpmulti_t){ - .magic = QPMULTI_MAGIC, - .reader_ref = INVALID_REF, - }; + *multi = (dns_qpmulti_t){ .magic = QPMULTI_MAGIC, + .reader_ref = INVALID_REF, + .references = ISC_REFCOUNT_INITIALIZER(1) }; isc_mutex_init(&multi->mutex); ISC_LIST_INIT(multi->snapshots); + /* * Do not waste effort allocating a bump chunk that will be thrown * away when a transaction is opened. dns_qpmulti_update() always @@ -1465,11 +1515,13 @@ destroy_guts(dns_qp_t *qp) { if (qp->chunk_max == 0) { return; } + for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { if (qp->base->ptr[chunk] != NULL) { chunk_free(qp, chunk); } } + qp->chunk_max = 0; ENSURE(qp->used_count == 0); ENSURE(qp->free_count == 0); ENSURE(isc_refcount_current(&qp->base->refcount) == 1); @@ -1495,7 +1547,26 @@ dns_qp_destroy(dns_qp_t **qptp) { } static void -qpmulti_destroy_cb(struct rcu_head *arg) { +qpmulti_free_mem(dns_qpmulti_t *multi) { + REQUIRE(QPMULTI_VALID(multi)); + + /* reassure thread sanitizer */ + LOCK(&multi->mutex); + dns_qp_t *qp = &multi->writer; + UNLOCK(&multi->mutex); + + isc_mutex_destroy(&multi->mutex); + isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi)); +} + +#if QPMULTI_TRACE +ISC_REFCOUNT_STATIC_TRACE_IMPL(dns_qpmulti, qpmulti_free_mem) +#else +ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem) +#endif + +static void +qpmulti_destroy_guts_cb(struct rcu_head *arg) { qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head); REQUIRE(QPRCU_VALID(rcuctx)); /* only nonzero for reclaim_chunks_cb() */ @@ -1514,10 +1585,9 @@ qpmulti_destroy_cb(struct rcu_head *arg) { UNLOCK(&multi->mutex); - isc_mutex_destroy(&multi->mutex); + dns_qpmulti_detach(&multi); isc_mem_putanddetach(&rcuctx->mctx, rcuctx, STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); - isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi)); } void @@ -1543,7 +1613,7 @@ dns_qpmulti_destroy(dns_qpmulti_t **qpmp) { .multi = multi, }; isc_mem_attach(qp->mctx, &rcuctx->mctx); - call_rcu(&rcuctx->rcu_head, qpmulti_destroy_cb); + call_rcu(&rcuctx->rcu_head, qpmulti_destroy_guts_cb); } /*********************************************************************** diff --git a/lib/dns/qp_p.h b/lib/dns/qp_p.h index 0206c5fd56..efe5c5d9b0 100644 --- a/lib/dns/qp_p.h +++ b/lib/dns/qp_p.h @@ -19,6 +19,8 @@ #pragma once +#include + #include /*********************************************************************** @@ -141,22 +143,29 @@ enum { * size to make the allocator work harder. */ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -#define QP_CHUNK_LOG 7 +#define QP_CHUNK_LOG_MIN 7 +#define QP_CHUNK_LOG_MAX 7 #else -#define QP_CHUNK_LOG 10 +#define QP_CHUNK_LOG_MIN 3 +#define QP_CHUNK_LOG_MAX 12 #endif -STATIC_ASSERT(6 <= QP_CHUNK_LOG && QP_CHUNK_LOG <= 20, - "qp-trie chunk size is unreasonable"); +STATIC_ASSERT(2 <= QP_CHUNK_LOG_MIN && QP_CHUNK_LOG_MIN <= QP_CHUNK_LOG_MAX, + "qp-trie min chunk size is unreasonable"); +STATIC_ASSERT(6 <= QP_CHUNK_LOG_MAX && QP_CHUNK_LOG_MAX <= 20, + "qp-trie max chunk size is unreasonable"); -#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG) +#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG_MAX) #define QP_CHUNK_BYTES (QP_CHUNK_SIZE * sizeof(dns_qpnode_t)) +STATIC_ASSERT(QP_SAFETY_MARGIN >= QP_CHUNK_BYTES, + "qp-trie safety margin too small"); + /* * We need a bitfield this big to count how much of a chunk is in use: - * it needs to count from 0 up to and including `1 << QP_CHUNK_LOG`. + * it needs to count from 0 up to and including `1 << QP_CHUNK_LOG_MAX`. */ -#define QP_USAGE_BITS (QP_CHUNK_LOG + 1) +#define QP_USAGE_BITS (QP_CHUNK_LOG_MAX + 1) /* * A chunk needs to be compacted if it is less full than this threshold. @@ -268,6 +277,8 @@ ref_cell(dns_qpref_t ref) { typedef struct qp_usage { /*% the allocation point, increases monotonically */ dns_qpcell_t used : QP_USAGE_BITS; + /*% the actual size of the allocation */ + dns_qpcell_t capacity : QP_USAGE_BITS; /*% count of nodes no longer needed, also monotonic */ dns_qpcell_t free : QP_USAGE_BITS; /*% qp->base->ptr[chunk] != NULL */ @@ -322,6 +333,7 @@ typedef struct qp_rcuctx { struct rcu_head rcu_head; isc_mem_t *mctx; dns_qpmulti_t *multi; + ISC_LINK(struct qp_rcuctx) link; dns_qpchunk_t count; dns_qpchunk_t chunk[]; } qp_rcuctx_t; @@ -479,6 +491,8 @@ struct dns_qp { dns_qpcell_t used_count, free_count; /*% free cells that cannot be recovered right now */ dns_qpcell_t hold_count; + /*% capacity of last allocated chunk, for exponential chunk growth */ + dns_qpcell_t chunk_capacity; /*% what kind of transaction was most recently started [MT] */ enum { QP_NONE, QP_WRITE, QP_UPDATE } transaction_mode : 2; /*% compact the entire trie [MT] */ @@ -523,6 +537,8 @@ struct dns_qpmulti { dns_qp_t *rollback; /*% all snapshots of this trie */ ISC_LIST(dns_qpsnap_t) snapshots; + /*% refcount for memory reclamation */ + isc_refcount_t references; }; /*********************************************************************** diff --git a/lib/dns/qpcache.c b/lib/dns/qpcache.c index 032dc47a1a..40b7b206c7 100644 --- a/lib/dns/qpcache.c +++ b/lib/dns/qpcache.c @@ -519,7 +519,7 @@ qpcache_miss(qpcache_t *qpdb, dns_slabheader_t *newheader, size_t purgesize = 2 * (sizeof(qpcnode_t) + dns_name_size(&HEADERNODE(newheader)->name)) + - rdataset_size(newheader) + 12288; + rdataset_size(newheader) + QP_SAFETY_MARGIN; expire_lru_headers(qpdb, idx, purgesize, nlocktypep, tlocktypep DNS__DB_FLARG_PASS); diff --git a/lib/isc/include/isc/util.h b/lib/isc/include/isc/util.h index 9fed22df60..971f21dc1e 100644 --- a/lib/isc/include/isc/util.h +++ b/lib/isc/include/isc/util.h @@ -69,6 +69,8 @@ #define ISC_CLAMP(v, x, y) ((v) < (x) ? (x) : ((v) > (y) ? (y) : (v))) +#define ISC_MAX3(a, b, c) ISC_MAX(ISC_MAX((a), (b)), (c)) + /*% * The UNCONST() macro can be used to omit warnings produced by certain * compilers when operating with pointers declared with the const type qual- diff --git a/lib/isc/mem.c b/lib/isc/mem.c index ff34485ae0..c1b0ab7331 100644 --- a/lib/isc/mem.c +++ b/lib/isc/mem.c @@ -434,9 +434,10 @@ isc__mem_initialize(void) { void isc__mem_shutdown(void) { - /* should be called after an rcu_barrier() */ bool empty; + rcu_barrier(); + isc__mem_checkdestroyed(); LOCK(&contextslock);