[9.18] chg: usr: Fix CPU spikes and slow queries when cache approaches memory limit

When the cache grew close to the configured max-cache-size, every subsequent
entry triggered all worker threads to run cache cleanup at once, causing CPU
spikes and a drop in query throughput. Cleanup is now spread probabilistically
across inserts as memory approaches the limit, so the work is distributed evenly
instead of piling up at the threshold.

Backport of MR !1002

Merge branch '5891-improve-overmem-cleaning-9.18' into 'security-bind-9.18'

See merge request isc-private/bind9!1008
This commit is contained in:
Ondřej Surý 2026-05-05 15:20:43 +02:00 committed by Michał Kępień
commit 874a19c71b
No known key found for this signature in database
3 changed files with 77 additions and 11 deletions

View file

@ -29,6 +29,7 @@
#include <isc/once.h>
#include <isc/os.h>
#include <isc/print.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/string.h>
#include <isc/types.h>
@ -151,7 +152,6 @@ struct isc_mem {
atomic_size_t malloced;
atomic_size_t maxmalloced;
atomic_bool hi_called;
atomic_bool is_overmem;
isc_mem_water_t water;
void *water_arg;
atomic_size_t hi_water;
@ -534,7 +534,6 @@ mem_create(isc_mem_t **ctxp, unsigned int flags, unsigned int jemalloc_flags) {
atomic_init(&ctx->hi_water, 0);
atomic_init(&ctx->lo_water, 0);
atomic_init(&ctx->hi_called, false);
atomic_init(&ctx->is_overmem, false);
for (size_t i = 0; i < STATS_BUCKETS + 1; i++) {
atomic_init(&ctx->stats[i].gets, 0);
@ -786,9 +785,6 @@ hi_water(isc_mem_t *ctx) {
return false;
}
/* We are over water (for the first time) */
atomic_store_release(&ctx->is_overmem, true);
return true;
}
@ -810,9 +806,6 @@ lo_water(isc_mem_t *ctx) {
return false;
}
/* We are no longer overmem */
atomic_store_release(&ctx->is_overmem, false);
return true;
}
@ -1195,7 +1188,30 @@ bool
isc_mem_isovermem(isc_mem_t *ctx) {
/*
 * Report whether the memory context should be treated as "over memory",
 * judged from the 'inuse' counter and the hi_water / lo_water marks.
 * All three values are read with relaxed atomic loads.
 */
REQUIRE(VALID_CONTEXT(ctx));
/*
 * NOTE(review): read literally, this unconditional return makes every
 * statement below unreachable.  Presumably it is the previous
 * single-flag implementation shown next to its replacement -- confirm
 * against the actual file before relying on this listing.
 */
return atomic_load_relaxed(&ctx->is_overmem);
/* A hi_water mark of 0 always yields "not overmem". */
size_t hiwater = atomic_load_relaxed(&ctx->hi_water);
if (hiwater == 0) {
return false;
}
/* At or above hi_water the answer is unconditionally true. */
size_t inuse = atomic_load_relaxed(&ctx->inuse);
if (inuse >= hiwater) {
return true;
}
/* At or below lo_water the answer is unconditionally false. */
size_t lowater = atomic_load_relaxed(&ctx->lo_water);
if (inuse <= lowater) {
return false;
}
/*
* Between lo_water and hi_water, return true with a probability
* that ramps linearly from 0 at lo_water to 1 at hi_water. This
* spreads cache cleaning across many inserts instead of triggering
* a thundering herd once the hi_water mark is crossed.
*/
/*
 * Here lowater < inuse < hiwater, so the divisor is non-zero and the
 * quotient stays below 256.  The multiplication is widened to 64 bits
 * before scaling.
 */
uint32_t prob = (uint32_t)(((uint64_t)(inuse - lowater) * 256) /
(hiwater - lowater));
/* isc_random8() is presumably a uniform 8-bit value -- TODO confirm. */
return isc_random8() < prob;
}
void

View file

@ -307,7 +307,6 @@ ISC_RUN_TEST_IMPL(overmempurge_bigrdata) {
for (i = 0; !isc_mem_isovermem(mctx2) && i < (maxcache / 10); i++) {
overmempurge_addrdataset(db, now, i, 50053, 0, false);
}
assert_true(isc_mem_isovermem(mctx2));
/*
* Then try to add the same number of entries, each has very large data.
@ -353,7 +352,6 @@ ISC_RUN_TEST_IMPL(overmempurge_longname) {
for (i = 0; !isc_mem_isovermem(mctx2) && i < (maxcache / 10); i++) {
overmempurge_addrdataset(db, now, i, 50053, 0, false);
}
assert_true(isc_mem_isovermem(mctx2));
/*
* Then try to add the same number of entries, each has very large data.

View file

@ -290,6 +290,57 @@ ISC_RUN_TEST_IMPL(isc_mem_reget) {
isc_mem_put(mctx, data, REGET_SHRINK_SIZE);
}
/*
 * Poll isc_mem_isovermem() on 'omctx' up to UINT16_MAX times and report
 * whether any single call returned true.  Intended for the range where
 * the overmem answer is probability based, so one sample is not enough.
 */
static bool
at_least_one_overmem(isc_mem_t *omctx) {
	size_t remaining = UINT16_MAX;

	while (remaining > 0) {
		/* One positive answer is all we are looking for. */
		if (isc_mem_isovermem(omctx)) {
			return true;
		}
		remaining--;
	}

	return false;
}
/*
 * Water-mark callback passed to isc_mem_setwater() by the overmem test.
 * It intentionally does nothing; both arguments are ignored.
 */
static void
water(void *arg, int mark) {
	UNUSED(mark);
	UNUSED(arg);
}
/*
 * Move the context's memory usage below, between and above the water
 * marks set with isc_mem_setwater() (1024 and 512), checking what
 * isc_mem_isovermem() reports at every step on the way up and back down.
 */
ISC_RUN_TEST_IMPL(isc_mem_overmem) {
	isc_mem_t *omctx = NULL;

	isc_mem_create(&omctx);
	assert_non_null(omctx);

	isc_mem_setwater(omctx, water, NULL, 1024, 512);

	/* Usage at or below lo_water: the answer must be false. */
	void *low_chunk = isc_mem_allocate(omctx, 256);
	assert_false(isc_mem_isovermem(omctx));

	/*
	 * Usage strictly between lo_water and hi_water: each call may go
	 * either way, so only require that some call returns true.
	 */
	void *mid_chunk = isc_mem_allocate(omctx, 512);
	assert_true(at_least_one_overmem(omctx));

	/* Usage at or above hi_water: the answer must be true. */
	void *top_chunk = isc_mem_allocate(omctx, 512);
	assert_true(isc_mem_isovermem(omctx));

	/* Back between the marks: may be true or false again. */
	isc_mem_free(omctx, mid_chunk);
	assert_true(at_least_one_overmem(omctx));

	/* Back at or below lo_water: the answer must be false. */
	isc_mem_free(omctx, top_chunk);
	assert_false(isc_mem_isovermem(omctx));

	/* Nothing in use at all: the answer must be false. */
	isc_mem_free(omctx, low_chunk);
	assert_false(isc_mem_isovermem(omctx));

	isc_mem_destroy(&omctx);
}
#if ISC_MEM_TRACKLINES
/* test mem with no flags */
@ -501,6 +552,7 @@ ISC_TEST_ENTRY(isc_mem_total)
ISC_TEST_ENTRY(isc_mem_inuse)
ISC_TEST_ENTRY(isc_mem_zeroget)
ISC_TEST_ENTRY(isc_mem_reget)
ISC_TEST_ENTRY(isc_mem_overmem)
#if !defined(__SANITIZE_THREAD__)
ISC_TEST_ENTRY(isc_mem_benchmark)