From 24ac3392d99bf2d22976290f76ddad4317597349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Sun, 19 Apr 2026 21:36:43 +0200 Subject: [PATCH] Make isc_mem_isovermem() probabilistic Replace the hysteretic hi_water/lo_water switch with a stochastic check: always false below lo_water, always true at or above hi_water, linearly ramped probability in between. This spreads cache cleaning across many inserts instead of triggering a thundering herd once the hi_water mark is crossed (which causes every addrdataset to enter the LRU purge path simultaneously and serializes lookups behind the node write locks). The is_overmem atomic and its stores are no longer needed and are removed. The existing tests that asserted specific hysteretic state transitions are simplified to check only the deterministic boundaries. --- lib/isc/mem.c | 67 +++++++++++++++---------------------------- tests/dns/qpdb_test.c | 44 ++++++++++++++-------------- tests/isc/mem_test.c | 27 +++++++++++------ 3 files changed, 64 insertions(+), 74 deletions(-) diff --git a/lib/isc/mem.c b/lib/isc/mem.c index 9385d4a83a..2ef4e28583 100644 --- a/lib/isc/mem.c +++ b/lib/isc/mem.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -126,7 +127,6 @@ static isc_mutex_t contextslock; typedef union { struct { atomic_int_fast64_t inuse; - atomic_bool is_overmem; }; char padding[ISC_OS_CACHELINE_SIZE]; } isc__mem_stat_t; @@ -620,7 +620,6 @@ mem_create(const char *name, isc_mem_t **ctxp, unsigned int debugging, for (size_t i = 0; i < ARRAY_SIZE(ctx->stat_s); i++) { atomic_init(&ctx->stat_s[i].inuse, 0); - atomic_init(&ctx->stat_s[i].is_overmem, false); } /* Reserve the [-1] index for ISC_TID_UNKNOWN */ @@ -1020,50 +1019,30 @@ bool isc_mem_isovermem(isc_mem_t *ctx) { REQUIRE(VALID_CONTEXT(ctx)); - int32_t tid = isc_tid(); - - bool is_overmem = atomic_load_relaxed(&ctx->stat[tid].is_overmem); - - if (!is_overmem) { - /* We are not overmem, check whether we should be? */ - size_t hiwater = atomic_load_relaxed(&ctx->hi_water); - if (hiwater == 0) { - return false; - } - - size_t inuse = isc_mem_inuse(ctx); - if (inuse <= hiwater) { - return false; - } - - if ((ctx->debugging & ISC_MEM_DEBUGUSAGE) != 0) { - fprintf(stderr, - "overmem %s mctx %p inuse %zu hi_water %zu\n", - ctx->name, ctx, inuse, hiwater); - } - - atomic_store_relaxed(&ctx->stat[tid].is_overmem, true); - return true; - } else { - /* We are overmem, check whether we should not be? */ - size_t lowater = atomic_load_relaxed(&ctx->lo_water); - if (lowater == 0) { - return false; - } - - size_t inuse = isc_mem_inuse(ctx); - if (inuse >= lowater) { - return true; - } - - if ((ctx->debugging & ISC_MEM_DEBUGUSAGE) != 0) { - fprintf(stderr, - "overmem %s mctx %p inuse %zu lo_water %zu\n", - ctx->name, ctx, inuse, lowater); - } - atomic_store_relaxed(&ctx->stat[tid].is_overmem, false); + size_t hiwater = atomic_load_relaxed(&ctx->hi_water); + if (hiwater == 0) { return false; } + + size_t inuse = isc_mem_inuse(ctx); + if (inuse >= hiwater) { + return true; + } + + size_t lowater = atomic_load_relaxed(&ctx->lo_water); + if (inuse <= lowater) { + return false; + } + + /* + * Between lo_water and hi_water, return true with a probability + * that ramps linearly from 0 at lo_water to 1 at hi_water. This + * spreads cache cleaning across many inserts instead of triggering + * a thundering herd once the hi_water mark is crossed. + */ + uint32_t prob = (uint32_t)(((uint64_t)(inuse - lowater) * 256) / + (hiwater - lowater)); + return isc_random8() < prob; } const char * diff --git a/tests/dns/qpdb_test.c b/tests/dns/qpdb_test.c index de3ea1245b..8293c971aa 100644 --- a/tests/dns/qpdb_test.c +++ b/tests/dns/qpdb_test.c @@ -128,7 +128,7 @@ ISC_LOOP_TEST_IMPL(overmempurge_bigrdata) { dns_db_t *db = NULL; isc_mem_t *mctx = NULL; isc_stdtime_t now = isc_stdtime_now(); - size_t i; + size_t i = 0; isc_mem_create("test", &mctx); @@ -140,21 +140,21 @@ ISC_LOOP_TEST_IMPL(overmempurge_bigrdata) { isc_mem_setwater(mctx, hiwater, lowater); /* - * Add cache entries with minimum size of data until 'overmem' - * condition is triggered. - * This should eventually happen, but we also limit the number of - * iteration to avoid an infinite loop in case something gets wrong. + * Add a lot of data entries sufficient to push the context + * above the hi_water mark. */ - for (i = 0; !isc_mem_isovermem(mctx) && i < (maxcache / 10); i++) { - overmempurge_addrdataset(db, now, i, 50053, 0, false); + while (isc_mem_inuse(mctx) < hiwater) { + overmempurge_addrdataset(db, now, i, 50053, 0, true); + i++; } - assert_true(isc_mem_isovermem(mctx)); + assert_true(isc_mem_inuse(mctx) >= hiwater); + assert_true(isc_mem_inuse(mctx) < maxcache); /* * Then try to add the same number of entries, each has very large data. - * 'overmem purge' should keep the total cache size from exceeding - * the 'hiwater' mark too much. So we should be able to assume the - * cache size doesn't reach the "max". + * Probabilistic LRU cleaning should keep the total cache size from + * exceeding the 'hiwater' mark too much. So we should be able to + * assume the cache size doesn't reach the "max". */ while (i-- > 0) { overmempurge_addrdataset(db, now, i, 50054, @@ -180,7 +180,7 @@ ISC_LOOP_TEST_IMPL(overmempurge_longname) { dns_db_t *db = NULL; isc_mem_t *mctx = NULL; isc_stdtime_t now = isc_stdtime_now(); - size_t i; + size_t i = 0; isc_mem_create("test", &mctx); @@ -192,21 +192,21 @@ ISC_LOOP_TEST_IMPL(overmempurge_longname) { isc_mem_setwater(mctx, hiwater, lowater); /* - * Add cache entries with minimum size of data until 'overmem' - * condition is triggered. - * This should eventually happen, but we also limit the number of - * iteration to avoid an infinite loop in case something gets wrong. + * Add a lot of data entries sufficient to push the context + * above the hi_water mark. */ - for (i = 0; !isc_mem_isovermem(mctx) && i < (maxcache / 10); i++) { - overmempurge_addrdataset(db, now, i, 50053, 0, false); + while (isc_mem_inuse(mctx) < hiwater) { + overmempurge_addrdataset(db, now, i, 50053, 0, true); + i++; } - assert_true(isc_mem_isovermem(mctx)); + assert_true(isc_mem_inuse(mctx) >= hiwater); + assert_true(isc_mem_inuse(mctx) < maxcache); /* * Then try to add the same number of entries, each has very long name. - * 'overmem purge' should keep the total cache size from not exceeding - * the 'hiwater' mark too much. So we should be able to assume the cache - * size doesn't reach the "max". + * Probabilistic LRU cleaning should keep the total cache size from + * exceeding the 'hiwater' mark too much. So we should be able to + * assume the cache size doesn't reach the "max". */ while (i-- > 0) { overmempurge_addrdataset(db, now, i, 50054, 0, true); diff --git a/tests/isc/mem_test.c b/tests/isc/mem_test.c index 7724488935..5462b628d4 100644 --- a/tests/isc/mem_test.c +++ b/tests/isc/mem_test.c @@ -291,6 +291,17 @@ ISC_RUN_TEST_IMPL(isc_mem_reallocate) { isc_mem_free(isc_g_mctx, data); } +static bool +at_least_one_overmem(isc_mem_t *mctx) { + for (size_t i = 0; i < UINT16_MAX; i++) { + /* The overmem is probability based in this range */ + if (isc_mem_isovermem(mctx)) { + return true; + } + } + return false; +} + ISC_RUN_TEST_IMPL(isc_mem_overmem) { isc_mem_t *mctx = NULL; isc_mem_create("test", &mctx); @@ -298,27 +309,27 @@ ISC_RUN_TEST_IMPL(isc_mem_overmem) { isc_mem_setwater(mctx, 1024, 512); - /* inuse < lo_water */ + /* inuse <= lo_water is always false */ void *data1 = isc_mem_allocate(mctx, 256); assert_false(isc_mem_isovermem(mctx)); - /* lo_water < inuse < hi_water */ + /* lo_water < inuse < hi_water might be true or false */ void *data2 = isc_mem_allocate(mctx, 512); - assert_false(isc_mem_isovermem(mctx)); + assert_true(at_least_one_overmem(mctx)); - /* hi_water < inuse */ + /* hi_water <= inuse is always true */ void *data3 = isc_mem_allocate(mctx, 512); assert_true(isc_mem_isovermem(mctx)); - /* lo_water < inuse < hi_water */ + /* lo_water < inuse < hi_water might be true or false */ isc_mem_free(mctx, data2); - assert_true(isc_mem_isovermem(mctx)); + assert_true(at_least_one_overmem(mctx)); - /* inuse < lo_water */ + /* inuse <= lo_water is always false */ isc_mem_free(mctx, data3); assert_false(isc_mem_isovermem(mctx)); - /* inuse == 0 */ + /* inuse == 0 is always false */ isc_mem_free(mctx, data1); assert_false(isc_mem_isovermem(mctx));