From 915aa590b684815675bdd5f970e9cecbc20812a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Sun, 19 Apr 2026 21:36:43 +0200 Subject: [PATCH] Make isc_mem_isovermem() probabilistic Replace the hysteretic hi_water/lo_water switch with a stochastic check: always false at or below lo_water, always true at or above hi_water, linearly ramped probability in between. This spreads cache cleaning across many inserts instead of triggering a thundering herd once the hi_water mark is crossed (which causes every addrdataset to enter the LRU purge path simultaneously and serializes lookups behind the node write locks). The is_overmem atomic and its stores are no longer needed and are removed. The existing tests that asserted specific hysteretic state transitions are simplified to check only the deterministic boundaries. (cherry picked from commit ee24d2a1c3361dcc1c48fb29bb2e0b91bc3405e8) --- lib/isc/mem.c | 34 +++++++++++++++++++-------- tests/dns/rbtdb_test.c | 2 -- tests/isc/mem_test.c | 52 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/lib/isc/mem.c b/lib/isc/mem.c index 706fb1ca8c..f2b82de7eb 100644 --- a/lib/isc/mem.c +++ b/lib/isc/mem.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -151,7 +152,6 @@ struct isc_mem { atomic_size_t malloced; atomic_size_t maxmalloced; atomic_bool hi_called; - atomic_bool is_overmem; isc_mem_water_t water; void *water_arg; atomic_size_t hi_water; @@ -534,7 +534,6 @@ mem_create(isc_mem_t **ctxp, unsigned int flags, unsigned int jemalloc_flags) { atomic_init(&ctx->hi_water, 0); atomic_init(&ctx->lo_water, 0); atomic_init(&ctx->hi_called, false); - atomic_init(&ctx->is_overmem, false); for (size_t i = 0; i < STATS_BUCKETS + 1; i++) { atomic_init(&ctx->stats[i].gets, 0); @@ -786,9 +785,6 @@ hi_water(isc_mem_t *ctx) { return false; } - /* We are over water (for the first time) */ - atomic_store_release(&ctx->is_overmem, true); - 
return true; } @@ -810,9 +806,6 @@ lo_water(isc_mem_t *ctx) { return false; } - /* We are no longer overmem */ - atomic_store_release(&ctx->is_overmem, false); - return true; } @@ -1195,7 +1188,30 @@ bool isc_mem_isovermem(isc_mem_t *ctx) { REQUIRE(VALID_CONTEXT(ctx)); - return atomic_load_relaxed(&ctx->is_overmem); + size_t hiwater = atomic_load_relaxed(&ctx->hi_water); + if (hiwater == 0) { + return false; + } + + size_t inuse = atomic_load_relaxed(&ctx->inuse); + if (inuse >= hiwater) { + return true; + } + + size_t lowater = atomic_load_relaxed(&ctx->lo_water); + if (inuse <= lowater) { + return false; + } + + /* + * Between lo_water and hi_water, return true with a probability + * that ramps linearly from 0 at lo_water to 1 at hi_water. This + * spreads cache cleaning across many inserts instead of triggering + * a thundering herd once the hi_water mark is crossed. + */ + uint32_t prob = (uint32_t)(((uint64_t)(inuse - lowater) * 256) / + (hiwater - lowater)); + return isc_random8() < prob; } void diff --git a/tests/dns/rbtdb_test.c b/tests/dns/rbtdb_test.c index cfc2a5779b..451d5afed0 100644 --- a/tests/dns/rbtdb_test.c +++ b/tests/dns/rbtdb_test.c @@ -307,7 +307,6 @@ ISC_RUN_TEST_IMPL(overmempurge_bigrdata) { for (i = 0; !isc_mem_isovermem(mctx2) && i < (maxcache / 10); i++) { overmempurge_addrdataset(db, now, i, 50053, 0, false); } - assert_true(isc_mem_isovermem(mctx2)); /* * Then try to add the same number of entries, each has very large data. @@ -353,7 +352,6 @@ ISC_RUN_TEST_IMPL(overmempurge_longname) { for (i = 0; !isc_mem_isovermem(mctx2) && i < (maxcache / 10); i++) { overmempurge_addrdataset(db, now, i, 50053, 0, false); } - assert_true(isc_mem_isovermem(mctx2)); /* * Then try to add the same number of entries, each has very large data. 
diff --git a/tests/isc/mem_test.c b/tests/isc/mem_test.c index c2540df729..ca23960880 100644 --- a/tests/isc/mem_test.c +++ b/tests/isc/mem_test.c @@ -290,6 +290,57 @@ ISC_RUN_TEST_IMPL(isc_mem_reget) { isc_mem_put(mctx, data, REGET_SHRINK_SIZE); } +static bool +at_least_one_overmem(isc_mem_t *omctx) { + for (size_t i = 0; i < UINT16_MAX; i++) { + /* The overmem is probability based in this range */ + if (isc_mem_isovermem(omctx)) { + return true; + } + } + return false; +} + +static void +water(void *arg, int mark) { + UNUSED(arg); + UNUSED(mark); +} + +ISC_RUN_TEST_IMPL(isc_mem_overmem) { + isc_mem_t *omctx = NULL; + isc_mem_create(&omctx); + assert_non_null(omctx); + + isc_mem_setwater(omctx, water, NULL, 1024, 512); + + /* inuse <= lo_water is always false */ + void *data1 = isc_mem_allocate(omctx, 256); + assert_false(isc_mem_isovermem(omctx)); + + /* lo_water < inuse < hi_water might be true or false */ + void *data2 = isc_mem_allocate(omctx, 512); + assert_true(at_least_one_overmem(omctx)); + + /* hi_water <= inuse is always true */ + void *data3 = isc_mem_allocate(omctx, 512); + assert_true(isc_mem_isovermem(omctx)); + + /* lo_water < inuse < hi_water might be true or false */ + isc_mem_free(omctx, data2); + assert_true(at_least_one_overmem(omctx)); + + /* inuse <= lo_water is always false */ + isc_mem_free(omctx, data3); + assert_false(isc_mem_isovermem(omctx)); + + /* inuse == 0 is always false */ + isc_mem_free(omctx, data1); + assert_false(isc_mem_isovermem(omctx)); + + isc_mem_destroy(&omctx); +} + #if ISC_MEM_TRACKLINES /* test mem with no flags */ @@ -501,6 +552,7 @@ ISC_TEST_ENTRY(isc_mem_total) ISC_TEST_ENTRY(isc_mem_inuse) ISC_TEST_ENTRY(isc_mem_zeroget) ISC_TEST_ENTRY(isc_mem_reget) +ISC_TEST_ENTRY(isc_mem_overmem) #if !defined(__SANITIZE_THREAD__) ISC_TEST_ENTRY(isc_mem_benchmark)