mirror of
https://github.com/isc-projects/bind9.git
synced 2026-06-11 12:20:00 -04:00
chg: usr: Fix CPU spikes and slow queries when cache approaches memory limit
Spread cache cleanup probabilistically to avoid CPU usage spikes and a drop in query throughput. Closes isc-projects/bind9#5891 Merge branch '5891-improve-overmem-cleaning' into 'security-main' See merge request isc-private/bind9!1002
This commit is contained in:
commit
2e7e9f51db
3 changed files with 64 additions and 74 deletions
|
|
@ -30,6 +30,7 @@
|
|||
#include <isc/mutex.h>
|
||||
#include <isc/os.h>
|
||||
#include <isc/overflow.h>
|
||||
#include <isc/random.h>
|
||||
#include <isc/refcount.h>
|
||||
#include <isc/stdtime.h>
|
||||
#include <isc/strerr.h>
|
||||
|
|
@ -126,7 +127,6 @@ static isc_mutex_t contextslock;
|
|||
typedef union {
|
||||
struct {
|
||||
atomic_int_fast64_t inuse;
|
||||
atomic_bool is_overmem;
|
||||
};
|
||||
char padding[ISC_OS_CACHELINE_SIZE];
|
||||
} isc__mem_stat_t;
|
||||
|
|
@ -620,7 +620,6 @@ mem_create(const char *name, isc_mem_t **ctxp, unsigned int debugging,
|
|||
|
||||
for (size_t i = 0; i < ARRAY_SIZE(ctx->stat_s); i++) {
|
||||
atomic_init(&ctx->stat_s[i].inuse, 0);
|
||||
atomic_init(&ctx->stat_s[i].is_overmem, false);
|
||||
}
|
||||
|
||||
/* Reserve the [-1] index for ISC_TID_UNKNOWN */
|
||||
|
|
@ -1020,50 +1019,30 @@ bool
|
|||
isc_mem_isovermem(isc_mem_t *ctx) {
|
||||
REQUIRE(VALID_CONTEXT(ctx));
|
||||
|
||||
int32_t tid = isc_tid();
|
||||
|
||||
bool is_overmem = atomic_load_relaxed(&ctx->stat[tid].is_overmem);
|
||||
|
||||
if (!is_overmem) {
|
||||
/* We are not overmem, check whether we should be? */
|
||||
size_t hiwater = atomic_load_relaxed(&ctx->hi_water);
|
||||
if (hiwater == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t inuse = isc_mem_inuse(ctx);
|
||||
if (inuse <= hiwater) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((ctx->debugging & ISC_MEM_DEBUGUSAGE) != 0) {
|
||||
fprintf(stderr,
|
||||
"overmem %s mctx %p inuse %zu hi_water %zu\n",
|
||||
ctx->name, ctx, inuse, hiwater);
|
||||
}
|
||||
|
||||
atomic_store_relaxed(&ctx->stat[tid].is_overmem, true);
|
||||
return true;
|
||||
} else {
|
||||
/* We are overmem, check whether we should not be? */
|
||||
size_t lowater = atomic_load_relaxed(&ctx->lo_water);
|
||||
if (lowater == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t inuse = isc_mem_inuse(ctx);
|
||||
if (inuse >= lowater) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if ((ctx->debugging & ISC_MEM_DEBUGUSAGE) != 0) {
|
||||
fprintf(stderr,
|
||||
"overmem %s mctx %p inuse %zu lo_water %zu\n",
|
||||
ctx->name, ctx, inuse, lowater);
|
||||
}
|
||||
atomic_store_relaxed(&ctx->stat[tid].is_overmem, false);
|
||||
size_t hiwater = atomic_load_relaxed(&ctx->hi_water);
|
||||
if (hiwater == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t inuse = isc_mem_inuse(ctx);
|
||||
if (inuse >= hiwater) {
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t lowater = atomic_load_relaxed(&ctx->lo_water);
|
||||
if (inuse <= lowater) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Between lo_water and hi_water, return true with a probability
|
||||
* that ramps linearly from 0 at lo_water to 1 at hi_water. This
|
||||
* spreads cache cleaning across many inserts instead of triggering
|
||||
* a thundering herd once the hi_water mark is crossed.
|
||||
*/
|
||||
uint32_t prob = (uint32_t)(((uint64_t)(inuse - lowater) * 256) /
|
||||
(hiwater - lowater));
|
||||
return isc_random8() < prob;
|
||||
}
|
||||
|
||||
const char *
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ ISC_LOOP_TEST_IMPL(overmempurge_bigrdata) {
|
|||
dns_db_t *db = NULL;
|
||||
isc_mem_t *mctx = NULL;
|
||||
isc_stdtime_t now = isc_stdtime_now();
|
||||
size_t i;
|
||||
size_t i = 0;
|
||||
|
||||
isc_mem_create("test", &mctx);
|
||||
|
||||
|
|
@ -140,21 +140,21 @@ ISC_LOOP_TEST_IMPL(overmempurge_bigrdata) {
|
|||
isc_mem_setwater(mctx, hiwater, lowater);
|
||||
|
||||
/*
|
||||
* Add cache entries with minimum size of data until 'overmem'
|
||||
* condition is triggered.
|
||||
* This should eventually happen, but we also limit the number of
|
||||
* iteration to avoid an infinite loop in case something gets wrong.
|
||||
* Add a lot of data entries sufficient to push the context
|
||||
* above the hi_water mark.
|
||||
*/
|
||||
for (i = 0; !isc_mem_isovermem(mctx) && i < (maxcache / 10); i++) {
|
||||
overmempurge_addrdataset(db, now, i, 50053, 0, false);
|
||||
while (isc_mem_inuse(mctx) < hiwater) {
|
||||
overmempurge_addrdataset(db, now, i, 50053, 0, true);
|
||||
i++;
|
||||
}
|
||||
assert_true(isc_mem_isovermem(mctx));
|
||||
assert_true(isc_mem_inuse(mctx) >= hiwater);
|
||||
assert_true(isc_mem_inuse(mctx) < maxcache);
|
||||
|
||||
/*
|
||||
* Then try to add the same number of entries, each has very large data.
|
||||
* 'overmem purge' should keep the total cache size from exceeding
|
||||
* the 'hiwater' mark too much. So we should be able to assume the
|
||||
* cache size doesn't reach the "max".
|
||||
* Probabilistic LRU cleaning should keep the total cache size from
|
||||
* exceeding the 'hiwater' mark too much. So we should be able to
|
||||
* assume the cache size doesn't reach the "max".
|
||||
*/
|
||||
while (i-- > 0) {
|
||||
overmempurge_addrdataset(db, now, i, 50054,
|
||||
|
|
@ -180,7 +180,7 @@ ISC_LOOP_TEST_IMPL(overmempurge_longname) {
|
|||
dns_db_t *db = NULL;
|
||||
isc_mem_t *mctx = NULL;
|
||||
isc_stdtime_t now = isc_stdtime_now();
|
||||
size_t i;
|
||||
size_t i = 0;
|
||||
|
||||
isc_mem_create("test", &mctx);
|
||||
|
||||
|
|
@ -192,21 +192,21 @@ ISC_LOOP_TEST_IMPL(overmempurge_longname) {
|
|||
isc_mem_setwater(mctx, hiwater, lowater);
|
||||
|
||||
/*
|
||||
* Add cache entries with minimum size of data until 'overmem'
|
||||
* condition is triggered.
|
||||
* This should eventually happen, but we also limit the number of
|
||||
* iteration to avoid an infinite loop in case something gets wrong.
|
||||
* Add a lot of data entries sufficient to push the context
|
||||
* above the hi_water mark.
|
||||
*/
|
||||
for (i = 0; !isc_mem_isovermem(mctx) && i < (maxcache / 10); i++) {
|
||||
overmempurge_addrdataset(db, now, i, 50053, 0, false);
|
||||
while (isc_mem_inuse(mctx) < hiwater) {
|
||||
overmempurge_addrdataset(db, now, i, 50053, 0, true);
|
||||
i++;
|
||||
}
|
||||
assert_true(isc_mem_isovermem(mctx));
|
||||
assert_true(isc_mem_inuse(mctx) >= hiwater);
|
||||
assert_true(isc_mem_inuse(mctx) < maxcache);
|
||||
|
||||
/*
|
||||
* Then try to add the same number of entries, each has very long name.
|
||||
* 'overmem purge' should keep the total cache size from not exceeding
|
||||
* the 'hiwater' mark too much. So we should be able to assume the cache
|
||||
* size doesn't reach the "max".
|
||||
* Probabilistic LRU cleaning should keep the total cache size from
|
||||
* exceeding the 'hiwater' mark too much. So we should be able to
|
||||
* assume the cache size doesn't reach the "max".
|
||||
*/
|
||||
while (i-- > 0) {
|
||||
overmempurge_addrdataset(db, now, i, 50054, 0, true);
|
||||
|
|
|
|||
|
|
@ -291,6 +291,17 @@ ISC_RUN_TEST_IMPL(isc_mem_reallocate) {
|
|||
isc_mem_free(isc_g_mctx, data);
|
||||
}
|
||||
|
||||
static bool
|
||||
at_least_one_overmem(isc_mem_t *mctx) {
|
||||
for (size_t i = 0; i < UINT16_MAX; i++) {
|
||||
/* The overmem is probability based in this range */
|
||||
if (isc_mem_isovermem(mctx)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
ISC_RUN_TEST_IMPL(isc_mem_overmem) {
|
||||
isc_mem_t *mctx = NULL;
|
||||
isc_mem_create("test", &mctx);
|
||||
|
|
@ -298,27 +309,27 @@ ISC_RUN_TEST_IMPL(isc_mem_overmem) {
|
|||
|
||||
isc_mem_setwater(mctx, 1024, 512);
|
||||
|
||||
/* inuse < lo_water */
|
||||
/* inuse <= lo_water is always false */
|
||||
void *data1 = isc_mem_allocate(mctx, 256);
|
||||
assert_false(isc_mem_isovermem(mctx));
|
||||
|
||||
/* lo_water < inuse < hi_water */
|
||||
/* lo_water < inuse < hi_water might be true or false */
|
||||
void *data2 = isc_mem_allocate(mctx, 512);
|
||||
assert_false(isc_mem_isovermem(mctx));
|
||||
assert_true(at_least_one_overmem(mctx));
|
||||
|
||||
/* hi_water < inuse */
|
||||
/* hi_water <= inuse is always true */
|
||||
void *data3 = isc_mem_allocate(mctx, 512);
|
||||
assert_true(isc_mem_isovermem(mctx));
|
||||
|
||||
/* lo_water < inuse < hi_water */
|
||||
/* lo_water < inuse < hi_water might be true or false */
|
||||
isc_mem_free(mctx, data2);
|
||||
assert_true(isc_mem_isovermem(mctx));
|
||||
assert_true(at_least_one_overmem(mctx));
|
||||
|
||||
/* inuse < lo_water */
|
||||
/* inuse <= lo_water is always false */
|
||||
isc_mem_free(mctx, data3);
|
||||
assert_false(isc_mem_isovermem(mctx));
|
||||
|
||||
/* inuse == 0 */
|
||||
/* inuse == 0 is always false */
|
||||
isc_mem_free(mctx, data1);
|
||||
assert_false(isc_mem_isovermem(mctx));
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue