chg: usr: Switch to LRU-only cache eviction, enforce minimum cache size

Busy resolvers will now gradually fill the configured :any:`max-cache-size`
before entries start being evicted. Previously, expired records were
proactively removed based on their TTL, which kept memory usage below the
configured limit but added overhead. Cache eviction now relies solely on the
SIEVE-LRU mechanism, which has matured to the point where TTL-based cleaning
is no longer necessary.

Setting :any:`max-cache-size` to unlimited or 0 is no longer supported
and falls back to the default (90% of physical memory).

Merge branch 'ondrej/no-ttl-based-cleaning' into 'main'

See merge request isc-projects/bind9!11459
This commit is contained in:
Ondřej Surý 2026-03-30 22:29:17 +02:00
commit 8721a89b64
12 changed files with 189 additions and 334 deletions

View file

@ -18,14 +18,13 @@
#include <limits.h>
#include <signal.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <dns/acl.h>
#ifdef HAVE_DNSTAP
#include <fstrm.h>
#endif
@ -60,6 +59,7 @@
#include <isc/timer.h>
#include <isc/util.h>
#include <dns/acl.h>
#include <dns/adb.h>
#include <dns/badcache.h>
#include <dns/cache.h>
@ -119,6 +119,7 @@
#include <named/config.h>
#include <named/control.h>
#include <named/globals.h>
#include <named/nzd.h>
#if defined(HAVE_GEOIP2)
#include <named/geoip.h>
@ -152,10 +153,6 @@
#define SIZE_MAX ((size_t)(-1))
#endif /* ifndef SIZE_MAX */
#ifndef SIZE_AS_PERCENT
#define SIZE_AS_PERCENT ((size_t)(-2))
#endif /* ifndef SIZE_AS_PERCENT */
/* RFC7828 defines timeout as 16-bit value specified in units of 100
* milliseconds, so the maximum and minimum advertised and keepalive
* timeouts are capped by the data type (it's ~109 minutes)
@ -3575,6 +3572,113 @@ named_register_one_plugin(const cfg_obj_t *config, const cfg_obj_t *obj,
return result;
}
static size_t
sanitized_max_cache_size(const cfg_obj_t *obj, uint64_t value);
/*
 * Convert a percentage-based 'max-cache-size' into a byte count.
 *
 * 'percent' is applied to the total physical memory reported by
 * isc_meminfo_totalphys().  The product is logged and then handed to
 * sanitized_max_cache_size(), so the value that is finally returned may
 * differ from the logged one if it falls outside the permitted range.
 *
 * If the amount of physical memory is reported as 0 (i.e. it could not be
 * determined), an error is logged against 'obj' and DNS_CACHE_MINSIZE is
 * returned.
 */
static size_t
max_cache_size_as_percent(const cfg_obj_t *obj, uint32_t percent) {
	uint64_t totalphys = isc_meminfo_totalphys();

	if (totalphys == 0) {
		cfg_obj_log(obj, ISC_LOG_ERROR,
			    "Unable to determine amount of physical "
			    "memory, setting 'max-cache-size' to the "
			    "minimum value");
		return DNS_CACHE_MINSIZE;
	}

	uint64_t max_cache_size = totalphys * percent / 100;

	/*
	 * 'percent' is an unsigned 32-bit value, so it is printed with
	 * PRIu32 rather than "%d" to keep the conversion specifier in line
	 * with the argument type.
	 */
	cfg_obj_log(obj, ISC_LOG_INFO,
		    "'max-cache-size %" PRIu32 "%%' "
		    "- setting to %" PRIu64 "MB "
		    "(out of %" PRIu64 "MB)",
		    percent, max_cache_size / (1024 * 1024),
		    totalphys / (1024 * 1024));

	return sanitized_max_cache_size(obj, max_cache_size);
}
/*
 * Return the cache size used when 'max-cache-size' is left at its default:
 * 90% of physical memory for a view with recursion enabled, and
 * DNS_CACHE_MINSIZE for a view without recursion.  'obj' is only used as
 * the logging context for the percentage computation.
 */
static size_t
default_max_cache_size(const dns_view_t *view, const cfg_obj_t *obj) {
	if (!view->recursion) {
		return DNS_CACHE_MINSIZE;
	}

	return max_cache_size_as_percent(obj, 90);
}
/*
 * Clamp a requested 'max-cache-size' (in bytes) into the range
 * [DNS_CACHE_MINSIZE, SIZE_MAX].  Whenever the value has to be adjusted,
 * a warning naming both the requested and the substituted value is logged
 * against 'obj'.
 */
static size_t
sanitized_max_cache_size(const cfg_obj_t *obj, uint64_t value) {
	size_t limit;

	if (value > SIZE_MAX) {
		/* Does not fit into size_t on this platform. */
		cfg_obj_log(obj, ISC_LOG_WARNING,
			    "'max-cache-size %" PRIu64 "' "
			    "is too large for this system; reducing to %lu",
			    value, (unsigned long)SIZE_MAX);
		limit = SIZE_MAX;
	} else if (value < DNS_CACHE_MINSIZE) {
		/* Below the floor; raise it. */
		cfg_obj_log(obj, ISC_LOG_WARNING,
			    "'max-cache-size %" PRIu64 "' "
			    "is too small; setting to %" PRIu64,
			    value, DNS_CACHE_MINSIZE);
		limit = DNS_CACHE_MINSIZE;
	} else {
		limit = (size_t)value;
	}

	return limit;
}
/*
 * Determine the effective 'max-cache-size' for 'view', in bytes.
 *
 * Precedence:
 *  1. A non-zero named_g_maxcachesize ("-T maxcachesize=...") overrides
 *     everything else and is returned as-is (always treated as bytes).
 *  2. "default" selects default_max_cache_size() for the view.
 *  3. "unlimited" and the numeric value 0 are no longer supported; a
 *     warning is logged and the default is used instead.
 *  4. A percentage is converted via max_cache_size_as_percent().
 *  5. Any other number of bytes is passed through
 *     sanitized_max_cache_size().
 *
 * 'maps' is the configuration map list searched by named_config_get();
 * the option is required to be present in it.
 */
static size_t
configure_max_cache_size(dns_view_t *view, const cfg_obj_t *maps[4]) {
	const cfg_obj_t *obj = NULL;
	isc_result_t result;

	if (named_g_maxcachesize != 0) {
		/*
		 * If "-T maxcachesize=..." is in effect, it overrides any
		 * other "max-cache-size" setting found in configuration,
		 * either implicit or explicit.  For simplicity, the value
		 * passed to that command line option is always treated as
		 * the number of bytes to set "max-cache-size" to.
		 */
		return named_g_maxcachesize;
	}

	result = named_config_get(maps, "max-cache-size", &obj);
	INSIST(result == ISC_R_SUCCESS);

	if (cfg_obj_isstring(obj)) {
		const char *str = cfg_obj_asstring(obj);

		if (strcasecmp(str, "default") != 0) {
			/* The only other keyword accepted is "unlimited". */
			INSIST(strcasecmp(str, "unlimited") == 0);
			cfg_obj_log(obj, ISC_LOG_WARNING,
				    "'max-cache-size' can't be unlimited; "
				    "falling back to default");
		}

		/*
		 * The default for a view with recursion is 90% of memory.
		 * With no recursion, it's the minimum cache size allowed
		 * by dns_cache_setcachesize().
		 */
		return default_max_cache_size(view, obj);
	}

	if (cfg_obj_ispercentage(obj)) {
		return max_cache_size_as_percent(obj,
						 cfg_obj_aspercentage(obj));
	}

	if (cfg_obj_isuint64(obj)) {
		uint64_t value = cfg_obj_asuint64(obj);

		if (value == 0) {
			/*
			 * 0 used to be a synonym for "unlimited".  Treat it
			 * the same way as the keyword instead of letting it
			 * be clamped to the minimum cache size.
			 */
			cfg_obj_log(obj, ISC_LOG_WARNING,
				    "'max-cache-size' can't be unlimited; "
				    "falling back to default");
			return default_max_cache_size(view, obj);
		}

		return sanitized_max_cache_size(obj, value);
	}

	UNREACHABLE();
}
static const char *const response_synonyms[] = { "response", NULL };
/*
@ -3613,7 +3717,6 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config,
dns_cache_t *cache = NULL;
isc_result_t result;
size_t max_cache_size;
uint32_t max_cache_size_percent = 0;
size_t max_adb_size;
uint32_t lame_ttl, fail_ttl;
uint32_t max_stale_ttl = 0;
@ -3820,78 +3923,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config,
INSIST(result == ISC_R_SUCCESS);
view->recursion = cfg_obj_asboolean(obj);
if (named_g_maxcachesize != 0) {
/*
* If "-T maxcachesize=..." is in effect, it overrides any
* other "max-cache-size" setting found in configuration,
* either implicit or explicit. For simplicity, the value
* passed to that command line option is always treated as
* the number of bytes to set "max-cache-size" to.
*/
max_cache_size = named_g_maxcachesize;
} else {
obj = NULL;
result = named_config_get(maps, "max-cache-size", &obj);
INSIST(result == ISC_R_SUCCESS);
if (cfg_obj_isstring(obj) &&
strcasecmp(cfg_obj_asstring(obj), "default") == 0)
{
/*
* The default for a view with recursion
* is 90% of memory. With no recursion,
* it's the minimum cache size allowed by
* dns_cache_setcachesize().
*/
if (view->recursion) {
max_cache_size = SIZE_AS_PERCENT;
max_cache_size_percent = 90;
} else {
max_cache_size = 1;
}
} else if (cfg_obj_isstring(obj)) {
str = cfg_obj_asstring(obj);
INSIST(strcasecmp(str, "unlimited") == 0);
max_cache_size = 0;
} else if (cfg_obj_ispercentage(obj)) {
max_cache_size = SIZE_AS_PERCENT;
max_cache_size_percent = cfg_obj_aspercentage(obj);
} else if (cfg_obj_isuint64(obj)) {
uint64_t value = cfg_obj_asuint64(obj);
if (value > SIZE_MAX) {
cfg_obj_log(obj, ISC_LOG_WARNING,
"'max-cache-size "
"%" PRIu64 "' "
"is too large for this "
"system; reducing to %lu",
value, (unsigned long)SIZE_MAX);
value = SIZE_MAX;
}
max_cache_size = (size_t)value;
} else {
UNREACHABLE();
}
}
if (max_cache_size == SIZE_AS_PERCENT) {
uint64_t totalphys = isc_meminfo_totalphys();
max_cache_size =
(size_t)(totalphys * max_cache_size_percent / 100);
if (totalphys == 0) {
cfg_obj_log(obj, ISC_LOG_WARNING,
"Unable to determine amount of physical "
"memory, setting 'max-cache-size' to "
"unlimited");
} else {
cfg_obj_log(obj, ISC_LOG_INFO,
"'max-cache-size %d%%' "
"- setting to %" PRIu64 "MB "
"(out of %" PRIu64 "MB)",
max_cache_size_percent,
(uint64_t)(max_cache_size / (1024 * 1024)),
totalphys / (1024 * 1024));
}
}
max_cache_size = configure_max_cache_size(view, maps);
/*
* Since both the delegation DB and the ADB use 1/8 of the
@ -4329,25 +4361,21 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config,
* Set the ADB cache size to 1/8th of the max-cache-size or
* MAX_ADB_SIZE_FOR_CACHESHARE when the cache is shared.
*/
max_adb_size = 0;
if (cache_size_slice != 0U) {
max_adb_size = cache_size_slice;
if (max_adb_size == 0U) {
max_adb_size = 1; /* Force minimum. */
}
if (view != nsc->primaryview &&
max_adb_size > MAX_ADB_SIZE_FOR_CACHESHARE)
{
max_adb_size = MAX_ADB_SIZE_FOR_CACHESHARE;
if (!nsc->adbsizeadjusted) {
dns_view_getadb(nsc->primaryview, &adb);
if (adb != NULL) {
dns_adb_setadbsize(
adb,
MAX_ADB_SIZE_FOR_CACHESHARE);
nsc->adbsizeadjusted = true;
dns_adb_detach(&adb);
}
max_adb_size = cache_size_slice;
if (max_adb_size < DNS_ADB_MINADBSIZE) {
max_adb_size = DNS_ADB_MINADBSIZE; /* Force minimum. */
}
if (view != nsc->primaryview &&
max_adb_size > MAX_ADB_SIZE_FOR_CACHESHARE)
{
max_adb_size = MAX_ADB_SIZE_FOR_CACHESHARE;
if (!nsc->adbsizeadjusted) {
dns_view_getadb(nsc->primaryview, &adb);
if (adb != NULL) {
dns_adb_setadbsize(adb,
MAX_ADB_SIZE_FOR_CACHESHARE);
nsc->adbsizeadjusted = true;
dns_adb_detach(&adb);
}
}
}

View file

@ -3832,9 +3832,12 @@ system.
- 2 MB for views with :any:`recursion` set to ``no``.
Any positive value smaller than 2 MB is ignored and reset to 2 MB.
The keyword ``unlimited``, or the value ``0``, places no limit on the
cache size; records are then purged from the cache only when they
expire (according to their TTLs).
.. warning::
Previously, the keyword ``unlimited``, or the value ``0``, placed
no limit on the cache size; this is no longer permitted as
TTL-based cleaning has been removed from :iscman:`named`.
.. note::
@ -3844,10 +3847,11 @@ system.
default value of that option (90% of physical memory for each
individual cache) may lead to memory exhaustion over time.
.. note::
.. warning::
:any:`max-cache-size` does not work reliably for a maximum
amount of memory of 100 MB or lower.
Setting :any:`max-cache-size` to a value lower than 256 MB is
permitted but not recommended; LRU-only cache eviction may cause
excessive churn under load.
Upon startup and reconfiguration, caches with a limited size
preallocate a small amount of memory (less than 1% of
@ -3856,10 +3860,13 @@ system.
internal cache structures.
On systems where detection of the amount of physical memory is not
supported, percentage-based values fall back to ``unlimited``. Note
that the amount of physical memory available is only detected on
startup, so :iscman:`named` does not adjust the cache size limits if the
amount of physical memory is changed at runtime.
supported, :iscman:`named` will fall back to the minimum value (2 MB).
.. note::
The amount of physical memory available is only detected on startup, so
:iscman:`named` does not adjust the cache size limits if the amount of
physical memory is changed at runtime.
On Linux, the system administrator can use `cgroup`_ (Control Group)
mechanism to limit the amount of available memory to the process. This limit

View file

@ -77,8 +77,6 @@
#define ADB_STALE_MARGIN 1800
#endif /* ifndef ADB_STALE_MARGIN */
#define DNS_ADB_MINADBSIZE (1024U * 1024U) /*%< 1 Megabyte */
typedef ISC_LIST(dns_adbname_t) dns_adbnamelist_t;
typedef struct dns_adbnamehook dns_adbnamehook_t;
typedef ISC_LIST(dns_adbnamehook_t) dns_adbnamehooklist_t;
@ -2258,8 +2256,6 @@ dns_adb_dump(dns_adb_t *adb, FILE *f) {
return;
}
cleanup_names(adb, now);
cleanup_entries(adb, now);
dump_adb(adb, f, false, now);
rcu_read_unlock();
@ -2293,7 +2289,19 @@ dump_adb(dns_adb_t *adb, FILE *f, bool debug, isc_stdtime_t now) {
*/
dns_adbname_t *adbname = NULL;
cds_lfht_for_each_entry(adb->names_ht, &iter, adbname, ht_node) {
dns_adbname_ref(adbname);
LOCK(&adbname->lock);
/*
* Lazily expire stale name hooks and names while dumping.
*/
maybe_expire_namehooks(adbname, now);
if (maybe_expire_name(adbname, now)) {
UNLOCK(&adbname->lock);
dns_adbname_detach(&adbname);
continue;
}
/*
* Dump the names
*/
@ -2320,17 +2328,25 @@ dump_adb(dns_adb_t *adb, FILE *f, bool debug, isc_stdtime_t now) {
print_find_list(f, adbname);
}
UNLOCK(&adbname->lock);
dns_adbname_detach(&adbname);
}
dns_adbentry_t *adbentry = NULL;
fprintf(f, ";\n; Unassociated entries\n;\n");
cds_lfht_for_each_entry(adb->entries_ht, &iter, adbentry, ht_node) {
dns_adbentry_ref(adbentry);
LOCK(&adbentry->lock);
if (maybe_expire_entry(adbentry, now)) {
UNLOCK(&adbentry->lock);
dns_adbentry_detach(&adbentry);
continue;
}
if (ISC_LIST_EMPTY(adbentry->nhs)) {
dump_entry(f, adb, adbentry, debug, now);
}
UNLOCK(&adbentry->lock);
dns_adbentry_detach(&adbentry);
}
}

View file

@ -48,12 +48,6 @@
#define CACHE_MAGIC ISC_MAGIC('$', '$', '$', '$')
#define VALID_CACHE(cache) ISC_MAGIC_VALID(cache, CACHE_MAGIC)
/*
* DNS_CACHE_MINSIZE is how many bytes is the floor for
* dns_cache_setcachesize().
*/
#define DNS_CACHE_MINSIZE 2097152U /*%< Bytes. 2097152 = 2 MB */
/***
*** Types
***/
@ -67,7 +61,6 @@ struct dns_cache {
unsigned int magic;
isc_mutex_t lock;
isc_mem_t *mctx; /* Memory context for the dns_cache object */
isc_mem_t *hmctx; /* Heap memory */
isc_mem_t *tmctx; /* Tree memory */
char *name;
isc_refcount_t references;
@ -88,12 +81,10 @@ struct dns_cache {
***/
static isc_result_t
cache_create_db(dns_cache_t *cache, dns_db_t **dbp, isc_mem_t **tmctxp,
isc_mem_t **hmctxp) {
cache_create_db(dns_cache_t *cache, dns_db_t **dbp, isc_mem_t **tmctxp) {
isc_result_t result;
char *argv[1] = { 0 };
dns_db_t *db = NULL;
isc_mem_t *tmctx = NULL, *hmctx = NULL;
isc_mem_t *tmctx = NULL;
/*
* This will be the cache memory context, which is subject
@ -101,22 +92,8 @@ cache_create_db(dns_cache_t *cache, dns_db_t **dbp, isc_mem_t **tmctxp,
*/
isc_mem_create("cache", &tmctx);
/*
* This will be passed to RBTDB to use for heaps. This is separate
* from the main cache memory because it can grow quite large under
* heavy load and could otherwise cause the cache to be cleaned too
* aggressively.
*/
isc_mem_create("cache_heap", &hmctx);
/*
* For databases of type "qpcache" or "rbt" (which are the
* only cache implementations currently in existence) we pass
* hmctx to dns_db_create() via argv[0].
*/
argv[0] = (char *)hmctx;
result = dns_db_create(tmctx, CACHEDB_DEFAULT, dns_rootname,
dns_dbtype_cache, cache->rdclass, 1, argv, &db);
dns_dbtype_cache, cache->rdclass, 0, NULL, &db);
if (result != ISC_R_SUCCESS) {
goto cleanup_mctx;
}
@ -131,7 +108,6 @@ cache_create_db(dns_cache_t *cache, dns_db_t **dbp, isc_mem_t **tmctxp,
dns_db_setmaxtypepername(db, cache->maxtypepername);
*dbp = db;
*hmctxp = hmctx;
*tmctxp = tmctx;
return ISC_R_SUCCESS;
@ -139,7 +115,6 @@ cache_create_db(dns_cache_t *cache, dns_db_t **dbp, isc_mem_t **tmctxp,
cleanup_db:
dns_db_detach(&db);
cleanup_mctx:
isc_mem_detach(&hmctx);
isc_mem_detach(&tmctx);
return result;
@ -150,9 +125,6 @@ cache_destroy(dns_cache_t *cache) {
isc_stats_detach(&cache->stats);
isc_mutex_destroy(&cache->lock);
isc_mem_free(cache->mctx, cache->name);
if (cache->hmctx != NULL) {
isc_mem_detach(&cache->hmctx);
}
if (cache->tmctx != NULL) {
isc_mem_detach(&cache->tmctx);
}
@ -184,7 +156,7 @@ dns_cache_create(dns_rdataclass_t rdclass, const char *cachename,
/*
* Create the database
*/
CHECK(cache_create_db(cache, &cache->db, &cache->tmctx, &cache->hmctx));
CHECK(cache_create_db(cache, &cache->db, &cache->tmctx));
*cachep = cache;
return ISC_R_SUCCESS;
@ -235,11 +207,7 @@ static void
updatewater(dns_cache_t *cache) {
size_t hi = cache->size - (cache->size >> 3); /* ~ 7/8ths. */
size_t lo = cache->size - (cache->size >> 2); /* ~ 3/4ths. */
if (cache->size == 0U || hi == 0U || lo == 0U) {
isc_mem_clearwater(cache->tmctx);
} else {
isc_mem_setwater(cache->tmctx, hi, lo);
}
isc_mem_setwater(cache->tmctx, hi, lo);
}
void
@ -250,7 +218,7 @@ dns_cache_setcachesize(dns_cache_t *cache, size_t size) {
* Impose a minimum cache size; pathological things happen if there
* is too little room.
*/
if (size != 0U && size < DNS_CACHE_MINSIZE) {
if (size < DNS_CACHE_MINSIZE) {
size = DNS_CACHE_MINSIZE;
}
@ -325,14 +293,11 @@ isc_result_t
dns_cache_flush(dns_cache_t *cache) {
dns_db_t *db = NULL, *olddb = NULL;
isc_mem_t *tmctx = NULL, *oldtmctx = NULL;
isc_mem_t *hmctx = NULL, *oldhmctx = NULL;
RETERR(cache_create_db(cache, &db, &tmctx, &hmctx));
RETERR(cache_create_db(cache, &db, &tmctx));
LOCK(&cache->lock);
isc_mem_clearwater(cache->tmctx);
oldhmctx = cache->hmctx;
cache->hmctx = hmctx;
oldtmctx = cache->tmctx;
cache->tmctx = tmctx;
updatewater(cache);
@ -341,7 +306,6 @@ dns_cache_flush(dns_cache_t *cache) {
UNLOCK(&cache->lock);
dns_db_detach(&olddb);
isc_mem_detach(&oldhmctx);
isc_mem_detach(&oldtmctx);
return ISC_R_SUCCESS;
@ -598,9 +562,6 @@ dns_cache_dumpstats(dns_cache_t *cache, FILE *fp) {
fprintf(fp, "%20" PRIu64 " %s\n",
values[dns_cachestatscounter_deletelru],
"cache records deleted due to memory exhaustion");
fprintf(fp, "%20" PRIu64 " %s\n",
values[dns_cachestatscounter_deletettl],
"cache records deleted due to TTL expiration");
fprintf(fp, "%20" PRIu64 " %s\n",
values[dns_cachestatscounter_coveringnsec],
"covering nsec returned");
@ -609,9 +570,6 @@ dns_cache_dumpstats(dns_cache_t *cache, FILE *fp) {
fprintf(fp, "%20" PRIu64 " %s\n", (uint64_t)isc_mem_inuse(cache->tmctx),
"cache tree memory in use");
fprintf(fp, "%20" PRIu64 " %s\n", (uint64_t)isc_mem_inuse(cache->hmctx),
"cache heap memory in use");
}
#ifdef HAVE_LIBXML2
@ -656,16 +614,12 @@ dns_cache_renderxml(dns_cache_t *cache, void *writer0) {
values[dns_cachestatscounter_querymisses], writer));
TRY0(renderstat("DeleteLRU", values[dns_cachestatscounter_deletelru],
writer));
TRY0(renderstat("DeleteTTL", values[dns_cachestatscounter_deletettl],
writer));
TRY0(renderstat("CoveringNSEC",
values[dns_cachestatscounter_coveringnsec], writer));
TRY0(renderstat("CacheNodes", dns_db_nodecount(cache->db), writer));
TRY0(renderstat("TreeMemInUse", isc_mem_inuse(cache->tmctx), writer));
TRY0(renderstat("HeapMemInUse", isc_mem_inuse(cache->hmctx), writer));
error:
return xmlrc;
}
@ -713,10 +667,6 @@ dns_cache_renderjson(dns_cache_t *cache, void *cstats0) {
CHECKMEM(obj);
json_object_object_add(cstats, "DeleteLRU", obj);
obj = json_object_new_int64(values[dns_cachestatscounter_deletettl]);
CHECKMEM(obj);
json_object_object_add(cstats, "DeleteTTL", obj);
obj = json_object_new_int64(values[dns_cachestatscounter_coveringnsec]);
CHECKMEM(obj);
json_object_object_add(cstats, "CoveringNSEC", obj);
@ -729,10 +679,6 @@ dns_cache_renderjson(dns_cache_t *cache, void *cstats0) {
CHECKMEM(obj);
json_object_object_add(cstats, "TreeMemInUse", obj);
obj = json_object_new_int64(isc_mem_inuse(cache->hmctx));
CHECKMEM(obj);
json_object_object_add(cstats, "HeapMemInUse", obj);
result = ISC_R_SUCCESS;
error:
return result;

View file

@ -83,6 +83,8 @@
#define DNS_ADBADDRINFO_MAGIC ISC_MAGIC('a', 'd', 'A', 'I')
#define DNS_ADBADDRINFO_VALID(x) ISC_MAGIC_VALID(x, DNS_ADBADDRINFO_MAGIC)
#define DNS_ADB_MINADBSIZE UINT64_C(1024 * 1024) /*%< 1 MB */
/***
*** TYPES
***/

View file

@ -52,6 +52,12 @@
#include <dns/types.h>
/*
* DNS_CACHE_MINSIZE is the minimum cache size, in bytes, enforced by
* dns_cache_setcachesize().
*/
#define DNS_CACHE_MINSIZE UINT64_C(2 * 1024 * 1024) /*%< Bytes. 2 MB */
/***
*** Functions
***/
@ -130,7 +136,7 @@ dns_cache_getname(dns_cache_t *cache);
void
dns_cache_setcachesize(dns_cache_t *cache, size_t size);
/*%<
* Set the maximum cache size. 0 means unlimited.
* Set the maximum cache size.
*/
size_t

View file

@ -45,7 +45,6 @@
#include <stdbool.h>
#include <isc/atomic.h>
#include <isc/heap.h>
#include <isc/stdtime.h>
#include <isc/urcu.h>
@ -97,10 +96,6 @@ struct dns_slabheader {
isc_stdtime_t expire;
dns_typepair_t typepair;
/* TTL-cleaning (cache) */
unsigned int heap_index;
isc_heap_t *heap;
dns_slabheader_proof_t *noqname;
dns_slabheader_proof_t *closest;
@ -191,10 +186,8 @@ dns_rdataslab_fromrdataset(dns_rdataset_t *rdataset, isc_mem_t *mctx,
*
* dns_rdataslab_fromrdataset() allocates space for a dns_slabheader object
* and the memory needed for a raw slab, and partially initializes
* it, setting the type, trust, and TTL fields to match rdataset->type,
* rdataset->covers, rdataset->trust, and rdataset->ttl. (Note that the
* last field needs to be overridden when used in the cache database,
* since cache headers use an expire time instead of a TTL.)
* it, setting the type and trust fields to match rdataset->type,
* rdataset->covers, and rdataset->trust.
*
* Requires:
*\li 'rdataset' is valid.

View file

@ -120,10 +120,9 @@ enum {
dns_cachestatscounter_queryhits = 3,
dns_cachestatscounter_querymisses = 4,
dns_cachestatscounter_deletelru = 5,
dns_cachestatscounter_deletettl = 6,
dns_cachestatscounter_coveringnsec = 7,
dns_cachestatscounter_coveringnsec = 6,
dns_cachestatscounter_max = 8,
dns_cachestatscounter_max = 7,
/*%
* Query statistics counters (obsolete).

View file

@ -249,8 +249,7 @@ typedef enum {
typedef enum {
dns_expire_lru = 0,
dns_expire_ttl = 1,
dns_expire_flush = 2,
dns_expire_flush = 1,
} dns_expire_t;
/*

View file

@ -21,7 +21,6 @@
#include <isc/async.h>
#include <isc/atomic.h>
#include <isc/file.h>
#include <isc/heap.h>
#include <isc/hex.h>
#include <isc/list.h>
#include <isc/log.h>
@ -91,20 +90,6 @@
#define HEADERNODE(h) ((qpcnode_t *)((h)->node))
/*
* Allow clients with a virtual time of up to 10 seconds in the past to see
* records that would have otherwise have expired.
*/
#define QPDB_VIRTUAL 10
/*
* This defines the number of headers that we try to expire each time the
* expire_ttl_headers() is run. The number should be small enough, so the
* TTL-based header expiration doesn't take too long, but it should be large
* enough, so we expire enough headers if their TTL is clustered.
*/
#define DNS_QPDB_EXPIRE_TTL_COUNT 10
/*%
* Forward declarations
*/
@ -179,20 +164,12 @@ typedef struct qpcache_bucket {
/* Per-bucket lock. */
isc_rwlock_t lock;
/*
* The heap is used for TTL based expiry. Note that qpcache->hmctx
* is the memory context to use for heap memory; this differs from
* the main database memory context, which is qpcache->common.mctx.
*/
isc_heap_t *heap;
/* SIEVE-LRU cache cleaning state. */
ISC_SIEVE(dns_slabtop_t) sieve;
/* Padding to prevent false sharing between locks. */
uint8_t __padding[ISC_OS_CACHELINE_SIZE -
(sizeof(isc_queue_t) + sizeof(isc_rwlock_t) +
sizeof(isc_heap_t *) +
sizeof(ISC_SIEVE(dns_slabtop_t))) %
ISC_OS_CACHELINE_SIZE];
@ -239,8 +216,6 @@ struct qpcache {
/* Locked by tree_lock. */
dns_qp_t *tree;
isc_mem_t *hmctx; /* Memory context for the heaps */
size_t buckets_count;
qpcache_bucket_t buckets[]; /* attribute((counted_by(buckets_count))) */
};
@ -518,9 +493,6 @@ qpcache_miss(qpcache_t *qpdb, dns_slabheader_t *newheader,
isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) {
uint32_t idx = HEADERNODE(newheader)->locknum;
isc_heap_insert(qpdb->buckets[idx].heap, newheader);
newheader->heap = qpdb->buckets[idx].heap;
if (isc_mem_isovermem(qpdb->common.mctx)) {
/*
* Maximum estimated size of the data being added: The size
@ -908,23 +880,7 @@ mark(dns_slabheader_t *header, uint_least16_t flag) {
static void
setttl(dns_slabheader_t *header, isc_stdtime_t newts) {
isc_stdtime_t oldts = header->expire;
header->expire = newts;
if (header->heap == NULL || header->heap_index == 0 || newts == oldts) {
return;
}
if (newts < oldts) {
isc_heap_increased(header->heap, header->heap_index);
} else {
isc_heap_decreased(header->heap, header->heap_index);
}
if (newts == 0) {
isc_heap_delete(header->heap, header->heap_index);
}
}
static void
@ -964,10 +920,6 @@ expireheader(dns_slabheader_t *header, isc_rwlocktype_t *nlocktypep,
}
switch (reason) {
case dns_expire_ttl:
isc_stats_increment(qpdb->cachestats,
dns_cachestatscounter_deletettl);
break;
case dns_expire_lru:
isc_stats_increment(qpdb->cachestats,
dns_cachestatscounter_deletelru);
@ -2056,28 +2008,6 @@ qpcnode_expiredata(dns_dbnode_t *node, void *data) {
INSIST(tlocktype == isc_rwlocktype_none);
}
/*%
* These functions allow the heap code to rank the priority of each
* element. It returns true if v1 happens "sooner" than v2.
*/
static bool
ttl_sooner(void *v1, void *v2) {
dns_slabheader_t *h1 = v1;
dns_slabheader_t *h2 = v2;
return h1->expire < h2->expire;
}
/*%
* This function sets the heap index into the header.
*/
static void
set_index(void *what, unsigned int idx) {
dns_slabheader_t *h = what;
h->heap_index = idx;
}
static void
qpcache__destroy(qpcache_t *qpdb) {
unsigned int i;
@ -2104,8 +2034,6 @@ qpcache__destroy(qpcache_t *qpdb) {
INSIST(isc_queue_empty(&qpdb->buckets[i].deadnodes));
isc_queue_destroy(&qpdb->buckets[i].deadnodes);
isc_heap_destroy(&qpdb->buckets[i].heap);
}
dns_stats_detach(&qpdb->rrsetstats);
@ -2121,7 +2049,6 @@ qpcache__destroy(qpcache_t *qpdb) {
isc_rwlock_destroy(&qpdb->lock);
qpdb->common.magic = 0;
qpdb->common.impmagic = 0;
isc_mem_detach(&qpdb->hmctx);
isc_mem_putanddetach(&qpdb->common.mctx, qpdb,
sizeof(*qpdb) + qpdb->buckets_count *
@ -2829,11 +2756,6 @@ cleanup:
return result;
}
static void
expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
isc_stdtime_t now DNS__DB_FLARG);
static isc_result_t
qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
isc_stdtime_t __now, dns_rdataset_t *rdataset,
@ -2874,11 +2796,9 @@ qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
dns_slabheader_reset(newheader, node);
/*
* By default, dns_rdataslab_fromrdataset() sets newheader->ttl
* to the rdataset TTL. In the case of the cache, that's wrong;
* we need it to be set to the expire time instead.
* Set the correct expire time.
*/
setttl(newheader, rdataset->ttl + now);
setttl(newheader, now + rdataset->ttl);
if (rdataset->ttl == 0U) {
DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_ZEROTTL);
}
@ -2927,9 +2847,6 @@ qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
NODE_WRLOCK(nlock, &nlocktype);
expire_ttl_headers(qpdb, qpnode->locknum, &nlocktype, &tlocktype,
now DNS__DB_FLARG_PASS);
if (newnsec && !qpnode->havensec) {
qpcnode_t *nsecnode = NULL;
@ -3040,7 +2957,6 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
unsigned int argc, char *argv[],
void *driverarg ISC_ATTR_UNUSED, dns_db_t **dbp) {
qpcache_t *qpdb = NULL;
isc_mem_t *hmctx = mctx;
isc_loop_t *loop = isc_loop();
int i;
size_t nloops = isc_loopmgr_nloops();
@ -3048,6 +2964,8 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
/* This database implementation only supports cache semantics */
REQUIRE(type == dns_dbtype_cache);
REQUIRE(loop != NULL);
REQUIRE(argc == 0);
REQUIRE(argv == NULL);
qpdb = isc_mem_get(mctx,
sizeof(*qpdb) + nloops * sizeof(qpdb->buckets[0]));
@ -3061,13 +2979,6 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
.buckets_count = nloops,
};
/*
* If argv[0] exists, it points to a memory context to use for heap
*/
if (argc != 0) {
hmctx = (isc_mem_t *)argv[0];
}
isc_rwlock_init(&qpdb->lock);
TREE_INITLOCK(&qpdb->tree_lock);
@ -3077,10 +2988,6 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
for (i = 0; i < (int)qpdb->buckets_count; i++) {
ISC_SIEVE_INIT(qpdb->buckets[i].sieve);
qpdb->buckets[i].heap = NULL;
isc_heap_create(hmctx, ttl_sooner, set_index, 0,
&qpdb->buckets[i].heap);
isc_queue_init(&qpdb->buckets[i].deadnodes);
NODE_INITLOCK(&qpdb->buckets[i].lock);
@ -3092,7 +2999,6 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
* mctx won't disappear out from under us.
*/
isc_mem_attach(mctx, &qpdb->common.mctx);
isc_mem_attach(hmctx, &qpdb->hmctx);
/*
* Make a copy of the origin name.
@ -3552,10 +3458,6 @@ qpcnode_deletedata(dns_dbnode_t *node ISC_ATTR_UNUSED, void *data) {
ISC_LIST_UNLINK(HEADERNODE(header)->dirty, header, dirtylink);
}
if (header->heap != NULL && header->heap_index != 0) {
isc_heap_delete(header->heap, header->heap_index);
}
/*
* This place is the only place where we actually need header->typepair.
*/
@ -3570,40 +3472,6 @@ qpcnode_deletedata(dns_dbnode_t *node ISC_ATTR_UNUSED, void *data) {
}
}
/*
* Caller must be holding the node write lock.
*/
static void
expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
isc_stdtime_t now DNS__DB_FLARG) {
isc_heap_t *heap = qpdb->buckets[locknum].heap;
for (size_t i = 0; i < DNS_QPDB_EXPIRE_TTL_COUNT; i++) {
dns_slabheader_t *header = isc_heap_element(heap, 1);
if (header == NULL) {
/* No headers left on this TTL heap; exit cleaning */
return;
}
dns_ttl_t ttl = header->expire + STALE_TTL(header, qpdb);
if (ttl >= now - QPDB_VIRTUAL) {
/*
* The header at the top of this TTL heap is not yet
* eligible for expiry, so none of the other headers on
* the same heap can be eligible for expiry, either;
* exit cleaning.
*/
return;
}
(void)expireheader(header, nlocktypep, tlocktypep,
dns_expire_ttl DNS__DB_FLARG_PASS);
}
}
static void
setmaxrrperset(dns_db_t *db, uint32_t value) {
qpcache_t *qpdb = (qpcache_t *)db;

View file

@ -13,7 +13,6 @@
#pragma once
#include <isc/heap.h>
#include <isc/urcu.h>
#include <dns/nsec3.h>
@ -37,10 +36,6 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *base, dns_dbtype_t type,
* Create a new database of type "qpcache". Called via dns_db_create();
* see documentation for that function for more details.
*
* If argv[0] is set, it points to a valid memory context to be used for
* allocation of heap memory. Generally this is used for cache databases
* only.
*
* Requires:
*
* \li argc == 0 and argv == NULL.

View file

@ -16,8 +16,6 @@
#include <stdbool.h>
#include <stdlib.h>
#include <urcu/list.h>
#include <isc/ascii.h>
#include <isc/atomic.h>
#include <isc/list.h>
@ -25,6 +23,7 @@
#include <isc/region.h>
#include <isc/result.h>
#include <isc/string.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/db.h>
@ -110,7 +109,6 @@ newslab(dns_rdataset_t *rdataset, isc_mem_t *mctx, isc_region_t *region,
*header = (dns_slabheader_t){
.headers_link = CDS_LIST_HEAD_INIT(header->headers_link),
.trust = rdataset->trust,
.expire = rdataset->ttl,
.dirtylink = ISC_LINK_INITIALIZER,
.nitems = nitems,
};
@ -460,8 +458,6 @@ dns_rdataslab_equalx(dns_slabheader_t *slab1, dns_slabheader_t *slab2,
void
dns_slabheader_reset(dns_slabheader_t *h, dns_dbnode_t *node) {
h->heap_index = 0;
h->heap = NULL;
h->node = node;
atomic_init(&h->attributes, 0);