From 4827ad0ec46e0fb30623363958289494b71de596 Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Mon, 19 Oct 2020 17:02:03 -0300 Subject: [PATCH 01/12] Add stale-refresh-time option Before this update, BIND would attempt to do a full recursive resolution process for each query received if the requested rrset had its ttl expired. If the resolution fails for any reason, only then BIND would check for stale rrset in cache (if 'stale-cache-enable' and 'stale-answer-enable' are on). The problem with this approach is that if an authoritative server is unreachable or is failing to respond, it is very unlikely that the problem will be fixed in the next few seconds. A better approach to improve performance in those cases is to mark the moment in which a resolution failed, and if new queries arrive for that same rrset, try to respond directly from the stale cache, and do that for a window of time configured via 'stale-refresh-time'. Only when this interval expires do we then try to do a normal refresh of the rrset. The logic behind this commit is as follows: - In query.c / query_gotanswer(), if the test of 'result' variable falls to the default case, an error is assumed to have happened, and a call to 'query_usestale()' is made to check if serving of stale rrset is enabled in configuration. - If serving of stale answers is enabled, a flag will be turned on in the query context to look for stale records: query.c:6839 qctx->client->query.dboptions |= DNS_DBFIND_STALEOK; - A call to query_lookup() will be made again, inside it a call to 'dns_db_findext()' is made, which in turn will invoke rbtdb.c / cache_find(). - In rbtdb.c / cache_find() the important bit of this change is the call to 'check_stale_header()', which is a function that yields true if we should skip the stale entry, or false if we should consider it. 
- In check_stale_header() we now check if the DNS_DBFIND_STALEOK option is set, if that is the case we know that this new search for stale records was made due to a failure in a normal resolution, so we keep track of the time in which the failure occurred in rbtdb.c:4559: header->last_refresh_fail_ts = search->now; - In check_stale_header(), if DNS_DBFIND_STALEOK is not set, then we know this is a normal lookup, if the record is stale and the query time is between last failure time + stale-refresh-time window, then we return false so cache_find() knows it can consider this stale rrset entry to return as a response. The last additions are two new methods to the database interface: - setservestalerefresh - getservestalerefresh Those were added so rbtdb can be aware of the value set in configuration option, since at that level we have no access to the view object. --- bin/named/config.c | 1 + bin/named/server.c | 7 ++++ bin/tests/system/dyndb/driver/db.c | 2 ++ lib/dns/cache.c | 7 ++++ lib/dns/db.c | 22 ++++++++++++ lib/dns/dnsrps.c | 2 ++ lib/dns/include/dns/cache.h | 12 +++++++ lib/dns/include/dns/db.h | 36 +++++++++++++++++++ lib/dns/rbtdb.c | 56 +++++++++++++++++++++++++++++ lib/dns/sdb.c | 2 ++ lib/dns/sdlz.c | 2 ++ lib/dns/win32/libdns.def.in | 4 +++ lib/isccfg/namedconf.c | 1 + lib/ns/query.c | 58 ++++++++++++++++++++++++++---- 14 files changed, 206 insertions(+), 6 deletions(-) diff --git a/bin/named/config.c b/bin/named/config.c index 863feae8b3..9b0c6f06e2 100644 --- a/bin/named/config.c +++ b/bin/named/config.c @@ -196,6 +196,7 @@ options {\n\ servfail-ttl 1;\n\ # sortlist \n\ stale-answer-enable false;\n\ + stale-refresh-time 30; /* 30 seconds */\n\ stale-answer-ttl 1; /* 1 second */\n\ stale-cache-enable false;\n\ synth-from-dnssec no;\n\ diff --git a/bin/named/server.c b/bin/named/server.c index a864c1d8fd..f83473c1b2 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -3897,6 +3897,7 @@ configure_view(dns_view_t *view, dns_viewlist_t 
*viewlist, cfg_obj_t *config, size_t max_adb_size; uint32_t lame_ttl, fail_ttl; uint32_t max_stale_ttl = 0; + uint32_t stale_refresh_time = 0; dns_tsig_keyring_t *ring = NULL; dns_view_t *pview = NULL; /* Production view */ isc_mem_t *cmctx = NULL, *hmctx = NULL; @@ -4395,6 +4396,11 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, view->staleanswersok = dns_stale_answer_conf; } + obj = NULL; + result = named_config_get(maps, "stale-refresh-time", &obj); + INSIST(result == ISC_R_SUCCESS); + stale_refresh_time = cfg_obj_asduration(obj); + /* * Configure the view's cache. * @@ -4529,6 +4535,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, dns_cache_setcachesize(cache, max_cache_size); dns_cache_setservestalettl(cache, max_stale_ttl); + dns_cache_setservestalerefresh(cache, stale_refresh_time); dns_cache_detach(&cache); diff --git a/bin/tests/system/dyndb/driver/db.c b/bin/tests/system/dyndb/driver/db.c index cbeff6172b..77d335e2ea 100644 --- a/bin/tests/system/dyndb/driver/db.c +++ b/bin/tests/system/dyndb/driver/db.c @@ -589,6 +589,8 @@ static dns_dbmethods_t sampledb_methods = { NULL, /* getsize */ NULL, /* setservestalettl */ NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ NULL, /* setgluecachestats */ NULL /* adjusthashsize */ }; diff --git a/lib/dns/cache.c b/lib/dns/cache.c index b8e719951a..873b825f2e 100644 --- a/lib/dns/cache.c +++ b/lib/dns/cache.c @@ -999,6 +999,13 @@ dns_cache_getservestalettl(dns_cache_t *cache) { return (result == ISC_R_SUCCESS ? ttl : 0); } +void +dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval) { + REQUIRE(VALID_CACHE(cache)); + + (void)dns_db_setservestalerefresh(cache->db, interval); +} + /* * The cleaner task is shutting down; do the necessary cleanup. 
*/ diff --git a/lib/dns/db.c b/lib/dns/db.c index 6db94d51b8..fa605097da 100644 --- a/lib/dns/db.c +++ b/lib/dns/db.c @@ -1089,6 +1089,28 @@ dns_db_getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { return (ISC_R_NOTIMPLEMENTED); } +isc_result_t +dns_db_setservestalerefresh(dns_db_t *db, uint32_t interval) { + REQUIRE(DNS_DB_VALID(db)); + REQUIRE((db->attributes & DNS_DBATTR_CACHE) != 0); + + if (db->methods->setservestalerefresh != NULL) { + return ((db->methods->setservestalerefresh)(db, interval)); + } + return (ISC_R_NOTIMPLEMENTED); +} + +isc_result_t +dns_db_getservestalerefresh(dns_db_t *db, uint32_t *interval) { + REQUIRE(DNS_DB_VALID(db)); + REQUIRE((db->attributes & DNS_DBATTR_CACHE) != 0); + + if (db->methods->getservestalerefresh != NULL) { + return ((db->methods->getservestalerefresh)(db, interval)); + } + return (ISC_R_NOTIMPLEMENTED); +} + isc_result_t dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats) { REQUIRE(dns_db_iszone(db)); diff --git a/lib/dns/dnsrps.c b/lib/dns/dnsrps.c index 2d261c5c6a..0f2ffb5f35 100644 --- a/lib/dns/dnsrps.c +++ b/lib/dns/dnsrps.c @@ -967,6 +967,8 @@ static dns_dbmethods_t rpsdb_db_methods = { NULL, /* getsize */ NULL, /* setservestalettl */ NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ NULL, /* setgluecachestats */ NULL /* adjusthashsize */ }; diff --git a/lib/dns/include/dns/cache.h b/lib/dns/include/dns/cache.h index ce39765ebc..0474d78dbd 100644 --- a/lib/dns/include/dns/cache.h +++ b/lib/dns/include/dns/cache.h @@ -255,6 +255,18 @@ dns_cache_getservestalettl(dns_cache_t *cache); *\li 'cache' to be valid. */ +void +dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval); +/*%< + * Sets the length of time to wait before attempting to refresh a rrset + * if a previous attempt in doing so has failed. + * During this time window if stale rrset are available in cache they + * will be directly returned to client. 
+ * + * Requires: + *\li 'cache' to be valid. + */ + isc_result_t dns_cache_flush(dns_cache_t *cache); /*%< diff --git a/lib/dns/include/dns/db.h b/lib/dns/include/dns/db.h index 395e8e9679..b79dcae0fa 100644 --- a/lib/dns/include/dns/db.h +++ b/lib/dns/include/dns/db.h @@ -178,6 +178,8 @@ typedef struct dns_dbmethods { uint64_t *records, uint64_t *bytes); isc_result_t (*setservestalettl)(dns_db_t *db, dns_ttl_t ttl); isc_result_t (*getservestalettl)(dns_db_t *db, dns_ttl_t *ttl); + isc_result_t (*setservestalerefresh)(dns_db_t *db, uint32_t interval); + isc_result_t (*getservestalerefresh)(dns_db_t *db, uint32_t *interval); isc_result_t (*setgluecachestats)(dns_db_t *db, isc_stats_t *stats); isc_result_t (*adjusthashsize)(dns_db_t *db, size_t size); } dns_dbmethods_t; @@ -238,6 +240,7 @@ struct dns_dbonupdatelistener { #define DNS_DBFIND_ADDITIONALOK 0x0100 #define DNS_DBFIND_NOZONECUT 0x0200 #define DNS_DBFIND_STALEOK 0x0400 +#define DNS_DBFIND_STALEENABLED 0x0800 /*@}*/ /*@{*/ @@ -1701,6 +1704,39 @@ dns_db_getservestalettl(dns_db_t *db, dns_ttl_t *ttl); * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation. */ +isc_result_t +dns_db_setservestalerefresh(dns_db_t *db, uint32_t interval); +/*%< + * Sets the length of time to wait before attempting to refresh a rrset + * if a previous attempt in doing so has failed. + * During this time window if stale rrset are available in cache they + * will be directly returned to client. + * + * Requires: + * \li 'db' is a valid cache database. + * \li 'interval' is number of seconds before attempting to refresh data. + * + * Returns: + * \li #ISC_R_SUCCESS + * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation. + */ + +isc_result_t +dns_db_getservestalerefresh(dns_db_t *db, uint32_t *interval); +/*%< + * Gets the length of time in which stale answers are directly returned from + * cache before attempting to refresh them, in case a previous attempt in + * doing so has failed. 
+ * + * Requires: + * \li 'db' is a valid cache database. + * \li 'interval' is number of seconds before attempting to refresh data. + * + * Returns: + * \li #ISC_R_SUCCESS + * \li #ISC_R_NOTIMPLEMENTED - Not supported by this DB implementation. + */ + isc_result_t dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats); /*%< diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 6cad7abdd9..f8d51e9a4b 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -205,6 +205,7 @@ typedef struct rdatasetheader { rbtdb_rdatatype_t type; atomic_uint_least16_t attributes; dns_trust_t trust; + isc_stdtime_t last_refresh_fail_ts; struct noqname *noqname; struct noqname *closest; unsigned int is_mmapped : 1; @@ -488,6 +489,13 @@ struct dns_rbtdb { */ dns_ttl_t serve_stale_ttl; + /* + * The time after a failed lookup, where stale answers from cache + * may be used directly in a DNS response without attempting a + * new iterative lookup. + */ + uint32_t serve_stale_refresh; + /* * This is a linked list used to implement the LRU cache. There will * be node_lock_count linked lists here. Nodes in bucket 1 will be @@ -4547,6 +4555,27 @@ check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header, stale > search->now) { mark_header_stale(search->rbtdb, header); *header_prev = header; + /* + * If DNS_DBFIND_STALEOK is set then it means we failed + * to resolve the name during recursion, in this case we + * mark the time in which the refresh failed. + */ + if ((search->options & DNS_DBFIND_STALEOK) != 0) { + header->last_refresh_fail_ts = search->now; + } else if ((search->options & + DNS_DBFIND_STALEENABLED) != 0 && + search->now < + (header->last_refresh_fail_ts + + search->rbtdb->serve_stale_refresh)) + { + /* + * If we are within interval between last + * refresh failure time + 'stale-refresh-time', + * then don't skip this stale entry but use it + * instead. 
+ */ + return (false); + } return ((search->options & DNS_DBFIND_STALEOK) == 0); } @@ -8379,6 +8408,29 @@ getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { return (ISC_R_SUCCESS); } +static isc_result_t +setservestalerefresh(dns_db_t *db, uint32_t interval) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + /* currently no bounds checking. 0 means disable. */ + rbtdb->serve_stale_refresh = interval; + return (ISC_R_SUCCESS); +} + +static isc_result_t +getservestalerefresh(dns_db_t *db, uint32_t *interval) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + *interval = rbtdb->serve_stale_refresh; + return (ISC_R_SUCCESS); +} + static dns_dbmethods_t zone_methods = { attach, detach, beginload, @@ -8426,6 +8478,8 @@ static dns_dbmethods_t zone_methods = { attach, getsize, NULL, /* setservestalettl */ NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ setgluecachestats, adjusthashsize }; @@ -8476,6 +8530,8 @@ static dns_dbmethods_t cache_methods = { attach, NULL, /* getsize */ setservestalettl, getservestalettl, + setservestalerefresh, + getservestalerefresh, NULL, adjusthashsize }; diff --git a/lib/dns/sdb.c b/lib/dns/sdb.c index d525963f52..d9de422409 100644 --- a/lib/dns/sdb.c +++ b/lib/dns/sdb.c @@ -1309,6 +1309,8 @@ static dns_dbmethods_t sdb_methods = { NULL, /* getsize */ NULL, /* setservestalettl */ NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ NULL, /* setgluecachestats */ NULL /* adjusthashsize */ }; diff --git a/lib/dns/sdlz.c b/lib/dns/sdlz.c index 14b55eb074..c8a615a0f3 100644 --- a/lib/dns/sdlz.c +++ b/lib/dns/sdlz.c @@ -1281,6 +1281,8 @@ static dns_dbmethods_t sdlzdb_methods = { NULL, /* getsize */ NULL, /* setservestalettl */ NULL, /* getservestalettl */ + NULL, /* setservestalerefresh */ + NULL, /* getservestalerefresh */ NULL, /* 
setgluecachestats */ NULL /* adjusthashsize */ }; diff --git a/lib/dns/win32/libdns.def.in b/lib/dns/win32/libdns.def.in index 6567fa4ced..63d9530f05 100644 --- a/lib/dns/win32/libdns.def.in +++ b/lib/dns/win32/libdns.def.in @@ -82,6 +82,7 @@ dns_cache_flushname dns_cache_flushnode dns_cache_getcachesize dns_cache_getname +dns_cache_getservestalerefresh dns_cache_getservestalettl dns_cache_getstats dns_cache_load @@ -93,6 +94,7 @@ dns_cache_renderxml @END LIBXML2 dns_cache_setcachesize dns_cache_setfilename +dns_cache_setservestalerefresh dns_cache_setservestalettl dns_cache_updatestats dns_catz_add_zone @@ -198,6 +200,7 @@ dns_db_getnsec3parameters dns_db_getoriginnode dns_db_getrrsetstats dns_db_getservestalettl +dns_db_getservestalerefresh dns_db_getsigningtime dns_db_getsize dns_db_getsoaserial @@ -223,6 +226,7 @@ dns_db_serialize dns_db_setcachestats dns_db_setgluecachestats dns_db_setservestalettl +dns_db_setservestalerefresh dns_db_setsigningtime dns_db_settask dns_db_subtractrdataset diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c index 23cf73f477..7551e1d8cf 100644 --- a/lib/isccfg/namedconf.c +++ b/lib/isccfg/namedconf.c @@ -2051,6 +2051,7 @@ static cfg_clausedef_t view_clauses[] = { { "stale-answer-enable", &cfg_type_boolean, 0 }, { "stale-answer-ttl", &cfg_type_duration, 0 }, { "stale-cache-enable", &cfg_type_boolean, 0 }, + { "stale-refresh-time", &cfg_type_duration, 0 }, { "suppress-initial-notify", &cfg_type_boolean, CFG_CLAUSEFLAG_NYI }, { "synth-from-dnssec", &cfg_type_boolean, 0 }, { "topology", &cfg_type_bracketed_aml, CFG_CLAUSEFLAG_ANCIENT }, diff --git a/lib/ns/query.c b/lib/ns/query.c index fd4c7fb9c7..33c7b6a2db 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -5523,6 +5523,9 @@ query_lookup(query_ctx_t *qctx) { dns_clientinfo_t ci; dns_name_t *rpzqname = NULL; unsigned int dboptions; + dns_ttl_t stale_ttl = 0; + dns_ttl_t stale_refresh = 0; + bool dbfind_stale = false; CCTRACE(ISC_LOG_DEBUG(3), "query_lookup"); @@ -5581,6 
+5584,22 @@ query_lookup(query_ctx_t *qctx) { dboptions |= DNS_DBFIND_COVERINGNSEC; } + dns_db_getservestalerefresh(qctx->client->view->cachedb, + &stale_refresh); + dns_db_getservestalettl(qctx->client->view->cachedb, &stale_ttl); + if (stale_refresh > 0) { + if (qctx->client->view->staleanswersok == dns_stale_answer_yes) + { + dboptions |= DNS_DBFIND_STALEENABLED; + } else if (qctx->client->view->staleanswersok == + dns_stale_answer_conf) { + if (qctx->client->view->staleanswersenable && + stale_ttl > 0) { + dboptions |= DNS_DBFIND_STALEENABLED; + } + } + } + result = dns_db_findext(qctx->db, rpzqname, qctx->version, qctx->type, dboptions, qctx->client->now, &qctx->node, qctx->fname, &cm, &ci, qctx->rdataset, @@ -5601,10 +5620,28 @@ query_lookup(query_ctx_t *qctx) { dns_cache_updatestats(qctx->view->cache, result); } - if ((qctx->client->query.dboptions & DNS_DBFIND_STALEOK) != 0) { + /* + * If DNS_DBFIND_STALEOK is set this means we are dealing with a + * lookup following a failed lookup and it is okay to serve a stale + * answer. This will start a time window in rbtdb, tracking the last + * time the RRset lookup failed. + * + * A stale answer may also be served if this is a normal lookup, + * the view has enabled serve-stale (DNS_DBFIND_STALE_ENABLED is set), + * and the request is within the stale-refresh-time window. If this + * is the case we have to make sure that the lookup found a stale + * answer, otherwise "fresh" answers are also treated as stale. 
+ */ + dbfind_stale = ((dboptions & DNS_DBFIND_STALEOK) != 0); + if (dbfind_stale != 0 || + (((dboptions & DNS_DBFIND_STALEENABLED) != 0) && + STALE(qctx->rdataset))) + { char namebuf[DNS_NAME_FORMATSIZE]; bool success; + inc_stats(qctx->client, ns_statscounter_trystale); + qctx->client->query.dboptions &= ~DNS_DBFIND_STALEOK; if (dns_rdataset_isassociated(qctx->rdataset) && dns_rdataset_count(qctx->rdataset) > 0 && @@ -5618,10 +5655,20 @@ query_lookup(query_ctx_t *qctx) { dns_name_format(qctx->client->query.qname, namebuf, sizeof(namebuf)); - isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE, - NS_LOGMODULE_QUERY, ISC_LOG_INFO, - "%s resolver failure, stale answer %s", namebuf, - success ? "used" : "unavailable"); + if (dbfind_stale) { + isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE, + NS_LOGMODULE_QUERY, ISC_LOG_INFO, + "%s resolver failure, stale answer %s", + namebuf, + success ? "used" : "unavailable"); + } else { + isc_log_write(ns_lctx, NS_LOGCATEGORY_SERVE_STALE, + NS_LOGMODULE_QUERY, ISC_LOG_INFO, + "%s query within stale refresh time, " + "stale answer %s", + namebuf, + success ? "used" : "unavailable"); + } if (!success) { QUERY_ERROR(qctx, DNS_R_SERVFAIL); @@ -6833,7 +6880,6 @@ query_usestale(query_ctx_t *qctx) { if (staleanswersok) { qctx->client->query.dboptions |= DNS_DBFIND_STALEOK; - inc_stats(qctx->client, ns_statscounter_trystale); if (qctx->client->query.fetch != NULL) { dns_resolver_destroyfetch(&qctx->client->query.fetch); } From 5e47a13fd05b6e146ad5a89f2f23701d950a608d Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Thu, 5 Nov 2020 13:07:47 -0300 Subject: [PATCH 02/12] Warn if 'stale-refresh-time' < 30 (default) RFC 8767 recommends that attempts to refresh be done no more frequently than every 30 seconds. Added a check to named-checkconf, which will warn if values below the default are found in configuration. BIND will also log the warning during loading of configuration in the same fashion. 
--- .../checkconf/servestale.stale-refresh-time.0.conf | 14 ++++++++++++++ .../servestale.stale-refresh-time.29.conf | 14 ++++++++++++++ bin/tests/system/checkconf/tests.sh | 13 +++++++++++++ lib/bind9/check.c | 11 +++++++++++ 4 files changed, 52 insertions(+) create mode 100644 bin/tests/system/checkconf/servestale.stale-refresh-time.0.conf create mode 100644 bin/tests/system/checkconf/servestale.stale-refresh-time.29.conf diff --git a/bin/tests/system/checkconf/servestale.stale-refresh-time.0.conf b/bin/tests/system/checkconf/servestale.stale-refresh-time.0.conf new file mode 100644 index 0000000000..2e58140c2a --- /dev/null +++ b/bin/tests/system/checkconf/servestale.stale-refresh-time.0.conf @@ -0,0 +1,14 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +options { + stale-refresh-time 0; +}; diff --git a/bin/tests/system/checkconf/servestale.stale-refresh-time.29.conf b/bin/tests/system/checkconf/servestale.stale-refresh-time.29.conf new file mode 100644 index 0000000000..92fe8dcf8c --- /dev/null +++ b/bin/tests/system/checkconf/servestale.stale-refresh-time.29.conf @@ -0,0 +1,14 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. 
+ */ + +options { + stale-refresh-time 29; +}; diff --git a/bin/tests/system/checkconf/tests.sh b/bin/tests/system/checkconf/tests.sh index d8e6db714e..244c226469 100644 --- a/bin/tests/system/checkconf/tests.sh +++ b/bin/tests/system/checkconf/tests.sh @@ -139,6 +139,19 @@ grep '.*' < checkconf.out$n.2 > /dev/null && ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` +n=`expr $n + 1` +echo_i "checking named-checkconf servestale warnings ($n)" +ret=0 +$CHECKCONF servestale.stale-refresh-time.0.conf > checkconf.out$n.1 2>&1 +grep "'stale-refresh-time' should either be 0 or otherwise 30 seconds or higher" < checkconf.out$n.1 > /dev/null && ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=`expr $status + $ret` +ret=0 +$CHECKCONF servestale.stale-refresh-time.29.conf > checkconf.out$n.1 2>&1 +grep "'stale-refresh-time' should either be 0 or otherwise 30 seconds or higher" < checkconf.out$n.1 > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=`expr $status + $ret` + n=`expr $n + 1` echo_i "range checking fields that do not allow zero ($n)" ret=0 diff --git a/lib/bind9/check.c b/lib/bind9/check.c index fde3ec12bb..e1b986bc14 100644 --- a/lib/bind9/check.c +++ b/lib/bind9/check.c @@ -1662,6 +1662,17 @@ check_options(const cfg_obj_t *options, isc_log_t *logctx, isc_mem_t *mctx, } } + obj = NULL; + (void)cfg_map_get(options, "stale-refresh-time", &obj); + if (obj != NULL) { + uint32_t refresh_time = cfg_obj_asduration(obj); + if (refresh_time > 0 && refresh_time < 30) { + cfg_obj_log(obj, logctx, ISC_LOG_WARNING, + "'stale-refresh-time' should either be 0 " + "or otherwise 30 seconds or higher"); + } + } + return (result); } From fc074f15a828dcb572dc1f75d022918224569a35 Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Mon, 19 Oct 2020 21:24:38 -0300 Subject: [PATCH 03/12] Adjusted ancient rrset system test Before the stale-refresh-time feature, the system test for ancient rrset was somewhat based on the average time 
the previous tests and queries were taking, thus not very precise. After the addition of stale-refresh-time the system test for ancient rrset started to fail since the queries for stale records (low max-stale-ttl) were not taking the time to do a full resolution anymore, since the answers now were coming from the cache (because the rrset were stale and within stale-refresh-time window after the previous resolution failure). To handle this, the correct time to wait before rrset become ancient is calculated from max-stale-ttl configuration plus the TTL set in the rrset used in the tests (ans2/ans.pl). Then before sending queries for ancient rrset, we check if we need to sleep enough to ensure those rrset will be marked as ancient. --- bin/tests/system/serve-stale/tests.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index 518396bde4..e5bfcbb018 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -471,6 +471,10 @@ grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) +# keep track of time so we can access these rrset later, +# when we expect them to become ancient. +t1=`$PERL -e 'print time()'` + n=$((n+1)) echo_i "prime cache othertype.example (low max-stale-ttl) ($n)" ret=0 @@ -594,6 +598,20 @@ grep "1 #NXDOMAIN" ns1/named.stats.$n.cachedb > /dev/null || ret=1 status=$((status+ret)) if [ $ret != 0 ]; then echo_i "failed"; fi +# retrieve max-stale-ttl value, +interval_to_ancient=`grep 'max-stale-ttl' ns1/named2.conf.in | awk '{ print $2 }' | tr -d ';'` +# we add 2 seconds to it since this is the ttl value of the records being tested. 
+interval_to_ancient=$((interval_to_ancient + 2)) +t2=`$PERL -e 'print time()'` +elapsed=$((t2 - t1)) + +# if elapsed time so far is less than max-stale-ttl + 2 seconds, +# then we sleep enough to ensure that we'll ask for ancient rrsets +# in the next queries. +if [ $elapsed -lt $interval_to_ancient ]; then + sleep $((interval_to_ancient - elapsed)) +fi + echo_i "sending queries for tests $((n+1))-$((n+4))..." $DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$((n+1)) & $DIG -p ${PORT} @10.53.0.1 othertype.example CAA > dig.out.test$((n+2)) & From a3dbc5fb05332358dfd6cab997d5dda69caefd7b Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Mon, 19 Oct 2020 21:25:34 -0300 Subject: [PATCH 04/12] Added system test for stale-refresh-time This test works as follows: - Query for data.example rrset. - Sleep until its TTL expires (2 secs). - Disable authoritative server. - Query for data.example again. - Since server is down, the answer comes from stale cache, which has a configured stale-answer-ttl of 3 seconds. - Enable authoritative server. - Query for data.example again. - Since last query before activating authoritative server failed, and since 'stale-refresh-time' seconds haven't elapsed yet, answer should come from stale cache and not from the authoritative server. --- bin/tests/system/serve-stale/tests.sh | 102 +++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 4 deletions(-) diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index e5bfcbb018..6a774c52ed 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -471,10 +471,6 @@ grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) -# keep track of time so we can access these rrset later, -# when we expect them to become ancient. 
-t1=`$PERL -e 'print time()'` - n=$((n+1)) echo_i "prime cache othertype.example (low max-stale-ttl) ($n)" ret=0 @@ -502,6 +498,10 @@ grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) +# keep track of time so we can access these rrset later, +# when we expect them to become ancient. +t1=`$PERL -e 'print time()'` + n=$((n+1)) echo_i "verify prime cache statistics (low max-stale-ttl) ($n)" ret=0 @@ -656,6 +656,100 @@ grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) +# Test stale-refresh-time when serve-stale is enabled via rndc. +# Steps for testing stale-refresh-time option (default). +# 1. Prime cache data.example txt +# 2. Disable responses from authoritative server. +# 3. Sleep for TTL duration so rrset TTL expires (2 sec) +# 4. Query data.example +# 5. Check if response come from stale rrset (3 sec TTL) +# 6. Enable responses from authoritative server. +# 7. Query data.example +# 8. Check if response come from stale rrset, since the query +# is within stale-refresh-time window. +n=$((n+1)) +echo_i "flush cache, enable responses from authoritative server ($n)" +ret=0 +$RNDCCMD 10.53.0.1 flushtree example > rndc.out.test$n.1 2>&1 || ret=1 +$DIG -p ${PORT} @10.53.0.2 txt enable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"1\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "check 'rndc serve-stale status' ($n)" +ret=0 +$RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 +grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=20)' rndc.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 1. 
+n=$((n+1)) +echo_i "prime cache data.example (stale-refresh-time rndc) ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$n +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*2.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 2. +n=$((n+1)) +echo_i "disable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt disable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"0\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 3. +sleep 2 + +# Step 4. +n=$((n+1)) +echo_i "sending query for test ($n)" +$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$n + +# Step 5. +echo_i "check stale data.example (stale-refresh-time rndc) ($n)" +ret=0 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*3.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 6. +n=$((n+1)) +echo_i "enable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt enable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"1\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 7. +echo_i "sending query for test $((n+1))" +$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$((n+1)) & + +# ensure the file has been written before proceeding +waitfile dig.out.test$((n+1)) + +# Step 8. 
+n=$((n+1)) +echo_i "check stale data.example comes from cache (stale-refresh-time rndc) ($n)" +ret=0 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*3.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + # # Now test server with no serve-stale options set. # From cc70ea860b879afd769fa9728a44fc42fffc1a9b Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Tue, 20 Oct 2020 16:07:56 -0300 Subject: [PATCH 05/12] Wait for multiple parallel dig commands to fully finish The strategy of running many dig commands in parallel and waiting for the respective output files to be non empty was resulting in random test failures, hard to reproduce, where it was possible that the subsequent reading of the files could have been failing due to the file's content not being fully flushed. Instead of checking if output files are non empty, we now wait for the dig processes to finish. --- bin/tests/system/serve-stale/tests.sh | 74 ++++----------------------- 1 file changed, 11 insertions(+), 63 deletions(-) diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index 6a774c52ed..d312c056b6 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -12,15 +12,6 @@ . 
../conf.sh RNDCCMD="$RNDC -c ../common/rndc.conf -p ${CONTROLPORT} -s" - -# wait up to 11 seconds to ensure that a file has been written -waitfile () { - for try in 0 1 2 3 4 5 6 7 8 9 10; do - [ -s "$1" ] && break - sleep 1 - done -} - DIG="$DIG +time=11" max_stale_ttl=$(sed -ne 's,^[[:space:]]*max-stale-ttl \([[:digit:]]*\).*,\1,p' $TOP_SRCDIR/bin/named/config.c) @@ -120,11 +111,7 @@ $DIG -p ${PORT} @10.53.0.1 othertype.example CAA > dig.out.test$((n+2)) & $DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$((n+3)) & $DIG -p ${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$((n+4)) -# ensure all files have been written before proceeding -waitfile dig.out.test$((n+1)) -waitfile dig.out.test$((n+2)) -waitfile dig.out.test$((n+3)) -waitfile dig.out.test$((n+4)) +wait n=$((n+1)) echo_i "check stale data.example ($n)" @@ -215,11 +202,7 @@ $DIG -p ${PORT} @10.53.0.1 othertype.example CAA > dig.out.test$((n+2)) & $DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$((n+3)) & $DIG -p ${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$((n+4)) -# ensure all files have been written before proceeding -waitfile dig.out.test$((n+1)) -waitfile dig.out.test$((n+2)) -waitfile dig.out.test$((n+3)) -waitfile dig.out.test$((n+4)) +wait n=$((n+1)) echo_i "check stale data.example (serve-stale off) ($n)" @@ -270,11 +253,7 @@ $DIG -p ${PORT} @10.53.0.1 othertype.example CAA > dig.out.test$((n+2)) & $DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$((n+3)) & $DIG -p ${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$((n+4)) -# ensure all files have been written before proceeding -waitfile dig.out.test$((n+1)) -waitfile dig.out.test$((n+2)) -waitfile dig.out.test$((n+3)) -waitfile dig.out.test$((n+4)) +wait n=$((n+1)) echo_i "check stale data.example (serve-stale on) ($n)" @@ -340,11 +319,7 @@ $DIG -p ${PORT} @10.53.0.1 othertype.example CAA > dig.out.test$((n+2)) & $DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$((n+3)) & $DIG -p 
${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$((n+4)) -# ensure all files have been written before proceeding -waitfile dig.out.test$((n+1)) -waitfile dig.out.test$((n+2)) -waitfile dig.out.test$((n+3)) -waitfile dig.out.test$((n+4)) +wait n=$((n+1)) echo_i "check stale data.example (serve-stale reset) ($n)" @@ -536,11 +511,7 @@ $DIG -p ${PORT} @10.53.0.1 othertype.example CAA > dig.out.test$((n+2)) & $DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$((n+3)) & $DIG -p ${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$((n+4)) -# ensure all files have been written before proceeding -waitfile dig.out.test$((n+1)) -waitfile dig.out.test$((n+2)) -waitfile dig.out.test$((n+3)) -waitfile dig.out.test$((n+4)) +wait n=$((n+1)) echo_i "check stale data.example (low max-stale-ttl) ($n)" @@ -618,11 +589,7 @@ $DIG -p ${PORT} @10.53.0.1 othertype.example CAA > dig.out.test$((n+2)) & $DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$((n+3)) & $DIG -p ${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$((n+4)) -# ensure all files have been written before proceeding -waitfile dig.out.test$((n+1)) -waitfile dig.out.test$((n+2)) -waitfile dig.out.test$((n+3)) -waitfile dig.out.test$((n+4)) +wait n=$((n+1)) echo_i "check ancient data.example (low max-stale-ttl) ($n)" @@ -735,10 +702,7 @@ status=$((status+ret)) # Step 7. echo_i "sending query for test $((n+1))" -$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$((n+1)) & - -# ensure the file has been written before proceeding -waitfile dig.out.test$((n+1)) +$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$((n+1)) # Step 8. 
n=$((n+1)) @@ -855,11 +819,7 @@ $DIG -p ${PORT} @10.53.0.3 othertype.example CAA > dig.out.test$((n+2)) & $DIG -p ${PORT} @10.53.0.3 nodata.example TXT > dig.out.test$((n+3)) & $DIG -p ${PORT} @10.53.0.3 nxdomain.example TXT > dig.out.test$((n+4)) -# ensure all files have been written before proceeding -waitfile dig.out.test$((n+1)) -waitfile dig.out.test$((n+2)) -waitfile dig.out.test$((n+3)) -waitfile dig.out.test$((n+4)) +wait n=$((n+1)) echo_i "check fail of data.example (max-stale-ttl default) ($n)" @@ -936,11 +896,7 @@ $DIG -p ${PORT} @10.53.0.3 othertype.example CAA > dig.out.test$((n+2)) & $DIG -p ${PORT} @10.53.0.3 nodata.example TXT > dig.out.test$((n+3)) & $DIG -p ${PORT} @10.53.0.3 nxdomain.example TXT > dig.out.test$((n+4)) -# ensure all files have been written before proceeding -waitfile dig.out.test$((n+1)) -waitfile dig.out.test$((n+2)) -waitfile dig.out.test$((n+3)) -waitfile dig.out.test$((n+4)) +wait n=$((n+1)) echo_i "check data.example (max-stale-ttl default) ($n)" @@ -1083,11 +1039,7 @@ $DIG -p ${PORT} @10.53.0.4 othertype.example CAA > dig.out.test$((n+2)) & $DIG -p ${PORT} @10.53.0.4 nodata.example TXT > dig.out.test$((n+3)) & $DIG -p ${PORT} @10.53.0.4 nxdomain.example TXT > dig.out.test$((n+4)) -# ensure all files have been written before proceeding -waitfile dig.out.test$((n+1)) -waitfile dig.out.test$((n+2)) -waitfile dig.out.test$((n+3)) -waitfile dig.out.test$((n+4)) +wait n=$((n+1)) echo_i "check fail of data.example (serve-stale answers disabled) ($n)" @@ -1293,11 +1245,7 @@ $DIG -p ${PORT} @10.53.0.5 othertype.example CAA > dig.out.test$((n+2)) & $DIG -p ${PORT} @10.53.0.5 nodata.example TXT > dig.out.test$((n+3)) & $DIG -p ${PORT} @10.53.0.5 nxdomain.example TXT > dig.out.test$((n+4)) -# ensure all files have been written before proceeding -waitfile dig.out.test$((n+1)) -waitfile dig.out.test$((n+2)) -waitfile dig.out.test$((n+3)) -waitfile dig.out.test$((n+4)) +wait n=$((n+1)) echo_i "check fail of data.example (serve-stale cache 
disabled) ($n)" From dee778de12bdc24a55cba669ddbd6115dd702041 Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Tue, 10 Nov 2020 14:48:24 +0100 Subject: [PATCH 06/12] Change serve-stale test stale-answer-ttl Using a 'stale-answer-ttl' the same value as the authoritative ttl value makes it hard to differentiate between a response from the stale cache and a response from the authoritative server. Change the stale-answer-ttl from 2 to 4, so that it differs from the authoritative ttl. --- .../system/serve-stale/ns1/named1.conf.in | 2 +- bin/tests/system/serve-stale/tests.sh | 37 ++++++++++--------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/bin/tests/system/serve-stale/ns1/named1.conf.in b/bin/tests/system/serve-stale/ns1/named1.conf.in index 6586849d85..1ef85afad8 100644 --- a/bin/tests/system/serve-stale/ns1/named1.conf.in +++ b/bin/tests/system/serve-stale/ns1/named1.conf.in @@ -28,7 +28,7 @@ options { listen-on-v6 { none; }; recursion yes; max-stale-ttl 3600; - stale-answer-ttl 2; + stale-answer-ttl 4; stale-answer-enable yes; stale-cache-enable yes; servfail-ttl 0; diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index d312c056b6..d1f7f828ab 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -99,7 +99,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: on (stale-answer-ttl=2 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: on (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -118,7 +118,7 @@ echo_i "check stale data.example ($n)" ret=0 grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 -grep "data\.example\..*2.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n 
> /dev/null || ret=1 +grep "data\.example\..*4.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -140,7 +140,7 @@ echo_i "check stale othertype.example ($n)" ret=0 grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 -grep "othertype\.example\..*2.*IN.*CAA.*0.*issue" dig.out.test$n > /dev/null || ret=1 +grep "othertype\.example\..*4.*IN.*CAA.*0.*issue" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -149,7 +149,7 @@ echo_i "check stale nodata.example ($n)" ret=0 grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1 -grep "example\..*2.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 +grep "example\..*4.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -158,7 +158,7 @@ echo_i "check stale nxdomain.example ($n)" ret=0 grep "status: NXDOMAIN" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1 -grep "example\..*2.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 +grep "example\..*4.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -192,7 +192,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: off (rndc) (stale-answer-ttl=2 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: off (rndc) (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -232,6 +232,9 @@ grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) +# +# Test enabling serve-stale via rndc. 
+# n=$((n+1)) echo_i "running 'rndc serve-stale on' ($n)" ret=0 @@ -243,7 +246,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: on (rndc) (stale-answer-ttl=2 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: on (rndc) (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -260,7 +263,7 @@ echo_i "check stale data.example (serve-stale on) ($n)" ret=0 grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 -grep "data\.example\..*2.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*4.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -269,7 +272,7 @@ echo_i "check stale othertype.example (serve-stale on) ($n)" ret=0 grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 -grep "othertype\.example\..*2.*IN.*CAA.*0.*issue" dig.out.test$n > /dev/null || ret=1 +grep "othertype\.example\..*4.*IN.*CAA.*0.*issue" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -278,7 +281,7 @@ echo_i "check stale nodata.example (serve-stale on) ($n)" ret=0 grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1 -grep "example\..*2.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 +grep "example\..*4.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -287,7 +290,7 @@ echo_i "check stale nxdomain.example (serve-stale on) ($n)" ret=0 grep "status: NXDOMAIN" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 0," dig.out.test$n > /dev/null || 
ret=1 -grep "example\..*2.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 +grep "example\..*4.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -309,7 +312,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: on (stale-answer-ttl=2 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: on (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -326,7 +329,7 @@ echo_i "check stale data.example (serve-stale reset) ($n)" ret=0 grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 -grep "data\.example\..*2.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*4.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -335,7 +338,7 @@ echo_i "check stale othertype.example (serve-stale reset) ($n)" ret=0 grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 -grep "othertype.example\..*2.*IN.*CAA.*0.*issue" dig.out.test$n > /dev/null || ret=1 +grep "othertype.example\..*4.*IN.*CAA.*0.*issue" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -344,7 +347,7 @@ echo_i "check stale nodata.example (serve-stale reset) ($n)" ret=0 grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1 -grep "example\..*2.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 +grep "example\..*4.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -353,7 +356,7 @@ echo_i "check stale nxdomain.example 
(serve-stale reset) ($n)" ret=0 grep "status: NXDOMAIN" dig.out.test$n > /dev/null || ret=1 grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1 -grep "example\..*2.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 +grep "example\..*4.*IN.*SOA" dig.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -368,7 +371,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: off (rndc) (stale-answer-ttl=2 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: off (rndc) (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) From e99671e8ddbdc35183bb4e55da94fbac414c489f Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Tue, 10 Nov 2020 14:55:18 +0100 Subject: [PATCH 07/12] Add two more system tests for stale-refresh-time Add one test that checks the behavior when serve-stale is enabled via configuration (as opposed to enabled via rndc). Add one test that checks the behavior when stale-refresh-time is disabled (set to 0). 
--- .../system/serve-stale/ns1/named1.conf.in | 1 + .../system/serve-stale/ns1/named3.conf.in | 41 ++++ bin/tests/system/serve-stale/tests.sh | 175 ++++++++++++++++++ 3 files changed, 217 insertions(+) create mode 100644 bin/tests/system/serve-stale/ns1/named3.conf.in diff --git a/bin/tests/system/serve-stale/ns1/named1.conf.in b/bin/tests/system/serve-stale/ns1/named1.conf.in index 1ef85afad8..41347871cd 100644 --- a/bin/tests/system/serve-stale/ns1/named1.conf.in +++ b/bin/tests/system/serve-stale/ns1/named1.conf.in @@ -31,6 +31,7 @@ options { stale-answer-ttl 4; stale-answer-enable yes; stale-cache-enable yes; + stale-refresh-time 30; servfail-ttl 0; }; diff --git a/bin/tests/system/serve-stale/ns1/named3.conf.in b/bin/tests/system/serve-stale/ns1/named3.conf.in new file mode 100644 index 0000000000..f97dea958d --- /dev/null +++ b/bin/tests/system/serve-stale/ns1/named3.conf.in @@ -0,0 +1,41 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +key rndc_key { + secret "1234abcd8765"; + algorithm hmac-sha256; +}; + +controls { + inet 10.53.0.1 port @CONTROLPORT@ allow { any; } keys { rndc_key; }; +}; + +options { + query-source address 10.53.0.1; + notify-source 10.53.0.1; + transfer-source 10.53.0.1; + port @PORT@; + pid-file "named.pid"; + listen-on { 10.53.0.1; }; + listen-on-v6 { none; }; + recursion yes; + max-stale-ttl 20; + stale-answer-ttl 3; + stale-answer-enable yes; + stale-cache-enable yes; + stale-refresh-time 0; + servfail-ttl 0; +}; + +zone "." 
{ + type primary; + file "root.db"; +}; diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index d1f7f828ab..c0098541bc 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -181,6 +181,77 @@ grep "1 #NXDOMAIN" ns1/named.stats.$n.cachedb > /dev/null || ret=1 status=$((status+ret)) if [ $ret != 0 ]; then echo_i "failed"; fi +# Test stale-refresh-time when serve-stale is enabled via configuration. +# Steps for testing stale-refresh-time option (default). +# 1. Prime cache data.example txt +# 2. Disable responses from authoritative server. +# 3. Sleep for TTL duration so rrset TTL expires (2 sec) +# 4. Query data.example +# 5. Check if response comes from stale rrset (4 sec TTL) +# 6. Enable responses from authoritative server. +# 7. Query data.example +# 8. Check if response comes from stale rrset, since the query +# is within stale-refresh-time window. +n=$((n+1)) +echo_i "check 'rndc serve-stale status' ($n)" +ret=0 +$RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 +grep '_default: on (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 1-3 done above. + +# Step 4. +n=$((n+1)) +echo_i "sending query for test ($n)" +$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$n + +# Step 5. +echo_i "check stale data.example (stale-refresh-time) ($n)" +ret=0 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*4.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 6.
+n=$((n+1)) +echo_i "enable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt enable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"1\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 7. +echo_i "sending query for test $((n+1))" +$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$((n+1)) + +# Step 8. +n=$((n+1)) +echo_i "check stale data.example comes from cache (stale-refresh-time) ($n)" +ret=0 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*4.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# +# Test disabling serve-stale via rndc. +# +n=$((n+1)) +echo_i "disable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt disable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"0\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + n=$((n+1)) echo_i "running 'rndc serve-stale off' ($n)" ret=0 @@ -717,6 +788,110 @@ grep "data\.example\..*3.*IN.*TXT.*A text record with a 2 second ttl" dig.out.te if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) +# Steps for testing stale-refresh-time option (disabled). +# 1. Prime cache data.example txt +# 2. Disable responses from authoritative server. +# 3. Sleep for TTL duration so rrset TTL expires (2 sec) +# 4. Query data.example +# 5. Check if response comes from stale rrset (3 sec TTL) +# 6. Enable responses from authoritative server. +# 7. Query data.example +# 8. Check if response comes from the authoritative server, since +# stale-refresh-time is disabled (set to 0).
+n=$((n+1)) +echo_i "updating ns1/named.conf ($n)" +ret=0 +copy_setports ns1/named3.conf.in ns1/named.conf +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "running 'rndc reload' ($n)" +ret=0 +rndc_reload ns1 10.53.0.1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "check 'rndc serve-stale status' ($n)" +ret=0 +$RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 +grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=20)' rndc.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "flush cache, enable responses from authoritative server ($n)" +ret=0 +$RNDCCMD 10.53.0.1 flushtree example > rndc.out.test$n.1 2>&1 || ret=1 +$DIG -p ${PORT} @10.53.0.2 txt enable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"1\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 1. +n=$((n+1)) +echo_i "prime cache data.example (stale-refresh-time disabled) ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$n +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*2.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 2. +n=$((n+1)) +echo_i "disable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt disable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"0\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 3. +sleep 2 + +# Step 4. +n=$((n+1)) +echo_i "sending query for test ($n)" +$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$n + +# Step 5. 
+echo_i "check stale data.example (stale-refresh-time disabled) ($n)" +ret=0 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*3.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 6. +n=$((n+1)) +echo_i "enable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt enable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"1\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Step 7. +echo_i "sending query for test $((n+1))" +$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$((n+1)) + +# Step 8. +n=$((n+1)) +echo_i "check stale data.example comes from authoritative (stale-refresh-time disabled) ($n)" +ret=0 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*2.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + # # Now test server with no serve-stale options set. # From 581e2a8f283d24f0f34f5b873fd960a3f84995de Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Tue, 10 Nov 2020 13:50:54 -0300 Subject: [PATCH 08/12] Check 'stale-refresh-time' when sharing cache between views This commit ensures that, along with previous restrictions, a cache is shareable between views only if their 'stale-refresh-time' values are equal.
--- bin/named/server.c | 5 +++-- lib/dns/cache.c | 18 +++++++++++++++++- lib/dns/include/dns/cache.h | 12 +++++++++++- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/bin/named/server.c b/bin/named/server.c index f83473c1b2..17889f9d49 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -1869,7 +1869,7 @@ cache_reusable(dns_view_t *originview, dns_view_t *view, static bool cache_sharable(dns_view_t *originview, dns_view_t *view, bool new_zero_no_soattl, uint64_t new_max_cache_size, - uint32_t new_stale_ttl) { + uint32_t new_stale_ttl, uint32_t new_stale_refresh_time) { /* * If the cache cannot even reused for the same view, it cannot be * shared with other views. @@ -1883,6 +1883,7 @@ cache_sharable(dns_view_t *originview, dns_view_t *view, * the sharing views. */ if (dns_cache_getservestalettl(originview->cache) != new_stale_ttl || + dns_cache_getservestalerefresh(originview->cache) != new_stale_refresh_time || dns_cache_getcachesize(originview->cache) != new_max_cache_size) { return (false); @@ -4435,7 +4436,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, nsc = cachelist_find(cachelist, cachename, view->rdclass); if (nsc != NULL) { if (!cache_sharable(nsc->primaryview, view, zero_no_soattl, - max_cache_size, max_stale_ttl)) + max_cache_size, max_stale_ttl, stale_refresh_time)) { isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, NAMED_LOGMODULE_SERVER, ISC_LOG_ERROR, diff --git a/lib/dns/cache.c b/lib/dns/cache.c index 873b825f2e..931340fe53 100644 --- a/lib/dns/cache.c +++ b/lib/dns/cache.c @@ -142,6 +142,7 @@ struct dns_cache { char **db_argv; size_t size; dns_ttl_t serve_stale_ttl; + dns_ttl_t serve_stale_refresh; isc_stats_t *stats; /* Locked by 'filelock'. 
*/ @@ -1000,12 +1001,27 @@ dns_cache_getservestalettl(dns_cache_t *cache) { } void -dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval) { +dns_cache_setservestalerefresh(dns_cache_t *cache, dns_ttl_t interval) { REQUIRE(VALID_CACHE(cache)); + LOCK(&cache->lock); + cache->serve_stale_refresh = interval; + UNLOCK(&cache->lock); + (void)dns_db_setservestalerefresh(cache->db, interval); } +dns_ttl_t +dns_cache_getservestalerefresh(dns_cache_t *cache) { + isc_result_t result; + dns_ttl_t interval; + + REQUIRE(VALID_CACHE(cache)); + + result = dns_db_getservestalerefresh(cache->db, &interval); + return (result == ISC_R_SUCCESS ? interval : 0); +} + /* * The cleaner task is shutting down; do the necessary cleanup. */ diff --git a/lib/dns/include/dns/cache.h b/lib/dns/include/dns/cache.h index 0474d78dbd..22e94da9d5 100644 --- a/lib/dns/include/dns/cache.h +++ b/lib/dns/include/dns/cache.h @@ -256,7 +256,7 @@ dns_cache_getservestalettl(dns_cache_t *cache); */ void -dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval); +dns_cache_setservestalerefresh(dns_cache_t *cache, dns_ttl_t interval); /*%< * Sets the length of time to wait before attempting to refresh a rrset * if a previous attempt in doing so has failed. @@ -267,6 +267,16 @@ dns_cache_setservestalerefresh(dns_cache_t *cache, uint32_t interval); *\li 'cache' to be valid. */ +dns_ttl_t +dns_cache_getservestalerefresh(dns_cache_t *cache); +/*%< + * Gets the 'stale-refresh-time' value, set by a previous call to + * 'dns_cache_setservestalerefresh'. + * + * Requires: + *\li 'cache' to be valid. 
+ */ + isc_result_t dns_cache_flush(dns_cache_t *cache); /*%< From d4142d2bed40fd9b0acff978d2446fe55865eeef Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Tue, 10 Nov 2020 14:30:01 -0300 Subject: [PATCH 09/12] Output 'stale-refresh-time' value on rndc serve-stale status --- bin/named/server.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bin/named/server.c b/bin/named/server.c index 17889f9d49..7964cd20fa 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -1883,7 +1883,8 @@ cache_sharable(dns_view_t *originview, dns_view_t *view, * the sharing views. */ if (dns_cache_getservestalettl(originview->cache) != new_stale_ttl || - dns_cache_getservestalerefresh(originview->cache) != new_stale_refresh_time || + dns_cache_getservestalerefresh(originview->cache) != + new_stale_refresh_time || dns_cache_getcachesize(originview->cache) != new_max_cache_size) { return (false); @@ -4436,7 +4437,8 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, nsc = cachelist_find(cachelist, cachename, view->rdclass); if (nsc != NULL) { if (!cache_sharable(nsc->primaryview, view, zero_no_soattl, - max_cache_size, max_stale_ttl, stale_refresh_time)) + max_cache_size, max_stale_ttl, + stale_refresh_time)) { isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, NAMED_LOGMODULE_SERVER, ISC_LOG_ERROR, @@ -16170,6 +16172,7 @@ named_server_servestale(named_server_t *server, isc_lex_t *lex, view = ISC_LIST_NEXT(view, link)) { dns_ttl_t stale_ttl = 0; + uint32_t stale_refresh = 0; dns_db_t *db = NULL; if (classtxt != NULL && rdclass != view->rdclass) { @@ -16189,6 +16192,7 @@ named_server_servestale(named_server_t *server, isc_lex_t *lex, db = NULL; dns_db_attach(view->cachedb, &db); (void)dns_db_getservestalettl(db, &stale_ttl); + (void)dns_db_getservestalerefresh(db, &stale_refresh); dns_db_detach(&db); if (found) { CHECK(putstr(text, "\n")); @@ -16218,8 +16222,10 @@ named_server_servestale(named_server_t *server, 
isc_lex_t *lex, } if (stale_ttl > 0) { snprintf(msg, sizeof(msg), - " (stale-answer-ttl=%u max-stale-ttl=%u)", - view->staleanswerttl, stale_ttl); + " (stale-answer-ttl=%u max-stale-ttl=%u " + "stale-refresh-time=%u)", + view->staleanswerttl, stale_ttl, + stale_refresh); CHECK(putstr(text, msg)); } found = true; From 8cca23a147dc0590b3d9c56453171fd77d9bf1fe Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Tue, 10 Nov 2020 18:08:23 -0300 Subject: [PATCH 10/12] Adjusted test to match new rndc serve-stale status output --- bin/tests/system/serve-stale/tests.sh | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index c0098541bc..6a1c675a46 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -99,7 +99,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: on (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: on (stale-answer-ttl=4 max-stale-ttl=3600 stale-refresh-time=30)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -196,7 +196,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: on (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: on (stale-answer-ttl=4 max-stale-ttl=3600 stale-refresh-time=30)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -263,7 +263,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: off (rndc) (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep 
'_default: off (rndc) (stale-answer-ttl=4 max-stale-ttl=3600 stale-refresh-time=30)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -317,7 +317,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: on (rndc) (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: on (rndc) (stale-answer-ttl=4 max-stale-ttl=3600 stale-refresh-time=30)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -383,7 +383,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: on (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: on (stale-answer-ttl=4 max-stale-ttl=3600 stale-refresh-time=30)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -442,7 +442,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: off (rndc) (stale-answer-ttl=4 max-stale-ttl=3600)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: off (rndc) (stale-answer-ttl=4 max-stale-ttl=3600 stale-refresh-time=30)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -470,7 +470,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: off (rndc) (stale-answer-ttl=3 max-stale-ttl=20)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: off (rndc) (stale-answer-ttl=3 max-stale-ttl=20 stale-refresh-time=30)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -489,7 +489,7 @@ 
n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=20)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=20 stale-refresh-time=30)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -722,7 +722,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=20)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=20 stale-refresh-time=30)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -816,7 +816,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=20)' rndc.out.test$n > /dev/null || ret=1 +grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=20 stale-refresh-time=0)' rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -985,7 +985,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.3 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep "_default: off (stale-answer-ttl=1 max-stale-ttl=$max_stale_ttl)" rndc.out.test$n > /dev/null || ret=1 +grep "_default: off (stale-answer-ttl=1 max-stale-ttl=$max_stale_ttl stale-refresh-time=30)" rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -1062,7 +1062,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.3 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep "_default: on (rndc) (stale-answer-ttl=1 
max-stale-ttl=$max_stale_ttl)" rndc.out.test$n > /dev/null || ret=1 +grep "_default: on (rndc) (stale-answer-ttl=1 max-stale-ttl=$max_stale_ttl stale-refresh-time=30)" rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) @@ -1205,7 +1205,7 @@ n=$((n+1)) echo_i "check 'rndc serve-stale status' ($n)" ret=0 $RNDCCMD 10.53.0.4 serve-stale status > rndc.out.test$n 2>&1 || ret=1 -grep "_default: off (stale-answer-ttl=1 max-stale-ttl=$max_stale_ttl)" rndc.out.test$n > /dev/null || ret=1 +grep "_default: off (stale-answer-ttl=1 max-stale-ttl=$max_stale_ttl stale-refresh-time=30)" rndc.out.test$n > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) From b4c997537b37bc824d43863d9058f1037c4c32e4 Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Wed, 4 Nov 2020 20:02:34 -0300 Subject: [PATCH 11/12] Add CHANGES and release notes entry --- CHANGES | 5 +++++ doc/notes/notes-current.rst | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/CHANGES b/CHANGES index 03542d8df4..8aa352925c 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,8 @@ +5533. [func] Add "stale-refresh-time" option, a time window that + starts after a failed lookup, during which a stale RRset + will be served directly from cache before a new + attempt to refresh it is made. [GL #2066] + 5532. [cleanup] Unused header files were removed: bin/rndc/include/rndc/os.h, lib/isc/timer_p.h, lib/isccfg/include/isccfg/dnsconf.h and code related diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index a4ce7dfe8c..2a784e95fd 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -26,6 +26,10 @@ New Features - None. +- A new configuration option ``stale-refresh-time`` has been introduced. It + allows a stale RRset to be served directly from cache for a period of time + after a failed lookup, before a new attempt to refresh it is made.
[GL #2066] + Removed Features ~~~~~~~~~~~~~~~~ From 1ba2215c29dfe693343b86cb607f313bad3337e3 Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Wed, 4 Nov 2020 20:02:58 -0300 Subject: [PATCH 12/12] Update ARM and other documents --- bin/named/named.conf.rst | 2 ++ doc/arm/reference.rst | 13 +++++++++++++ doc/man/named.conf.5in | 2 ++ doc/misc/options | 2 ++ doc/misc/options.active | 2 ++ doc/misc/options.grammar.rst | 1 + 6 files changed, 22 insertions(+) diff --git a/bin/named/named.conf.rst b/bin/named/named.conf.rst index 2f7c6a9854..42f6f80063 100644 --- a/bin/named/named.conf.rst +++ b/bin/named/named.conf.rst @@ -401,6 +401,7 @@ OPTIONS stale-answer-enable boolean; stale-answer-ttl duration; stale-cache-enable boolean; + stale-refresh-time duration; startup-notify-rate integer; statistics-file quoted_string; synth-from-dnssec boolean; @@ -797,6 +798,7 @@ VIEW stale-answer-enable boolean; stale-answer-ttl duration; stale-cache-enable boolean; + stale-refresh-time duration; synth-from-dnssec boolean; transfer-format ( many-answers | one-answer ); transfer-source ( ipv4_address | * ) [ port ( integer | * ) ] [ diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst index 47485165fb..15ad929a90 100644 --- a/doc/arm/reference.rst +++ b/doc/arm/reference.rst @@ -1839,6 +1839,19 @@ Boolean Options ``stale-cache-enable`` If ``yes``, enable the retaining of "stale" cached answers. Default ``no``. +``stale-refresh-time`` + If the name servers for a given zone are not answering, this sets the time + window for which ``named`` will promptly return "stale" cached answers for + the RRset being requested before a new attempt to contact the servers + is made. For convenience, TTL-style time-unit suffixes may be used to + specify the value. It also accepts ISO 8601 duration formats. + + The default ``stale-refresh-time`` is 30 seconds, as RFC 8767 recommends + that attempts to refresh be made no more frequently than every 30 + seconds.
A value of zero disables the feature, meaning that normal + resolution will take place first; only if that fails will ``named`` + return "stale" cached answers. + ``nocookie-udp-size`` This sets the maximum size of UDP responses that are sent to queries without a valid server COOKIE. A value below 128 is silently diff --git a/doc/man/named.conf.5in b/doc/man/named.conf.5in index 36a770977b..356a0d122b 100644 --- a/doc/man/named.conf.5in +++ b/doc/man/named.conf.5in @@ -464,6 +464,7 @@ options { stale\-answer\-enable boolean; stale\-answer\-ttl duration; stale\-cache\-enable boolean; + stale\-refresh\-time duration; startup\-notify\-rate integer; statistics\-file quoted_string; synth\-from\-dnssec boolean; @@ -892,6 +893,7 @@ view string [ class ] { stale\-answer\-enable boolean; stale\-answer\-ttl duration; stale\-cache\-enable boolean; + stale\-refresh\-time duration; synth\-from\-dnssec boolean; transfer\-format ( many\-answers | one\-answer ); transfer\-source ( ipv4_address | * ) [ port ( integer | * ) ] [ diff --git a/doc/misc/options b/doc/misc/options index 620407c12f..b49cf12797 100644 --- a/doc/misc/options +++ b/doc/misc/options @@ -366,6 +366,7 @@ options { stale-answer-enable ; stale-answer-ttl ; stale-cache-enable ; + stale-refresh-time ; startup-notify-rate ; statistics-file ; statistics-interval ; // ancient @@ -758,6 +759,7 @@ view [ ] { stale-answer-enable ; stale-answer-ttl ; stale-cache-enable ; + stale-refresh-time ; suppress-initial-notify ; // not yet implemented synth-from-dnssec ; topology { ; ...
}; // ancient diff --git a/doc/misc/options.active b/doc/misc/options.active index 99adabde9c..da43db123a 100644 --- a/doc/misc/options.active +++ b/doc/misc/options.active @@ -329,6 +329,7 @@ options { stale-answer-enable ; stale-answer-ttl ; stale-cache-enable ; + stale-refresh-time ; startup-notify-rate ; statistics-file ; synth-from-dnssec ; @@ -686,6 +687,7 @@ view [ ] { stale-answer-enable ; stale-answer-ttl ; stale-cache-enable ; + stale-refresh-time ; synth-from-dnssec ; transfer-format ( many-answers | one-answer ); transfer-source ( | * ) [ port ( | * ) ] [ diff --git a/doc/misc/options.grammar.rst b/doc/misc/options.grammar.rst index 1725a6e458..ace29b5704 100644 --- a/doc/misc/options.grammar.rst +++ b/doc/misc/options.grammar.rst @@ -259,6 +259,7 @@ stale-answer-enable ; stale-answer-ttl ; stale-cache-enable ; + stale-refresh-time ; startup-notify-rate ; statistics-file ; synth-from-dnssec ;