diff --git a/CHANGES b/CHANGES index 44712112f0..63a2c54db1 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +5573. [func] Also return stale data if an error occurred and we are + not resuming. Only start the stale-refresh-time window + if we timed out. [GL #2434] + 5572. [bug] Address potential double free in generatexml. [GL #2420] diff --git a/bin/tests/system/serve-stale/ans2/ans.pl b/bin/tests/system/serve-stale/ans2/ans.pl index 8be8392f28..4bb9f896ff 100644 --- a/bin/tests/system/serve-stale/ans2/ans.pl +++ b/bin/tests/system/serve-stale/ans2/ans.pl @@ -145,7 +145,7 @@ sub reply_handler { $rcode = "NXDOMAIN"; } - # mark the answer as authoritative (by setting the 'aa' flag + # mark the answer as authoritative (by setting the 'aa' flag) return ($rcode, \@ans, \@auth, \@add, { aa => 1 }); } diff --git a/bin/tests/system/serve-stale/ns3/named6.conf.in b/bin/tests/system/serve-stale/ns3/named6.conf.in new file mode 100644 index 0000000000..1aea7c85c2 --- /dev/null +++ b/bin/tests/system/serve-stale/ns3/named6.conf.in @@ -0,0 +1,50 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + + +/* + * Test stale-answer-client-timeout 0. + */ + +key rndc_key { + secret "1234abcd8765"; + algorithm hmac-sha256; +}; + +controls { + inet 10.53.0.3 port @CONTROLPORT@ allow { any; } keys { rndc_key; }; +}; + +options { + query-source address 10.53.0.3; + notify-source 10.53.0.3; + transfer-source 10.53.0.3; + port @PORT@; + pid-file "named.pid"; + listen-on { 10.53.0.3; }; + listen-on-v6 { none; }; + dnssec-validation no; + recursion yes; + stale-answer-enable no; + stale-cache-enable yes; + stale-answer-ttl 3; + stale-answer-client-timeout disabled; + stale-refresh-time 4; + resolver-query-timeout 10; + fetches-per-zone 1 fail; + fetches-per-server 1 fail; + max-stale-ttl 3600; +}; + +zone "." { + type hint; + file "root.db"; +}; diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index 92263d86df..47a117308e 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -2010,5 +2010,104 @@ grep "data\.example\..*[12].*IN.*TXT.*A text record with a 2 second ttl" dig.out if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) +#################################################################### +# Test if fetch-limits quota is reached, stale data is served. # +#################################################################### +echo_i "test stale data with fetch-limits" + +n=$((n+1)) +echo_i "updating ns3/named.conf ($n)" +ret=0 +copy_setports ns3/named6.conf.in ns3/named.conf +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "running 'rndc reload' ($n)" +ret=0 +rndc_reload ns3 10.53.0.3 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Disable responses from authoritative server. +n=$((n+1)) +echo_i "disable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt disable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"0\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Hit the fetch-limits. +burst() { + num=${1} + rm -f burst.input.$$ + while [ $num -gt 0 ]; do + num=`expr $num - 1` + echo "${num}.data.example A" >> burst.input.$$ + done + $PERL ../ditch.pl -p ${PORT} -s 10.53.0.3 burst.input.$$ + rm -f burst.input.$$ +} + +wait_for_fetchlimits() { + burst 20 + $DIG -p ${PORT} @10.53.0.3 data.example A > dig.out.test$n + grep "status: SERVFAIL" dig.out.test$n > /dev/null || return 1 +} + +n=$((n+1)) +echo_i "hit fetch limits ($n)" +ret=0 +retry_quiet 10 wait_for_fetchlimits || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Allow RRset to become stale. +sleep 2 + +# Turn on serve-stale. +n=$((n+1)) +echo_i "running 'rndc serve-stale on' ($n)" +ret=0 +$RNDCCMD 10.53.0.3 serve-stale on || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "check 'rndc serve-stale status' ($n)" +ret=0 +$RNDCCMD 10.53.0.3 serve-stale status > rndc.out.test$n 2>&1 || ret=1 +grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=3600 stale-refresh-time=4)' rndc.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Expect stale data now. +n=$((n+1)) +ret=0 +echo_i "check stale data.example comes from cache (fetch-limits) ($n)" +nextpart ns3/named.run > /dev/null +$DIG -p ${PORT} @10.53.0.3 data.example TXT > dig.out.test$n +wait_for_log 5 "data.example resolver failure, stale answer used" ns3/named.run || ret=1 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*3.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# The previous query should not have started the stale-refresh-time window. +n=$((n+1)) +ret=0 +echo_i "check stale data.example comes from cache again (fetch-limits) ($n)" +nextpart ns3/named.run > /dev/null +$DIG -p ${PORT} @10.53.0.3 data.example TXT > dig.out.test$n +wait_for_log 5 "data.example resolver failure, stale answer used" ns3/named.run || ret=1 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "data\.example\..*3.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index b459256ff6..600911bd2a 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -24,9 +24,7 @@ Known Issues New Features ~~~~~~~~~~~~ -- None. - -- A new option, ```stale-answer-client-timeout``, has been added to +- A new option, ``stale-answer-client-timeout``, has been added to improve ``named``'s behavior with respect to serving stale data. The option defines the amount of time ``named`` waits before attempting to answer the query with a stale RRset from cache. If a stale answer @@ -41,7 +39,13 @@ New Features The option can be disabled by setting the value to ``off`` or ``disabled``. It also has no effect if ``stale-answer-enable`` is - disabled. + disabled. [GL #2247] + +- Also return stale data if an error occurred and we are not resuming a + query (and serve-stale is enabled). This may happen for example if + ``fetches-per-server`` or ``fetches-per-zone` limits are reached. In this + case, we will try to answer DNS requests with stale data, but not start + the ``stale-refresh-time`` window. [GL #2434] Removed Features ~~~~~~~~~~~~~~~~ diff --git a/lib/dns/include/dns/db.h b/lib/dns/include/dns/db.h index 1e01c43dfd..f77cc797d4 100644 --- a/lib/dns/include/dns/db.h +++ b/lib/dns/include/dns/db.h @@ -241,33 +241,36 @@ struct dns_dbonupdatelistener { #define DNS_DBFIND_NOZONECUT 0x0200 /* - * DNS_DBFIND_STALEOK: This flag is set when BIND fails to refresh a - * RRset due to timeout (resolver-query-timeout), its intent is to - * try to look for stale data in cache as a fallback, but only if - * stale answers are enabled in configuration. - * - * This flag is also used to activate stale-refresh-time window, since it - * is the only way the database knows that a resolution has failed. + * DNS_DBFIND_STALEOK: This flag is set when BIND fails to refresh a RRset due + * to timeout (resolver-query-timeout). Its intent is to try to look for stale + * data in cache as a fallback, but only if stale answers are enabled in + * configuration. */ #define DNS_DBFIND_STALEOK 0x0400 /* - * DNS_DBFIND_STALEENABLED: This flag is used as a hint to the database - * that it may use stale data. It is always set during query lookup if - * stale answers are enabled, but only effectively used during - * stale-refresh-time window. Also during this window, the resolver will - * not try to resolve the query, in other words no attempt to refresh the - * data in cache is made when the stale-refresh-time window is active. + * DNS_DBFIND_STALEENABLED: This flag is used as a hint to the database that + * it may use stale data. It is always set during query lookup if stale + * answers are enabled, but only effectively used during stale-refresh-time + * window. Also during this window, the resolver will not try to resolve the + * query, in other words no attempt to refresh the data in cache is made when + * the stale-refresh-time window is active. */ #define DNS_DBFIND_STALEENABLED 0x0800 /* - * DNS_DBFIND_STALEONLY: This new introduced flag is used when we want - * stale data from the database, but not due to a failure in resolution, - * it also doesn't require stale-refresh-time window timer to be active. - * As long as there is a stale RRset available, it should be returned. + * DNS_DBFIND_STALEONLY: This flag is used when we want stale data from the + * database, but not due to a failure in resolution, it also doesn't require + * stale-refresh-time window timer to be active. As long as there is a stale + * RRset available, it should be returned. */ #define DNS_DBFIND_STALEONLY 0x1000 + +/* + * DNS_DBFIND_STALESTART: This flag is used to activate stale-refresh-time + * window. + */ +#define DNS_DBFIND_STALESTART 0x2000 /*@}*/ /*@{*/ diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 13be34268f..83f6e30c7c 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -4565,11 +4565,12 @@ check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header, mark_header_stale(search->rbtdb, header); *header_prev = header; /* - * If DNS_DBFIND_STALEOK is set then it means we failed - * to resolve the name during recursion, in this case we - * mark the time in which the refresh failed. + * If DNS_DBFIND_STALESTART is set then it means we + * failed to resolve the name during recursion, in + * this case we mark the time in which the refresh + * failed. */ - if ((search->options & DNS_DBFIND_STALEOK) != 0) { + if ((search->options & DNS_DBFIND_STALESTART) != 0) { header->last_refresh_fail_ts = search->now; } else if ((search->options & DNS_DBFIND_STALEENABLED) != 0 && diff --git a/lib/ns/query.c b/lib/ns/query.c index f413ddee19..e7a666ef2a 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -7441,6 +7441,14 @@ root_key_sentinel_return_servfail(query_ctx_t *qctx, isc_result_t result) { */ static bool query_usestale(query_ctx_t *qctx) { + if ((qctx->client->query.dboptions & DNS_DBFIND_STALEOK) != 0) { + /* + * Query was already using stale, if that didn't work the + * last time, it won't work this time either. + */ + return (false); + } + qctx_clean(qctx); qctx_freedata(qctx); @@ -7548,11 +7556,19 @@ query_gotanswer(query_ctx_t *qctx, isc_result_t res) { "query_gotanswer: unexpected error: %s", isc_result_totext(result)); CCTRACE(ISC_LOG_ERROR, errmsg); - if (qctx->resuming && query_usestale(qctx)) { + if (query_usestale(qctx)) { /* * If serve-stale is enabled, query_usestale() already * set up 'qctx' for looking up a stale response. + * + * We only need to check if the query timed out or + * something else has gone wrong. If the query timed + * out, we will start the stale-refresh-time window. */ + if (qctx->resuming && result == ISC_R_TIMEDOUT) { + qctx->client->query.dboptions |= + DNS_DBFIND_STALESTART; + } return (query_lookup(qctx)); }