mirror of
https://github.com/isc-projects/bind9.git
synced 2026-06-09 00:53:50 -04:00
Merge branch '2434-fetch-limit-serve-stale' into 'main'
Resolve "Serve stale when fetch limits are hit" Closes #2434 See merge request isc-projects/bind9!4607
This commit is contained in:
commit
0fab6cf88e
8 changed files with 204 additions and 27 deletions
4
CHANGES
4
CHANGES
|
|
@ -1,3 +1,7 @@
|
|||
5573. [func] Also return stale data if an error occurred and we are
|
||||
not resuming. Only start the stale-refresh-time window
|
||||
if we timed out. [GL #2434]
|
||||
|
||||
5572. [bug] Address potential double free in generatexml.
|
||||
[GL #2420]
|
||||
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@ sub reply_handler {
|
|||
$rcode = "NXDOMAIN";
|
||||
}
|
||||
|
||||
# mark the answer as authoritative (by setting the 'aa' flag
|
||||
# mark the answer as authoritative (by setting the 'aa' flag)
|
||||
return ($rcode, \@ans, \@auth, \@add, { aa => 1 });
|
||||
}
|
||||
|
||||
|
|
|
|||
50
bin/tests/system/serve-stale/ns3/named6.conf.in
Normal file
50
bin/tests/system/serve-stale/ns3/named6.conf.in
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
*
|
||||
* See the COPYRIGHT file distributed with this work for additional
|
||||
* information regarding copyright ownership.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Test stale-answer-client-timeout 0.
|
||||
*/
|
||||
|
||||
key rndc_key {
|
||||
secret "1234abcd8765";
|
||||
algorithm hmac-sha256;
|
||||
};
|
||||
|
||||
controls {
|
||||
inet 10.53.0.3 port @CONTROLPORT@ allow { any; } keys { rndc_key; };
|
||||
};
|
||||
|
||||
options {
|
||||
query-source address 10.53.0.3;
|
||||
notify-source 10.53.0.3;
|
||||
transfer-source 10.53.0.3;
|
||||
port @PORT@;
|
||||
pid-file "named.pid";
|
||||
listen-on { 10.53.0.3; };
|
||||
listen-on-v6 { none; };
|
||||
dnssec-validation no;
|
||||
recursion yes;
|
||||
stale-answer-enable no;
|
||||
stale-cache-enable yes;
|
||||
stale-answer-ttl 3;
|
||||
stale-answer-client-timeout disabled;
|
||||
stale-refresh-time 4;
|
||||
resolver-query-timeout 10;
|
||||
fetches-per-zone 1 fail;
|
||||
fetches-per-server 1 fail;
|
||||
max-stale-ttl 3600;
|
||||
};
|
||||
|
||||
zone "." {
|
||||
type hint;
|
||||
file "root.db";
|
||||
};
|
||||
|
|
@ -2010,5 +2010,104 @@ grep "data\.example\..*[12].*IN.*TXT.*A text record with a 2 second ttl" dig.out
|
|||
if [ $ret != 0 ]; then echo_i "failed"; fi
|
||||
status=$((status+ret))
|
||||
|
||||
####################################################################
|
||||
# Test if fetch-limits quota is reached, stale data is served. #
|
||||
####################################################################
|
||||
echo_i "test stale data with fetch-limits"
|
||||
|
||||
n=$((n+1))
|
||||
echo_i "updating ns3/named.conf ($n)"
|
||||
ret=0
|
||||
copy_setports ns3/named6.conf.in ns3/named.conf
|
||||
if [ $ret != 0 ]; then echo_i "failed"; fi
|
||||
status=$((status+ret))
|
||||
|
||||
n=$((n+1))
|
||||
echo_i "running 'rndc reload' ($n)"
|
||||
ret=0
|
||||
rndc_reload ns3 10.53.0.3
|
||||
if [ $ret != 0 ]; then echo_i "failed"; fi
|
||||
status=$((status+ret))
|
||||
|
||||
# Disable responses from authoritative server.
|
||||
n=$((n+1))
|
||||
echo_i "disable responses from authoritative server ($n)"
|
||||
ret=0
|
||||
$DIG -p ${PORT} @10.53.0.2 txt disable > dig.out.test$n
|
||||
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
|
||||
grep "TXT.\"0\"" dig.out.test$n > /dev/null || ret=1
|
||||
if [ $ret != 0 ]; then echo_i "failed"; fi
|
||||
status=$((status+ret))
|
||||
|
||||
# Hit the fetch-limits.
|
||||
burst() {
|
||||
num=${1}
|
||||
rm -f burst.input.$$
|
||||
while [ $num -gt 0 ]; do
|
||||
num=`expr $num - 1`
|
||||
echo "${num}.data.example A" >> burst.input.$$
|
||||
done
|
||||
$PERL ../ditch.pl -p ${PORT} -s 10.53.0.3 burst.input.$$
|
||||
rm -f burst.input.$$
|
||||
}
|
||||
|
||||
wait_for_fetchlimits() {
|
||||
burst 20
|
||||
$DIG -p ${PORT} @10.53.0.3 data.example A > dig.out.test$n
|
||||
grep "status: SERVFAIL" dig.out.test$n > /dev/null || return 1
|
||||
}
|
||||
|
||||
n=$((n+1))
|
||||
echo_i "hit fetch limits ($n)"
|
||||
ret=0
|
||||
retry_quiet 10 wait_for_fetchlimits || ret=1
|
||||
if [ $ret != 0 ]; then echo_i "failed"; fi
|
||||
status=$((status+ret))
|
||||
|
||||
# Allow RRset to become stale.
|
||||
sleep 2
|
||||
|
||||
# Turn on serve-stale.
|
||||
n=$((n+1))
|
||||
echo_i "running 'rndc serve-stale on' ($n)"
|
||||
ret=0
|
||||
$RNDCCMD 10.53.0.3 serve-stale on || ret=1
|
||||
if [ $ret != 0 ]; then echo_i "failed"; fi
|
||||
status=$((status+ret))
|
||||
|
||||
n=$((n+1))
|
||||
echo_i "check 'rndc serve-stale status' ($n)"
|
||||
ret=0
|
||||
$RNDCCMD 10.53.0.3 serve-stale status > rndc.out.test$n 2>&1 || ret=1
|
||||
grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=3600 stale-refresh-time=4)' rndc.out.test$n > /dev/null || ret=1
|
||||
if [ $ret != 0 ]; then echo_i "failed"; fi
|
||||
status=$((status+ret))
|
||||
|
||||
# Expect stale data now.
|
||||
n=$((n+1))
|
||||
ret=0
|
||||
echo_i "check stale data.example comes from cache (fetch-limits) ($n)"
|
||||
nextpart ns3/named.run > /dev/null
|
||||
$DIG -p ${PORT} @10.53.0.3 data.example TXT > dig.out.test$n
|
||||
wait_for_log 5 "data.example resolver failure, stale answer used" ns3/named.run || ret=1
|
||||
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
|
||||
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
|
||||
grep "data\.example\..*3.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1
|
||||
if [ $ret != 0 ]; then echo_i "failed"; fi
|
||||
status=$((status+ret))
|
||||
|
||||
# The previous query should not have started the stale-refresh-time window.
|
||||
n=$((n+1))
|
||||
ret=0
|
||||
echo_i "check stale data.example comes from cache again (fetch-limits) ($n)"
|
||||
nextpart ns3/named.run > /dev/null
|
||||
$DIG -p ${PORT} @10.53.0.3 data.example TXT > dig.out.test$n
|
||||
wait_for_log 5 "data.example resolver failure, stale answer used" ns3/named.run || ret=1
|
||||
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
|
||||
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
|
||||
grep "data\.example\..*3.*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1
|
||||
if [ $ret != 0 ]; then echo_i "failed"; fi
|
||||
status=$((status+ret))
|
||||
|
||||
echo_i "exit status: $status"
|
||||
[ $status -eq 0 ] || exit 1
|
||||
|
|
|
|||
|
|
@ -24,9 +24,7 @@ Known Issues
|
|||
New Features
|
||||
~~~~~~~~~~~~
|
||||
|
||||
- None.
|
||||
|
||||
- A new option, ```stale-answer-client-timeout``, has been added to
|
||||
- A new option, ``stale-answer-client-timeout``, has been added to
|
||||
improve ``named``'s behavior with respect to serving stale data. The option
|
||||
defines the amount of time ``named`` waits before attempting
|
||||
to answer the query with a stale RRset from cache. If a stale answer
|
||||
|
|
@ -41,7 +39,13 @@ New Features
|
|||
|
||||
The option can be disabled by setting the value to ``off`` or
|
||||
``disabled``. It also has no effect if ``stale-answer-enable`` is
|
||||
disabled.
|
||||
disabled. [GL #2247]
|
||||
|
||||
- Also return stale data if an error occurred and we are not resuming a
|
||||
query (and serve-stale is enabled). This may happen for example if
|
||||
``fetches-per-server`` or ``fetches-per-zone` limits are reached. In this
|
||||
case, we will try to answer DNS requests with stale data, but not start
|
||||
the ``stale-refresh-time`` window. [GL #2434]
|
||||
|
||||
Removed Features
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
|
|
|||
|
|
@ -241,33 +241,36 @@ struct dns_dbonupdatelistener {
|
|||
#define DNS_DBFIND_NOZONECUT 0x0200
|
||||
|
||||
/*
|
||||
* DNS_DBFIND_STALEOK: This flag is set when BIND fails to refresh a
|
||||
* RRset due to timeout (resolver-query-timeout), its intent is to
|
||||
* try to look for stale data in cache as a fallback, but only if
|
||||
* stale answers are enabled in configuration.
|
||||
*
|
||||
* This flag is also used to activate stale-refresh-time window, since it
|
||||
* is the only way the database knows that a resolution has failed.
|
||||
* DNS_DBFIND_STALEOK: This flag is set when BIND fails to refresh a RRset due
|
||||
* to timeout (resolver-query-timeout). Its intent is to try to look for stale
|
||||
* data in cache as a fallback, but only if stale answers are enabled in
|
||||
* configuration.
|
||||
*/
|
||||
#define DNS_DBFIND_STALEOK 0x0400
|
||||
|
||||
/*
|
||||
* DNS_DBFIND_STALEENABLED: This flag is used as a hint to the database
|
||||
* that it may use stale data. It is always set during query lookup if
|
||||
* stale answers are enabled, but only effectively used during
|
||||
* stale-refresh-time window. Also during this window, the resolver will
|
||||
* not try to resolve the query, in other words no attempt to refresh the
|
||||
* data in cache is made when the stale-refresh-time window is active.
|
||||
* DNS_DBFIND_STALEENABLED: This flag is used as a hint to the database that
|
||||
* it may use stale data. It is always set during query lookup if stale
|
||||
* answers are enabled, but only effectively used during stale-refresh-time
|
||||
* window. Also during this window, the resolver will not try to resolve the
|
||||
* query, in other words no attempt to refresh the data in cache is made when
|
||||
* the stale-refresh-time window is active.
|
||||
*/
|
||||
#define DNS_DBFIND_STALEENABLED 0x0800
|
||||
|
||||
/*
|
||||
* DNS_DBFIND_STALEONLY: This new introduced flag is used when we want
|
||||
* stale data from the database, but not due to a failure in resolution,
|
||||
* it also doesn't require stale-refresh-time window timer to be active.
|
||||
* As long as there is a stale RRset available, it should be returned.
|
||||
* DNS_DBFIND_STALEONLY: This flag is used when we want stale data from the
|
||||
* database, but not due to a failure in resolution, it also doesn't require
|
||||
* stale-refresh-time window timer to be active. As long as there is a stale
|
||||
* RRset available, it should be returned.
|
||||
*/
|
||||
#define DNS_DBFIND_STALEONLY 0x1000
|
||||
|
||||
/*
|
||||
* DNS_DBFIND_STALESTART: This flag is used to activate stale-refresh-time
|
||||
* window.
|
||||
*/
|
||||
#define DNS_DBFIND_STALESTART 0x2000
|
||||
/*@}*/
|
||||
|
||||
/*@{*/
|
||||
|
|
|
|||
|
|
@ -4565,11 +4565,12 @@ check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header,
|
|||
mark_header_stale(search->rbtdb, header);
|
||||
*header_prev = header;
|
||||
/*
|
||||
* If DNS_DBFIND_STALEOK is set then it means we failed
|
||||
* to resolve the name during recursion, in this case we
|
||||
* mark the time in which the refresh failed.
|
||||
* If DNS_DBFIND_STALESTART is set then it means we
|
||||
* failed to resolve the name during recursion, in
|
||||
* this case we mark the time in which the refresh
|
||||
* failed.
|
||||
*/
|
||||
if ((search->options & DNS_DBFIND_STALEOK) != 0) {
|
||||
if ((search->options & DNS_DBFIND_STALESTART) != 0) {
|
||||
header->last_refresh_fail_ts = search->now;
|
||||
} else if ((search->options &
|
||||
DNS_DBFIND_STALEENABLED) != 0 &&
|
||||
|
|
|
|||
|
|
@ -7441,6 +7441,14 @@ root_key_sentinel_return_servfail(query_ctx_t *qctx, isc_result_t result) {
|
|||
*/
|
||||
static bool
|
||||
query_usestale(query_ctx_t *qctx) {
|
||||
if ((qctx->client->query.dboptions & DNS_DBFIND_STALEOK) != 0) {
|
||||
/*
|
||||
* Query was already using stale, if that didn't work the
|
||||
* last time, it won't work this time either.
|
||||
*/
|
||||
return (false);
|
||||
}
|
||||
|
||||
qctx_clean(qctx);
|
||||
qctx_freedata(qctx);
|
||||
|
||||
|
|
@ -7548,11 +7556,19 @@ query_gotanswer(query_ctx_t *qctx, isc_result_t res) {
|
|||
"query_gotanswer: unexpected error: %s",
|
||||
isc_result_totext(result));
|
||||
CCTRACE(ISC_LOG_ERROR, errmsg);
|
||||
if (qctx->resuming && query_usestale(qctx)) {
|
||||
if (query_usestale(qctx)) {
|
||||
/*
|
||||
* If serve-stale is enabled, query_usestale() already
|
||||
* set up 'qctx' for looking up a stale response.
|
||||
*
|
||||
* We only need to check if the query timed out or
|
||||
* something else has gone wrong. If the query timed
|
||||
* out, we will start the stale-refresh-time window.
|
||||
*/
|
||||
if (qctx->resuming && result == ISC_R_TIMEDOUT) {
|
||||
qctx->client->query.dboptions |=
|
||||
DNS_DBFIND_STALESTART;
|
||||
}
|
||||
return (query_lookup(qctx));
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue