Merge branch '3950-serve-stale-strikes-again-9.18' into 'bind-9.18'

[9.18] Fix serve-stale bug when cache has no data

See merge request isc-projects/bind9!7909
This commit is contained in:
Matthijs Mekking 2023-05-30 13:15:13 +00:00
commit 076b8363fc
7 changed files with 126 additions and 14 deletions

View file

@ -1,3 +1,6 @@
6183. [bug] Fix a serve-stale bug where a delegation from cache
could be returned to the client. [GL #3950]
6182. [cleanup] Remove configure checks for epoll, kqueue and
/dev/poll. [GL #4098]

View file

@ -49,6 +49,16 @@ my $udpsock = IO::Socket::INET->new(LocalAddr => "$localaddr",
my $SOA = "example 300 IN SOA . . 0 0 0 0 300";
my $NS = "example 300 IN NS ns.example";
my $A = "ns.example 300 IN A $localaddr";
#
# Slow delegation
#
my $slowSOA = "slow 300 IN SOA . . 0 0 0 0 300";
my $slowNS = "slow 300 IN NS ns.slow";
my $slowA = "ns.slow 300 IN A $localaddr";
my $slowTXT = "data.slow 2 IN TXT \"A slow text record with a 2 second ttl\"";
my $slownegSOA = "slow 2 IN SOA . . 0 0 0 0 300";
#
# Records to be TTL stretched
#
@ -218,6 +228,44 @@ sub reply_handler {
push @auth, $rr;
}
$rcode = "NOERROR";
} elsif ($qname eq "ns.slow" ) {
if ($qtype eq "A") {
my $rr = new Net::DNS::RR($slowA);
push @ans, $rr;
} else {
my $rr = new Net::DNS::RR($slowSOA);
push @auth, $rr;
}
$rcode = "NOERROR";
} elsif ($qname eq "slow") {
if ($qtype eq "NS") {
my $rr = new Net::DNS::RR($slowNS);
push @auth, $rr;
$rr = new Net::DNS::RR($slowA);
push @add, $rr;
} elsif ($qtype eq "SOA") {
my $rr = new Net::DNS::RR($slowSOA);
push @ans, $rr;
} else {
my $rr = new Net::DNS::RR($slowSOA);
push @auth, $rr;
}
$rcode = "NOERROR";
} elsif ($qname eq "data.slow") {
if ($slow_response) {
print " Sleeping 3 seconds\n";
sleep(3);
# only one time
$slow_response = 0;
}
if ($qtype eq "TXT") {
my $rr = new Net::DNS::RR($slowTXT);
push @ans, $rr;
} else {
my $rr = new Net::DNS::RR($slownegSOA);
push @auth, $rr;
}
$rcode = "NOERROR";
} else {
my $rr = new Net::DNS::RR($SOA);
push @auth, $rr;

View file

@ -14,3 +14,5 @@
ns.nil. 300 A 10.53.0.1
example. 300 NS ns.example.
ns.example. 300 A 10.53.0.2
slow. 300 NS ns.slow.
ns.slow. 300 A 10.53.0.2

View file

@ -42,10 +42,10 @@ options {
recursive-clients 10; # CVE-2022-3924
max-stale-ttl 3600;
resolver-query-timeout 30000; # 30 seconds
qname-minimization disabled;
};
zone "." {
type secondary;
primaries { 10.53.0.1; };
file "root.bk";
type hint;
file "root.db";
};

View file

@ -1693,6 +1693,24 @@ grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status+ret))
n=$((n+1))
echo_i "delay responses from authoritative server ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.2 txt slowdown > dig.out.test$n
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
grep "TXT.\"1\"" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status+ret))
n=$((n+1))
echo_i "prime cache data.slow TXT (stale-answer-client-timeout) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.3 data.slow TXT > dig.out.test$n
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status+ret))
n=$((n+1))
echo_i "disable responses from authoritative server ($n)"
ret=0
@ -1707,10 +1725,11 @@ sleep 2
nextpart ns3/named.run > /dev/null
echo_i "sending queries for tests $((n+1))-$((n+2))..."
echo_i "sending queries for tests $((n+1))-$((n+3))..."
t1=`$PERL -e 'print time()'`
$DIG -p ${PORT} +tries=1 +timeout=11 @10.53.0.3 data.example TXT > dig.out.test$((n+1)) &
$DIG -p ${PORT} +tries=1 +timeout=11 @10.53.0.3 nodata.example TXT > dig.out.test$((n+2)) &
$DIG -p ${PORT} +tries=1 +timeout=11 @10.53.0.3 data.slow TXT > dig.out.test$((n+3)) &
wait
t2=`$PERL -e 'print time()'`
@ -1741,6 +1760,16 @@ grep "example\..*3.*IN.*SOA" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status+ret))
n=$((n+1))
echo_i "check stale data.slow TXT comes from cache (stale-answer-client-timeout 1.8) ($n)"
ret=0
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
grep "EDE: 3 (Stale Answer): (client timeout)" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
grep "data\.slow\..*3.*IN.*TXT.*A slow text record with a 2 second ttl" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status+ret))
# Now query for RRset not in cache. The first query should time out, but once
# we enable the authoritative server, the second query should be able to get a
# response.

View file

@ -44,6 +44,10 @@ Bug Fixes
for HTTP is removed from the configuration. That has been fixed.
:gl:`#4071`
- It could happen that after the :any:`stale-answer-client-timeout` duration,
a delegation from cache was returned to the client. This has now been fixed.
:gl:`#3950`
Known Issues
~~~~~~~~~~~~

View file

@ -5852,6 +5852,27 @@ query_refresh_rrset(query_ctx_t *orig_qctx) {
qctx_destroy(&qctx);
}
/*%
* Depending on the db lookup result, we can respond to the
* client this stale answer.
*/
static bool
stale_client_answer(isc_result_t result) {
switch (result) {
case ISC_R_SUCCESS:
case DNS_R_EMPTYNAME:
case DNS_R_NXRRSET:
case DNS_R_NCACHENXRRSET:
case DNS_R_CNAME:
case DNS_R_DNAME:
return (true);
default:
return (false);
}
UNREACHABLE();
}
/*%
* Perform a local database lookup, in either an authoritative or
* cache database. If unable to answer, call ns_query_done(); otherwise
@ -5983,7 +6004,6 @@ query_lookup(query_ctx_t *qctx) {
{
/* Found non-stale usable rdataset. */
answer_found = true;
goto gotanswer;
}
if (dbfind_stale || stale_refresh_window || stale_timeout) {
@ -6019,7 +6039,7 @@ query_lookup(query_ctx_t *qctx) {
if (stale_found) {
ns_client_extendederror(qctx->client, ede,
"resolver failure");
} else {
} else if (!answer_found) {
/*
* Resolver failure, no stale data, nothing more we
* can do, return SERVFAIL.
@ -6042,7 +6062,7 @@ query_lookup(query_ctx_t *qctx) {
ns_client_extendederror(
qctx->client, ede,
"query within stale refresh time window");
} else {
} else if (!answer_found) {
/*
* During the stale refresh window explicitly do not try
* to refresh the data, because a recent lookup failed.
@ -6052,7 +6072,7 @@ query_lookup(query_ctx_t *qctx) {
}
} else if (stale_timeout) {
if ((qctx->options & DNS_GETDB_STALEFIRST) != 0) {
if (!stale_found) {
if (!stale_found && !answer_found) {
/*
* We have nothing useful in cache to return
* immediately.
@ -6069,7 +6089,7 @@ query_lookup(query_ctx_t *qctx) {
&qctx->client->query.fetch);
}
return (query_lookup(qctx));
} else {
} else if (stale_client_answer(result)) {
/*
* Immediately return the stale answer, start a
* resolver fetch to refresh the data in cache.
@ -6081,9 +6101,12 @@ query_lookup(query_ctx_t *qctx) {
"refresh the RRset will still be made",
namebuf);
qctx->refresh_rrset = STALE(qctx->rdataset);
ns_client_extendederror(
qctx->client, ede,
"stale data prioritized over lookup");
if (stale_found) {
ns_client_extendederror(
qctx->client, ede,
"stale data prioritized over "
"lookup");
}
}
} else {
/*
@ -6099,7 +6122,11 @@ query_lookup(query_ctx_t *qctx) {
if (stale_found) {
ns_client_extendederror(qctx->client, ede,
"client timeout");
} else {
} else if (!answer_found) {
return (result);
}
if (!stale_client_answer(result)) {
return (result);
}
@ -6112,7 +6139,6 @@ query_lookup(query_ctx_t *qctx) {
}
}
gotanswer:
if (stale_timeout && (answer_found || stale_found)) {
/*
* Mark RRsets that we are adding to the client message on a