From d54e7052abe0a7448e803bd0b7abb93ffff6fb0c Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Thu, 17 Nov 2022 13:48:36 +0000 Subject: [PATCH 1/4] Add serve-stale CNAME check with stale-answer-client-timeout off Prime the cache with the following records: shortttl.cname.example. 1 IN CNAME longttl.target.example. longttl.target.example. 600 IN A 10.53.0.2 Wait for the CNAME record to expire, disable the authoritative server, and query 'shortttl.cname.example' again, expecting a stale answer. (cherry picked from commit 537187bf2f1a1c402d862bbb105da5947b18d217) --- bin/tests/system/serve-stale/ans2/ans.pl | 24 ++++++++ bin/tests/system/serve-stale/tests.sh | 74 ++++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/bin/tests/system/serve-stale/ans2/ans.pl b/bin/tests/system/serve-stale/ans2/ans.pl index 0af055a4fb..f14ae67fe8 100644 --- a/bin/tests/system/serve-stale/ans2/ans.pl +++ b/bin/tests/system/serve-stale/ans2/ans.pl @@ -58,6 +58,8 @@ my $CAA = "othertype.example 2 IN CAA 0 issue \"ca1.example.net\""; my $negSOA = "example 2 IN SOA . . 0 0 0 0 300"; my $CNAME = "cname.example 7 IN CNAME target.example"; my $TARGET = "target.example 9 IN A $localaddr"; +my $SHORTCNAME = "shortttl.cname.example 1 IN CNAME longttl.target.example"; +my $LONGTARGET = "longttl.target.example 600 IN A $localaddr"; sub reply_handler { my ($qname, $qclass, $qtype) = @_; @@ -166,6 +168,28 @@ sub reply_handler { push @auth, $rr; } $rcode = "NOERROR"; + } elsif ($qname eq "shortttl.cname.example") { + if ($qtype eq "A") { + my $rr = new Net::DNS::RR($SHORTCNAME); + push @ans, $rr; + } else { + my $rr = new Net::DNS::RR($negSOA); + push @auth, $rr; + } + $rcode = "NOERROR"; + } elsif ($qname eq "longttl.target.example") { + if ($slow_response) { + print " Sleeping 3 seconds\n"; + sleep(3); + } + if ($qtype eq "A") { + my $rr = new Net::DNS::RR($LONGTARGET); + push @ans, $rr; + } else { + my $rr = new Net::DNS::RR($negSOA); + push @auth, $rr; + } + $rcode = "NOERROR"; } elsif ($qname eq "longttl.example") { if ($qtype eq "TXT") { my $rr = new Net::DNS::RR($LONGTXT); diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index d64452bb93..067c1bdaa9 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -1829,6 +1829,80 @@ grep "data\.example\..*[12].*IN.*TXT.*A text record with a 2 second ttl" dig.out if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) +############################################################## +# Test for stale-answer-client-timeout off and CNAME record. # +############################################################## +echo_i "test stale-answer-client-timeout (0) and CNAME record" + +n=$((n+1)) +echo_i "prime cache shortttl.cname.example (stale-answer-client-timeout off) ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.3 shortttl.cname.example A > dig.out.test$n +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 2," dig.out.test$n > /dev/null || ret=1 +grep "shortttl\.cname\.example\..*1.*IN.*CNAME.*longttl\.target\.example\." dig.out.test$n > /dev/null || ret=1 +grep "longttl\.target\.example\..*600.*IN.*A.*10\.53\.0\.2" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +# Allow RRset to become stale. +sleep 1 + +n=$((n+1)) +echo_i "disable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt disable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"0\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +ret=0 +echo_i "check stale shortttl.cname.example comes from cache (stale-answer-client-timeout off) ($n)" +nextpart ns3/named.run > /dev/null +$DIG -p ${PORT} @10.53.0.3 shortttl.cname.example A > dig.out.test$n +wait_for_log 5 "shortttl.cname.example resolver failure, stale answer used" ns3/named.run || ret=1 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "EDE: 3 (Stale Answer): (resolver failure)" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 2," dig.out.test$n > /dev/null || ret=1 +grep "shortttl\.cname\.example\..*3.*IN.*CNAME.*longttl\.target\.example\." dig.out.test$n > /dev/null || ret=1 +# We can't reliably test the TTL of the longttl.target.example A record. +grep "longttl\.target\.example\..*IN.*A.*10\.53\.0\.2" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "enable responses from authoritative server ($n)" +ret=0 +$DIG -p ${PORT} @10.53.0.2 txt enable > dig.out.test$n +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "TXT.\"1\"" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "check server is alive or restart ($n)" +ret=0 +$RNDCCMD 10.53.0.3 status > rndc.out.test$n 2>&1 || ret=1 +if [ $ret != 0 ]; then + echo_i "failed" + echo_i "restart ns3" + start_server --noclean --restart --port ${PORT} serve-stale ns3 +fi +status=$((status+ret)) + +n=$((n+1)) +echo_i "check server is alive or restart ($n)" +ret=0 +$RNDCCMD 10.53.0.3 status > rndc.out.test$n 2>&1 || ret=1 +if [ $ret != 0 ]; then + echo_i "failed" + echo_i "restart ns3" + start_server --noclean --restart --port ${PORT} serve-stale ns3 +fi +status=$((status+ret)) + ############################################# # Test for stale-answer-client-timeout 0. # ############################################# From 271bc20b1c0ded79348625c0b216a03c932ed4d1 Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Thu, 17 Nov 2022 13:52:26 +0000 Subject: [PATCH 2/4] Consider non-stale data when in serve-stale mode With 'stale-answer-enable yes;' and 'stale-answer-client-timeout off;', consider the following situation: A CNAME record and its target record are in the cache, then the CNAME record expires, but the target record is still valid. When a new query for the CNAME record arrives, and the query fails, the stale record is used, and then the query "restarts" to follow the CNAME target. The problem is that the query's multiple stale options (like DNS_DBFIND_STALEOK) are not reset, so 'query_lookup()' treats the restarted query as a lookup following a failed lookup, and returns a SERVFAIL answer when there is no stale data found in the cache, even if there is valid non-stale data there available. With this change, query_lookup() now considers non-stale data in the cache in the first place, and returns it if it is available. (cherry picked from commit 91a1a8efc5bca44ff3aa6861c31759449ea65ecd) --- lib/dns/include/dns/rdataset.h | 5 +++++ lib/ns/query.c | 12 +++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/dns/include/dns/rdataset.h b/lib/dns/include/dns/rdataset.h index 6ae21dcc4b..d240d2e019 100644 --- a/lib/dns/include/dns/rdataset.h +++ b/lib/dns/include/dns/rdataset.h @@ -156,6 +156,11 @@ struct dns_rdataset { * * \def DNS_RDATASETATTR_LOADORDER * Output the RRset in load order. + * + * \def DNS_RDATASETATTR_STALE_ADDED + * Set on rdatasets that were added during a stale-answer-client-timeout + * lookup. In other words, the RRset was added during a lookup of stale + * data and does not necessarily mean that the rdataset itself is stale. */ #define DNS_RDATASETATTR_NONE 0x00000000 /*%< No ordering. */ diff --git a/lib/ns/query.c b/lib/ns/query.c index d1fac6f383..35c6af86ce 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -5866,6 +5866,7 @@ query_lookup(query_ctx_t *qctx) { dns_ttl_t stale_refresh = 0; bool dbfind_stale = false; bool stale_timeout = false; + bool answer_found = false; bool stale_found = false; bool stale_refresh_window = false; uint16_t ede = 0; @@ -5973,6 +5974,14 @@ query_lookup(query_ctx_t *qctx) { */ stale_timeout = ((dboptions & DNS_DBFIND_STALETIMEOUT) != 0); + if (dns_rdataset_isassociated(qctx->rdataset) && + dns_rdataset_count(qctx->rdataset) > 0 && !STALE(qctx->rdataset)) + { + /* Found non-stale usable rdataset. */ + answer_found = true; + goto gotanswer; + } + if (dbfind_stale || stale_refresh_window || stale_timeout) { dns_name_format(qctx->client->query.qname, namebuf, sizeof(namebuf)); @@ -6099,7 +6108,8 @@ query_lookup(query_ctx_t *qctx) { } } - if (stale_timeout && stale_found) { +gotanswer: + if (stale_timeout && (answer_found || stale_found)) { /* * Mark RRsets that we are adding to the client message on a * lookup during 'stale-answer-client-timeout', so we can From 096d980a8798a97b8b7f48417ae2318bcfb2f938 Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Thu, 17 Nov 2022 14:21:31 +0000 Subject: [PATCH 3/4] Add a CHANGES note for [GL #3678] (cherry picked from commit 40dee61a1e6c3be351b70fe4a99fbcf0f8a741db) --- CHANGES | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES b/CHANGES index 0e7eeab1b5..9dbb6995f7 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,8 @@ +6059. [bug] In some serve stale scenarios, like when following an + expired CNAME record, named could return SERVFAIL if the + previous request wasn't successful. Consider non-stale + data when in serve-stale mode. [GL #3678] + 6058. [bug] Prevent named from crashing when "rndc delzone" attempts to delete a zone added by a catalog zone. [GL #3745] From 90408617d7185b922e0bda7ff72db1af9695ff7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Mon, 13 Jun 2022 14:03:16 +0200 Subject: [PATCH 4/4] Check for NULL before dereferencing qctx->rpz_st Commit 9ffb4a7ba11fae64a6ce2dd6390cd334372b7ab7 causes Clang Static Analyzer to flag a potential NULL dereference in query_nxdomain(): query.c:9394:26: warning: Dereference of null pointer [core.NullDereference] if (!qctx->nxrewrite || qctx->rpz_st->m.rpz->addsoa) { ^~~~~~~~~~~~~~~~~~~ 1 warning generated. The warning above is for qctx->rpz_st potentially being a NULL pointer when query_nxdomain() is called from query_resume(). This is a false positive because none of the database lookup result codes currently causing query_nxdomain() to be called (DNS_R_EMPTYWILD, DNS_R_NXDOMAIN) can be returned by a database lookup following a recursive resolution attempt. Add a NULL check nevertheless in order to future-proof the code and silence Clang Static Analyzer. (cherry picked from commit 07592d1315412c38c978e8d009aace5d0f5bef93) (cherry picked from commit a4547a109324fff1bdd21032c5c7d8fdeb0e4ad8) --- lib/ns/query.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/ns/query.c b/lib/ns/query.c index 35c6af86ce..36f3c40dc1 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -9602,7 +9602,9 @@ query_nxdomain(query_ctx_t *qctx, isc_result_t res) { { ttl = 0; } - if (!qctx->nxrewrite || qctx->rpz_st->m.rpz->addsoa) { + if (!qctx->nxrewrite || + (qctx->rpz_st != NULL && qctx->rpz_st->m.rpz->addsoa)) + { result = query_addsoa(qctx, ttl, section); if (result != ISC_R_SUCCESS) { QUERY_ERROR(qctx, result);