From 7b46d898b6b63b51b14dcc53225c2ee6e9f2cf31 Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Wed, 21 May 2025 14:44:50 +0000 Subject: [PATCH 1/4] Fix a zone refresh bug in zone.c:refresh_callback() When the zone.c:refresh_callback() callback function is called during a SOA request before a zone transfer, it can receive a ISC_R_SHUTTINGDOWN result for the sent request when named is shutting down, and in that case it just destroys the request and finishes the ongoing transfer, without clearing the DNS_ZONEFLG_REFRESH flag of the zone. This is alright when named is going to shutdown, but currently the callback can get a ISC_R_SHUTTINGDOWN result also when named is reconfigured during the ongoibg SOA request. In that case, leaving the DNS_ZONEFLG_REFRESH flag set results in the zone never being able to refresh again, because any new attempts will be caneled while the flag is set. Clear the DNS_ZONEFLG_REFRESH flag on the 'exiting' error path of the callback function. (cherry picked from commit 228e441328af8f3a54c1ae3f0cd7b871dab83609) --- lib/dns/zone.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/dns/zone.c b/lib/dns/zone.c index d539d4695b..bf0ba0ecad 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -14308,6 +14308,17 @@ next_primary: goto detach; exiting: + /* + * We can get here not only during shutdown, but also when the refresh + * is canceled during reconfiguration. In that case, make sure to clear + * the DNS_ZONEFLG_REFRESH flag so that future zone refreshes don't get + * stuck, and make sure a new refresh attempt is made again soon after + * the reconfiguration is complete. + */ + DNS_ZONE_CLRFLAG(zone, DNS_ZONEFLG_REFRESH); + zone->refreshtime = now; + zone_settimer(zone, &now); + dns_request_destroy(&zone->request); goto detach; From 0d1251a2ecb48d543d8183a12b316d418f5ed384 Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Wed, 21 May 2025 14:56:04 +0000 Subject: [PATCH 2/4] Add a debug log in zone.c:refresh_callback() The new debug message logs the request result in the SOA request callback function. (cherry picked from commit b07ec4f0b3429f688d35d2694f56cffc9d3ac56b) --- lib/dns/zone.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/dns/zone.c b/lib/dns/zone.c index bf0ba0ecad..816e801bf7 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -13919,6 +13919,7 @@ refresh_callback(void *arg) { dns_rdata_t rdata = DNS_RDATA_INIT; dns_rdata_soa_t soa; isc_result_t result; + const isc_result_t eresult = dns_request_getresult(request); isc_sockaddr_t curraddr; uint32_t serial, oldserial = 0; bool do_queue_xfrin = false; @@ -13927,6 +13928,12 @@ refresh_callback(void *arg) { ENTER; + if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(3))) { + dns_zone_logc(zone, DNS_LOGCATEGORY_XFER_IN, ISC_LOG_DEBUG(3), + "refresh: request result: %s", + isc_result_totext(eresult)); + } + now = isc_time_now(); LOCK_ZONE(zone); @@ -13942,7 +13949,7 @@ refresh_callback(void *arg) { isc_sockaddr_format(&curraddr, primary, sizeof(primary)); isc_sockaddr_format(&zone->sourceaddr, source, sizeof(source)); - switch (dns_request_getresult(request)) { + switch (eresult) { case ISC_R_SUCCESS: break; case ISC_R_SHUTTINGDOWN: @@ -13988,7 +13995,7 @@ refresh_callback(void *arg) { } FALLTHROUGH; default: - result = dns_request_getresult(request); + result = eresult; dns_zone_logc(zone, DNS_LOGCATEGORY_XFER_IN, ISC_LOG_INFO, "refresh: failure trying primary " "%s (source %s): %s", From 20eb80333eed46127c34e833fafcbc285bc777ed Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Wed, 21 May 2025 14:57:47 +0000 Subject: [PATCH 3/4] Test named reconfiguration during zone transfer's SOA request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new test checks that named can correctly process an interrupted SOA request during zone transfer, caused by reconfiguration. Co-authored-by: Michał Kępień (cherry picked from commit aa6ca3e77682462ed3af8bc42ea8590addba6626) --- bin/tests/system/xfer/ans9/ans.py | 111 ++++++++++++++++++++++++ bin/tests/system/xfer/ns6/named.conf.in | 7 ++ bin/tests/system/xfer/tests.sh | 43 +++++++++ bin/tests/system/xfer/tests_sh_xfer.py | 1 + 4 files changed, 162 insertions(+) create mode 100644 bin/tests/system/xfer/ans9/ans.py diff --git a/bin/tests/system/xfer/ans9/ans.py b/bin/tests/system/xfer/ans9/ans.py new file mode 100644 index 0000000000..56c80becb9 --- /dev/null +++ b/bin/tests/system/xfer/ans9/ans.py @@ -0,0 +1,111 @@ +""" +Copyright (C) Internet Systems Consortium, Inc. ("ISC") + +SPDX-License-Identifier: MPL-2.0 + +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, you can obtain one at https://mozilla.org/MPL/2.0/. + +See the COPYRIGHT file distributed with this work for additional +information regarding copyright ownership. +""" + +from typing import AsyncGenerator + +import dns.message +import dns.rdataclass +import dns.rdatatype +import dns.rrset + +from isctest.asyncserver import ( + ControllableAsyncDnsServer, + DomainHandler, + QueryContext, + ResponseAction, + DnsResponseSend, + ToggleResponsesCommand, +) + + +class AXFRServer(DomainHandler): + """ + Yield SOA and AXFR responses. Every new AXFR response increments the SOA + version. + """ + + domains = ["xfr-and-reconfig"] + + def __init__(self) -> None: + super().__init__() + self.soa_version = 0 + + async def get_responses( + self, qctx: QueryContext + ) -> AsyncGenerator[ResponseAction, None]: + # This is oversimplified because I am lazy - we are appending the SOA + # RRset to the ANSWER section for _every_ QTYPE. named is only + # expected to send a SOA query over UDP and then an AXFR query over + # TCP. Responses to both of those start with a SOA RRset in the ANSWER + # section :-) + soa_message = dns.message.make_response(qctx.query) + soa_rrset = dns.rrset.from_text( + qctx.qname, + 300, + dns.rdataclass.IN, + dns.rdatatype.SOA, + f". . {self.soa_version} 0 0 0 0", + ) + soa_message.answer.append(soa_rrset) + + yield DnsResponseSend(soa_message, authoritative=True) + + if qctx.qtype == dns.rdatatype.SOA: + # If QTYPE=SOA, the SOA record is the complete response. + return + + if qctx.qtype != dns.rdatatype.AXFR: + # If QTYPE=AXFR, we will continue cramming RRsets into the ANSWER + # section of a subsequent DNS message below. + # + # If QTYPE was not SOA or AXFR, abort. Yeah, we just sent a broken + # response by yielding DnsResponseSend() with a SOA RRset in the + # ANSWER section above. We will have to carry that burden for the + # rest of our lives. + return + + # Send just the obligatory NS RRset at zone apex in the next message. + # This is stupidly inefficient, but makes looping below simpler as we + # will already have been done with the mandatory stuff by then. + ns_message = dns.message.make_response(qctx.query) + ns_rrset = dns.rrset.from_text( + qctx.qname, 300, dns.rdataclass.IN, dns.rdatatype.NS, "." + ) + ns_message.answer.append(ns_rrset) + + yield DnsResponseSend(ns_message, authoritative=True) + + # Generate the AXFR with a txt rrset. + txt_message = dns.message.make_response(qctx.query) + txt_rrset = dns.rrset.from_text( + qctx.qname, + 300, + dns.rdataclass.IN, + dns.rdatatype.TXT, + "foo bar", + ) + txt_message.answer.append(txt_rrset) + + yield DnsResponseSend(txt_message, authoritative=True) + + # Finish the AXFR transaction by sending the second SOA RRset. + yield DnsResponseSend(soa_message, authoritative=True) + + # This makes sure that the next SOA request causes a new zone transfer + self.soa_version += 1 + + +if __name__ == "__main__": + server = ControllableAsyncDnsServer([ToggleResponsesCommand]) + server.install_response_handler(AXFRServer()) + server.run() diff --git a/bin/tests/system/xfer/ns6/named.conf.in b/bin/tests/system/xfer/ns6/named.conf.in index da321a2af0..9b37f20910 100644 --- a/bin/tests/system/xfer/ns6/named.conf.in +++ b/bin/tests/system/xfer/ns6/named.conf.in @@ -104,3 +104,10 @@ zone "ixfr-too-big" { primaries { 10.53.0.1; }; file "ixfr-too-big.bk"; }; + +zone "xfr-and-reconfig" { + type secondary; + primaries { 10.53.0.9; }; + file "xfr-and-reconfig.bk"; + request-ixfr no; # ans9 supports only axfr +}; diff --git a/bin/tests/system/xfer/tests.sh b/bin/tests/system/xfer/tests.sh index 006ce4e1df..8b44dcfd98 100755 --- a/bin/tests/system/xfer/tests.sh +++ b/bin/tests/system/xfer/tests.sh @@ -19,6 +19,10 @@ DIGOPTS="+tcp +noadd +nosea +nostat +noquest +nocomm +nocmd -p ${PORT}" RNDCCMD="$RNDC -c ../_common/rndc.conf -p ${CONTROLPORT} -s" NS_PARAMS="-m record -c named.conf -d 99 -g -T maxcachesize=2097152" +dig_with_opts() ( + "$DIG" -p "$PORT" "$@" +) + status=0 n=0 @@ -738,5 +742,44 @@ if [ $tmp -eq 0 ]; then fi status=$((status + tmp)) +nextpart ns6/named.run >/dev/null + +sendcmd() ( + dig_with_opts "@${1}" "${2}._control." TXT +time=5 +tries=1 +tcp >/dev/null 2>&1 +) + +# See #5307#note_558185 +n=$((n + 1)) +echo_i "test reconfiguration when zone transfer is in the middle of a SOA query (part 1) ($n)" +tmp=0 +# Check that xfr-and-reconfig has been successfully transferred by the secondary. +grep -F 'zone xfr-and-reconfig/IN: zone transfer finished: success' ns6/named.run 2>&1 >/dev/null || tmp=0 +# Make ans6 receive queries without responding to them. +sendcmd 10.53.0.9 "disable.send-responses" +sleep 1 +# Try to reload the zone from an unresponsive primary. +$RNDCCMD 10.53.0.6 reload xfr-and-reconfig 2>&1 | sed 's/^/ns6 /' | cat_i +sleep 1 +# Reconfigure named while zone transfer attempt is in progress. +$RNDCCMD 10.53.0.6 reconfig 2>&1 | sed 's/^/ns6 /' | cat_i +# Confirm that the ongoing SOA request was canceled, caused by the reconfiguratoin. +retry_quiet 60 wait_for_message "refresh: request result: shutting down" || tmp=1 +if test $tmp != 0; then echo_i "failed"; fi +status=$((status + tmp)) + +nextpart ns6/named.run >/dev/null + +n=$((n + 1)) +echo_i "test reconfiguration when zone transfer is in the middle of a SOA query (part 2) ($n)" +tmp=0 +# Make ans6 receive queries and respond to them. +sendcmd 10.53.0.9 "enable.send-responses" +sleep 1 +# Try to reload the zone from the primary. +$RNDCCMD 10.53.0.6 reload xfr-and-reconfig 2>&1 | sed 's/^/ns6 /' | cat_i +retry_quiet 60 wait_for_message "zone xfr-and-reconfig/IN: Transfer started." || tmp=1 +if test $tmp != 0; then echo_i "failed"; fi +status=$((status + tmp)) + echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/bin/tests/system/xfer/tests_sh_xfer.py b/bin/tests/system/xfer/tests_sh_xfer.py index 0b82e0bf12..d94f90b981 100644 --- a/bin/tests/system/xfer/tests_sh_xfer.py +++ b/bin/tests/system/xfer/tests_sh_xfer.py @@ -51,6 +51,7 @@ pytestmark = pytest.mark.extra_artifacts( "ns6/primary.db.jnl", "ns6/sec.bk", "ns6/xot-primary-try-next.bk", + "ns6/xfr-and-reconfig.bk", "ns7/edns-expire.bk", "ns7/primary2.db", "ns7/sec.bk", From fa974811a9b8ab688f7d39b263c49bebb4d07f6f Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Wed, 21 May 2025 15:27:53 +0000 Subject: [PATCH 4/4] Emit a ISC_R_CANCELED result instead of ISC_R_SHUTTINGDOWN When request manager shuts down, it also shuts down all its ongoing requests. Currently it calls their callback functions with a ISC_R_SHUTTINGDOWN result code for the request. Since a request manager can shutdown not only during named shutdown but also during named reconfiguration, instead of sending ISC_R_SHUTTINGDOWN result code send a ISC_R_CANCELED code to avoid confusion and errors with the expectation that a ISC_R_SHUTTINGDOWN result code can only be received during actual shutdown of named. All the callback functions which are passed to either the dns_request_create() or the dns_request_createraw() functions have been analyzed to confirm that they can process both the ISC_R_SHUTTINGDOWN and ISC_R_CANCELED result codes. Changes were made where it was necessary. (cherry picked from commit f4cd307c6b705e13c45136ac4dc49e262a598297) --- bin/tests/system/xfer/tests.sh | 2 +- lib/dns/request.c | 10 +++++----- lib/dns/zone.c | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/bin/tests/system/xfer/tests.sh b/bin/tests/system/xfer/tests.sh index 8b44dcfd98..2ba58710e9 100755 --- a/bin/tests/system/xfer/tests.sh +++ b/bin/tests/system/xfer/tests.sh @@ -763,7 +763,7 @@ sleep 1 # Reconfigure named while zone transfer attempt is in progress. $RNDCCMD 10.53.0.6 reconfig 2>&1 | sed 's/^/ns6 /' | cat_i # Confirm that the ongoing SOA request was canceled, caused by the reconfiguratoin. -retry_quiet 60 wait_for_message "refresh: request result: shutting down" || tmp=1 +retry_quiet 60 wait_for_message "refresh: request result: operation canceled" || tmp=1 if test $tmp != 0; then echo_i "failed"; fi status=$((status + tmp)) diff --git a/lib/dns/request.c b/lib/dns/request.c index c2ba096dbf..38130e80bf 100644 --- a/lib/dns/request.c +++ b/lib/dns/request.c @@ -143,7 +143,7 @@ dns_requestmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, for (size_t i = 0; i < nloops; i++) { ISC_LIST_INIT(requestmgr->requests[i]); - /* unreferenced in requests_shutdown() */ + /* unreferenced in requests_cancel() */ isc_loop_ref(isc_loop_get(requestmgr->loopmgr, i)); } @@ -169,7 +169,7 @@ dns_requestmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, } static void -requests_shutdown(void *arg) { +requests_cancel(void *arg) { dns_requestmgr_t *requestmgr = arg; dns_request_t *request = NULL, *next = NULL; uint32_t tid = isc_tid(); @@ -181,7 +181,7 @@ requests_shutdown(void *arg) { /* The callback has been already scheduled */ continue; } - req_sendevent(request, ISC_R_SHUTTINGDOWN); + req_sendevent(request, ISC_R_CANCELED); } isc_loop_unref(isc_loop_get(requestmgr->loopmgr, tid)); @@ -217,12 +217,12 @@ dns_requestmgr_shutdown(dns_requestmgr_t *requestmgr) { if (i == tid) { /* Run the current loop synchronously */ - requests_shutdown(requestmgr); + requests_cancel(requestmgr); continue; } isc_loop_t *loop = isc_loop_get(requestmgr->loopmgr, i); - isc_async_run(loop, requests_shutdown, requestmgr); + isc_async_run(loop, requests_cancel, requestmgr); } } diff --git a/lib/dns/zone.c b/lib/dns/zone.c index 816e801bf7..845902b160 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -13609,6 +13609,7 @@ stub_callback(void *arg) { case ISC_R_SUCCESS: break; case ISC_R_SHUTTINGDOWN: + case ISC_R_CANCELED: goto exiting; case ISC_R_TIMEDOUT: if (!DNS_ZONE_FLAG(zone, DNS_ZONEFLG_NOEDNS)) { @@ -13953,6 +13954,7 @@ refresh_callback(void *arg) { case ISC_R_SUCCESS: break; case ISC_R_SHUTTINGDOWN: + case ISC_R_CANCELED: goto exiting; case ISC_R_TIMEDOUT: if (!DNS_ZONE_FLAG(zone, DNS_ZONEFLG_NOEDNS)) {