From 1c4a34a3ab9b0b5175d51a7030f7d1aefafc49b1 Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Tue, 26 Nov 2024 11:36:33 +0000 Subject: [PATCH 1/3] Clean up dns_zonemgr_unreachabledel() The results of isc_sockaddr_format() calls are not used, remove them and the local variables. --- lib/dns/zone.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/dns/zone.c b/lib/dns/zone.c index 7ef570c3d1..928183348e 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -19584,11 +19584,6 @@ void dns_zonemgr_unreachabledel(dns_zonemgr_t *zmgr, isc_sockaddr_t *remote, isc_sockaddr_t *local) { unsigned int i; - char primary[ISC_SOCKADDR_FORMATSIZE]; - char source[ISC_SOCKADDR_FORMATSIZE]; - - isc_sockaddr_format(remote, primary, sizeof(primary)); - isc_sockaddr_format(local, source, sizeof(source)); REQUIRE(DNS_ZONEMGR_VALID(zmgr)); From 3262ebd0f3fb53a471c7d736ea32710f26dcd509 Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Tue, 26 Nov 2024 12:06:03 +0000 Subject: [PATCH 2/3] xfrin: refactor and fix the ISC_R_CANCELED case handling Previously a ISC_R_CANCELED result code switch-case has been added to the zone.c:zone_xfrdone() function, which did two things: 1. Schedule a new zone transfer if there's a scheduled force reload of the zone. 2. Reset the primaries list. This proved to be not a well-thought change and causes problems, because the ISC_R_CANCELED code is used not only when the whole transfer is canceled, but also when, for example, a particular primary server is unreachable, and named still needs to continue the transfer process by trying the next server, which it now no longer does in some cases. To solve this issue, three changes are made: 1. Make sure dns_zone_refresh() runs on the zone's loop, so that the sequential calls of dns_zone_stopxfr() and dns_zone_forcexfr() functions (like done in 'rndc retransfer -force') run in intended order and don't race with each other. 2. Since starting the new transfer is now guaranteed to run after the previous transfer is shut down (see the previous change), remove the special handling of the ISC_R_CANCELED case, and let the default handler to handle it like before. This will bring back the ability to try the next primary if the current one was interrupted with a ISC_R_CANCELED result code. 3. Change the xfrin.c:xfrin_shutdown() function to pass the ISC_R_SHUTTINGDOWN result code instead of ISC_R_CANCELED, as it makes more sense. --- bin/tests/system/xfer/tests.sh | 2 +- lib/dns/xfrin.c | 4 ++-- lib/dns/zone.c | 27 ++++++++++++++------------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/bin/tests/system/xfer/tests.sh b/bin/tests/system/xfer/tests.sh index 5818870b60..cb67e40547 100755 --- a/bin/tests/system/xfer/tests.sh +++ b/bin/tests/system/xfer/tests.sh @@ -677,7 +677,7 @@ msg="'axfr-rndc-retransfer-force/IN' from 10.53.0.1#${PORT}: received" retry_quiet 5 wait_for_message "$msg" || tmp=1 # Issue a retransfer-force command which should cancel the ongoing transfer and start a new one $RNDCCMD 10.53.0.6 retransfer -force axfr-rndc-retransfer-force 2>&1 | sed 's/^/ns6 /' | cat_i -msg="'axfr-rndc-retransfer-force/IN' from 10.53.0.1#${PORT}: Transfer status: operation canceled" +msg="'axfr-rndc-retransfer-force/IN' from 10.53.0.1#${PORT}: Transfer status: shutting down" retry_quiet 5 wait_for_message "$msg" || tmp=1 # Wait for the new transfer to complete successfully msg="'axfr-rndc-retransfer-force/IN' from 10.53.0.1#${PORT}: Transfer status: success" diff --git a/lib/dns/xfrin.c b/lib/dns/xfrin.c index d9e3a2733c..47f58aed5f 100644 --- a/lib/dns/xfrin.c +++ b/lib/dns/xfrin.c @@ -1082,7 +1082,7 @@ xfrin_shutdown(void *arg) { REQUIRE(VALID_XFRIN(xfr)); - xfrin_fail(xfr, ISC_R_CANCELED, "shut down"); + xfrin_fail(xfr, ISC_R_SHUTTINGDOWN, "shut down"); dns_xfrin_detach(&xfr); } @@ -1094,7 +1094,7 @@ dns_xfrin_shutdown(dns_xfrin_t *xfr) { dns_xfrin_ref(xfr); isc_async_run(xfr->loop, xfrin_shutdown, xfr); } else { - xfrin_fail(xfr, ISC_R_CANCELED, "shut down"); + xfrin_fail(xfr, ISC_R_SHUTTINGDOWN, "shut down"); } } diff --git a/lib/dns/zone.c b/lib/dns/zone.c index 928183348e..7889c25952 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -11522,11 +11522,23 @@ zone_refresh(dns_zone_t *zone) { queue_soa_query(zone); } -void -dns_zone_refresh(dns_zone_t *zone) { +static void +zone_refresh_async(void *arg) { + dns_zone_t *zone = arg; + LOCK_ZONE(zone); zone_refresh(zone); UNLOCK_ZONE(zone); + + dns_zone_detach(&zone); +} + +void +dns_zone_refresh(dns_zone_t *zone) { + REQUIRE(DNS_ZONE_VALID(zone)); + + dns_zone_ref(zone); + isc_async_run(zone->loop, zone_refresh_async, zone); } static isc_result_t @@ -17971,17 +17983,6 @@ again: inc_stats(zone, dns_zonestatscounter_xfrfail); break; - case ISC_R_CANCELED: - /* - * A new "retransfer" command with a "-force" argument could - * have canceled the current transfer in which case we should - * make sure to try again from the beginning. - */ - if (DNS_ZONE_FLAG(zone, DNS_ZONEFLG_FORCEXFER)) { - DNS_ZONE_SETFLAG(zone, DNS_ZONEFLG_REFRESH); - again = true; - } - FALLTHROUGH; case ISC_R_SHUTTINGDOWN: dns_remote_reset(&zone->primaries, true); break; From 12225d125bb7690fdf4a26536869a048874aa51d Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Tue, 26 Nov 2024 12:09:57 +0000 Subject: [PATCH 3/3] Test trying of the next primary server Add test cases which check that when a XoT primary server is unreachable or is already marked as unreachble then the next primary server in the list is used. --- bin/tests/system/xfer/ns1/named1.conf.in | 5 +++++ .../system/xfer/ns1/xot-primary-try-next.db | 14 ++++++++++++ bin/tests/system/xfer/ns6/named.conf.in | 6 +++++ bin/tests/system/xfer/tests.sh | 22 +++++++++++++++++++ bin/tests/system/xfer/tests_sh_xfer.py | 1 + 5 files changed, 48 insertions(+) create mode 100644 bin/tests/system/xfer/ns1/xot-primary-try-next.db diff --git a/bin/tests/system/xfer/ns1/named1.conf.in b/bin/tests/system/xfer/ns1/named1.conf.in index c7e844964e..990c911580 100644 --- a/bin/tests/system/xfer/ns1/named1.conf.in +++ b/bin/tests/system/xfer/ns1/named1.conf.in @@ -63,6 +63,11 @@ zone "axfr-rndc-retransfer-force" { file "axfr-rndc-retransfer-force.db"; }; +zone "xot-primary-try-next" { + type primary; + file "xot-primary-try-next.db"; +}; + zone "axfr-too-big" { type primary; file "axfr-too-big.db"; diff --git a/bin/tests/system/xfer/ns1/xot-primary-try-next.db b/bin/tests/system/xfer/ns1/xot-primary-try-next.db new file mode 100644 index 0000000000..dd9ed5d29b --- /dev/null +++ b/bin/tests/system/xfer/ns1/xot-primary-try-next.db @@ -0,0 +1,14 @@ +; Copyright (C) Internet Systems Consortium, Inc. ("ISC") +; +; SPDX-License-Identifier: MPL-2.0 +; +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this +; file, you can obtain one at https://mozilla.org/MPL/2.0/. +; +; See the COPYRIGHT file distributed with this work for additional +; information regarding copyright ownership. + +$TTL 3600 +@ IN SOA . . 0 0 0 0 0 +@ IN NS . diff --git a/bin/tests/system/xfer/ns6/named.conf.in b/bin/tests/system/xfer/ns6/named.conf.in index ea14584985..d6edf8a8a4 100644 --- a/bin/tests/system/xfer/ns6/named.conf.in +++ b/bin/tests/system/xfer/ns6/named.conf.in @@ -77,6 +77,12 @@ zone "axfr-rndc-retransfer-force" { file "axfr-rndc-retransfer-force.bk"; }; +zone "xot-primary-try-next" { + type secondary; + primaries { 10.53.0.99 port @EXTRAPORT1@ tls ephemeral; 10.53.0.1; }; + file "xot-primary-try-next.bk"; +}; + zone "axfr-too-big" { type secondary; max-records 30; diff --git a/bin/tests/system/xfer/tests.sh b/bin/tests/system/xfer/tests.sh index cb67e40547..a8bca3dc3a 100755 --- a/bin/tests/system/xfer/tests.sh +++ b/bin/tests/system/xfer/tests.sh @@ -660,6 +660,28 @@ wait_for_message() ( grep -F "$1" wait_for_message.$n >/dev/null ) +nextpart ns6/named.run >/dev/null + +n=$((n + 1)) +echo_i "test that named tries the next primary in the list when the first one fails (XoT -> Do53) ($n)" +tmp=0 +$RNDCCMD 10.53.0.6 retransfer xot-primary-try-next 2>&1 | sed 's/^/ns6 /' | cat_i +msg="'xot-primary-try-next/IN' from 10.53.0.1#${PORT}: Transfer status: success" +retry_quiet 60 wait_for_message "$msg" || tmp=1 +if test $tmp != 0; then echo_i "failed"; fi +status=$((status + tmp)) + +nextpart ns6/named.run >/dev/null + +n=$((n + 1)) +echo_i "test that named tries the next primary in the list when the first one is already marked as unreachable (XoT -> Do53) ($n)" +tmp=0 +$RNDCCMD 10.53.0.6 retransfer xot-primary-try-next 2>&1 | sed 's/^/ns6 /' | cat_i +msg="'xot-primary-try-next/IN' from 10.53.0.1#${PORT}: Transfer status: success" +retry_quiet 60 wait_for_message "$msg" || tmp=1 +if test $tmp != 0; then echo_i "failed"; fi +status=$((status + tmp)) + # Restart ns1 with -T transferslowly stop_server ns1 copy_setports ns1/named2.conf.in ns1/named.conf diff --git a/bin/tests/system/xfer/tests_sh_xfer.py b/bin/tests/system/xfer/tests_sh_xfer.py index d217b0becf..50efbca298 100644 --- a/bin/tests/system/xfer/tests_sh_xfer.py +++ b/bin/tests/system/xfer/tests_sh_xfer.py @@ -49,6 +49,7 @@ pytestmark = pytest.mark.extra_artifacts( "ns6/primary.db", "ns6/primary.db.jnl", "ns6/sec.bk", + "ns6/xot-primary-try-next.bk", "ns7/edns-expire.bk", "ns7/primary2.db", "ns7/sec.bk",