[9.20] fix: usr: Fix trying the next primary server when the previous one was marked as unreachable

In some cases (there is evidence only when XoT was used) `named` failed
to try the next primary server in the list when the previous one was
marked as unreachable. This has been fixed.

Closes #5038

Backport of MR !9781

Merge branch 'backport-5038-xfr-primary-next-fix-9.20' into 'bind-9.20'

See merge request isc-projects/bind9!9788
This commit is contained in:
Aram Sargsyan 2024-11-27 13:30:13 +00:00
commit ab138bb717
7 changed files with 65 additions and 21 deletions

View file

@ -63,6 +63,11 @@ zone "axfr-rndc-retransfer-force" {
file "axfr-rndc-retransfer-force.db";
};
// Primary copy of the "xot-primary-try-next" zone, loaded from
// xot-primary-try-next.db. NOTE(review): presumably this is the server at
// 10.53.0.1 that the secondary falls back to -- confirm against the
// secondary's "primaries" list.
zone "xot-primary-try-next" {
type primary;
file "xot-primary-try-next.db";
};
zone "axfr-too-big" {
type primary;
file "axfr-too-big.db";

View file

@ -0,0 +1,14 @@
; Copyright (C) Internet Systems Consortium, Inc. ("ISC")
;
; SPDX-License-Identifier: MPL-2.0
;
; This Source Code Form is subject to the terms of the Mozilla Public
; License, v. 2.0. If a copy of the MPL was not distributed with this
; file, you can obtain one at https://mozilla.org/MPL/2.0/.
;
; See the COPYRIGHT file distributed with this work for additional
; information regarding copyright ownership.
$TTL 3600
@ IN SOA . . 0 0 0 0 0
@ IN NS .

View file

@ -77,6 +77,12 @@ zone "axfr-rndc-retransfer-force" {
file "axfr-rndc-retransfer-force.bk";
};
// Secondary copy of "xot-primary-try-next" with two primaries, tried in
// list order: first an XoT primary (10.53.0.99, port @EXTRAPORT1@, ephemeral
// TLS), then 10.53.0.1 with no port or TLS specified.
// NOTE(review): presumably nothing answers at 10.53.0.99, so the first
// attempt fails and the transfer must fall through to 10.53.0.1 -- confirm.
zone "xot-primary-try-next" {
type secondary;
primaries { 10.53.0.99 port @EXTRAPORT1@ tls ephemeral; 10.53.0.1; };
file "xot-primary-try-next.bk";
};
zone "axfr-too-big" {
type secondary;
max-records 30;

View file

@ -646,6 +646,28 @@ wait_for_message() (
grep -F "$1" wait_for_message.$n >/dev/null
)
# Test: when the first primary in the list (the XoT entry) fails, named must
# move on to the second primary (10.53.0.1) and complete the transfer there.
# nextpart's output is discarded; presumably it only advances the read
# position in ns6/named.run so that later checks see new log lines -- confirm.
nextpart ns6/named.run >/dev/null
n=$((n + 1))
echo_i "test that named tries the next primary in the list when the first one fails (XoT -> Do53) ($n)"
tmp=0
# Ask ns6 (10.53.0.6) to retransfer the zone; rndc output is prefixed with "ns6 ".
$RNDCCMD 10.53.0.6 retransfer xot-primary-try-next 2>&1 | sed 's/^/ns6 /' | cat_i
# Success is only accepted if the log line names 10.53.0.1 as the source.
msg="'xot-primary-try-next/IN' from 10.53.0.1#${PORT}: Transfer status: success"
retry_quiet 60 wait_for_message "$msg" || tmp=1
if test $tmp != 0; then echo_i "failed"; fi
status=$((status + tmp))
# Test: issue the same retransfer a second time. Per the test title, the first
# primary is expected to be already marked as unreachable at this point
# (presumably as a result of the failed attempt in the previous test --
# confirm); named must still fall through to 10.53.0.1 and succeed.
nextpart ns6/named.run >/dev/null
n=$((n + 1))
echo_i "test that named tries the next primary in the list when the first one is already marked as unreachable (XoT -> Do53) ($n)"
tmp=0
# Ask ns6 (10.53.0.6) to retransfer the zone; rndc output is prefixed with "ns6 ".
$RNDCCMD 10.53.0.6 retransfer xot-primary-try-next 2>&1 | sed 's/^/ns6 /' | cat_i
# Success is only accepted if the log line names 10.53.0.1 as the source.
msg="'xot-primary-try-next/IN' from 10.53.0.1#${PORT}: Transfer status: success"
retry_quiet 60 wait_for_message "$msg" || tmp=1
if test $tmp != 0; then echo_i "failed"; fi
status=$((status + tmp))
# Restart ns1 with -T transferslowly
stop_server ns1
copy_setports ns1/named2.conf.in ns1/named.conf
@ -663,7 +685,7 @@ msg="'axfr-rndc-retransfer-force/IN' from 10.53.0.1#${PORT}: received"
retry_quiet 5 wait_for_message "$msg" || tmp=1
# Issue a retransfer-force command which should cancel the ongoing transfer and start a new one
$RNDCCMD 10.53.0.6 retransfer -force axfr-rndc-retransfer-force 2>&1 | sed 's/^/ns6 /' | cat_i
msg="'axfr-rndc-retransfer-force/IN' from 10.53.0.1#${PORT}: Transfer status: operation canceled"
msg="'axfr-rndc-retransfer-force/IN' from 10.53.0.1#${PORT}: Transfer status: shutting down"
retry_quiet 5 wait_for_message "$msg" || tmp=1
# Wait for the new transfer to complete successfully
msg="'axfr-rndc-retransfer-force/IN' from 10.53.0.1#${PORT}: Transfer status: success"

View file

@ -49,6 +49,7 @@ pytestmark = pytest.mark.extra_artifacts(
"ns6/primary.db",
"ns6/primary.db.jnl",
"ns6/sec.bk",
"ns6/xot-primary-try-next.bk",
"ns7/edns-expire.bk",
"ns7/primary2.db",
"ns7/sec.bk",

View file

@ -1080,7 +1080,7 @@ xfrin_shutdown(void *arg) {
REQUIRE(VALID_XFRIN(xfr));
xfrin_fail(xfr, ISC_R_CANCELED, "shut down");
xfrin_fail(xfr, ISC_R_SHUTTINGDOWN, "shut down");
dns_xfrin_detach(&xfr);
}
@ -1092,7 +1092,7 @@ dns_xfrin_shutdown(dns_xfrin_t *xfr) {
dns_xfrin_ref(xfr);
isc_async_run(xfr->loop, xfrin_shutdown, xfr);
} else {
xfrin_fail(xfr, ISC_R_CANCELED, "shut down");
xfrin_fail(xfr, ISC_R_SHUTTINGDOWN, "shut down");
}
}

View file

@ -11474,11 +11474,23 @@ zone_refresh(dns_zone_t *zone) {
queue_soa_query(zone);
}
void
dns_zone_refresh(dns_zone_t *zone) {
/*
 * Asynchronous callback that performs the actual zone refresh.
 *
 * 'arg' is the dns_zone_t pointer passed to isc_async_run() by
 * dns_zone_refresh(). zone_refresh() is called with the zone lock held,
 * and the zone pointer is detached afterwards, which balances the
 * dns_zone_ref() taken by dns_zone_refresh() before scheduling this job.
 */
static void
zone_refresh_async(void *arg) {
dns_zone_t *zone = arg;
LOCK_ZONE(zone);
zone_refresh(zone);
UNLOCK_ZONE(zone);
/* Release the reference that kept the zone alive while the job was queued. */
dns_zone_detach(&zone);
}
/*
 * Request a refresh of 'zone'.
 *
 * The refresh is not performed inline: it is queued with isc_async_run()
 * on zone->loop and carried out by zone_refresh_async(). A reference to
 * the zone is taken here and handed to the callback, which detaches it
 * when done.
 *
 * Requires:
 *	'zone' to be a valid zone.
 */
void
dns_zone_refresh(dns_zone_t *zone) {
REQUIRE(DNS_ZONE_VALID(zone));
/* Hold a reference for the queued job; zone_refresh_async() detaches it. */
dns_zone_ref(zone);
isc_async_run(zone->loop, zone_refresh_async, zone);
}
static isc_result_t
@ -17946,17 +17958,6 @@ again:
inc_stats(zone, dns_zonestatscounter_xfrfail);
break;
case ISC_R_CANCELED:
/*
* A new "retransfer" command with a "-force" argument could
* have canceled the current transfer in which case we should
* make sure to try again from the beginning.
*/
if (DNS_ZONE_FLAG(zone, DNS_ZONEFLG_FORCEXFER)) {
DNS_ZONE_SETFLAG(zone, DNS_ZONEFLG_REFRESH);
again = true;
}
FALLTHROUGH;
case ISC_R_SHUTTINGDOWN:
dns_remote_reset(&zone->primaries, true);
break;
@ -19559,11 +19560,6 @@ void
dns_zonemgr_unreachabledel(dns_zonemgr_t *zmgr, isc_sockaddr_t *remote,
isc_sockaddr_t *local) {
unsigned int i;
char primary[ISC_SOCKADDR_FORMATSIZE];
char source[ISC_SOCKADDR_FORMATSIZE];
isc_sockaddr_format(remote, primary, sizeof(primary));
isc_sockaddr_format(local, source, sizeof(source));
REQUIRE(DNS_ZONEMGR_VALID(zmgr));