fix: usr: Fix TCP fallback after repeated UDP timeouts

When an authoritative server failed to respond to two consecutive
UDP queries in a fetch, named was supposed to retry the next attempt
over TCP but in fact still sent it over UDP.  The resolver now
properly switches the transport to TCP on the third attempt to
the same server.

Closes #5529

Merge branch '5529-fix-tcp-fallback-after-udp-timeouts' into 'main'

See merge request isc-projects/bind9!12022
This commit is contained in:
Ondřej Surý 2026-05-19 11:19:04 +02:00
commit e90a828307
15 changed files with 435 additions and 90 deletions

View file

@ -0,0 +1,42 @@
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
from collections.abc import AsyncGenerator
from isctest.asyncserver import (
AsyncDnsServer,
DnsProtocol,
DnsResponseSend,
QueryContext,
ResponseAction,
ResponseDrop,
ResponseHandler,
)
class DropUdpHandler(ResponseHandler):
    """
    Response handler that never answers queries received over UDP.

    Queries arriving over any other protocol are answered by sending the
    ``response`` attribute of the query context.
    """

    async def get_responses(
        self, qctx: QueryContext
    ) -> AsyncGenerator[ResponseAction, None]:
        """Yield a drop action for UDP queries, a send action otherwise."""
        # Guard clause: a UDP query yields only the drop action.
        if qctx.protocol == DnsProtocol.UDP:
            yield ResponseDrop()
            return
        yield DnsResponseSend(qctx.response)
def main() -> None:
    """Create an AsyncDnsServer, install DropUdpHandler on it and run it."""
    dns_server = AsyncDnsServer()
    udp_dropper = DropUdpHandler()
    dns_server.install_response_handler(udp_dropper)
    dns_server.run()


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,15 @@
; Copyright (C) Internet Systems Consortium, Inc. ("ISC")
;
; SPDX-License-Identifier: MPL-2.0
;
; This Source Code Form is subject to the terms of the Mozilla Public
; License, v. 2.0. If a copy of the MPL was not distributed with this
; file, you can obtain one at https://mozilla.org/MPL/2.0/.
;
; See the COPYRIGHT file distributed with this work for additional
; information regarding copyright ownership.
@ 3600 SOA . . 1 1 1 1 1
@ 3600 NS ns
ns 3600 A 10.53.0.4
foo 3600 A 127.0.0.1

View file

@ -3,3 +3,5 @@
ns.nil. 300 A 10.53.0.1
example. 300 NS ns.example.
ns.example. 300 A 10.53.0.2
tcp-only. 300 NS ns.tcp-only.
ns.tcp-only. 300 A 10.53.0.4

View file

@ -0,0 +1,57 @@
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
from re import compile as Re
from re import escape
import dns.message
import dns.name
import dns.rdataclass
import dns.rdatatype
import pytest
import isctest
pytestmark = pytest.mark.extra_artifacts(
[
"ans*/ans.run",
]
)
def _count_received(path, qname, protocol):
pattern = Re(rf"Received {escape(qname)}/IN/A .* \({protocol}\)$")
with open(path, encoding="utf-8") as fh:
return sum(1 for line in fh if pattern.search(line.rstrip()))
def test_tcponly_fallback():
    """
    Check that the resolver reaches a UDP-dropping server over TCP.

    ans4 drops every UDP query and answers only over TCP.  The query sent
    to the resolver must succeed with the expected A record, and the ans4
    log must show exactly two UDP queries and exactly one TCP query for
    the name.
    """
    qname = "foo.tcp-only."
    query = dns.message.make_query(qname, "A")
    response = isctest.query.udp(query, "10.53.0.2", timeout=15)
    isctest.check.noerror(response)

    owner = dns.name.from_text(qname)
    a_rrset = response.find_rrset(
        response.answer, owner, dns.rdataclass.IN, dns.rdatatype.A
    )
    assert str(a_rrset[0]) == "127.0.0.1"

    # Both counters are read from the ans4 log before either is asserted.
    ans_log = "ans4/ans.run"
    udp = _count_received(ans_log, "foo.tcp-only", "UDP")
    tcp = _count_received(ans_log, "foo.tcp-only", "TCP")
    assert udp == 2, f"expected exactly 2 UDP queries, got {udp}"
    assert tcp == 1, f"expected exactly 1 TCP query, got {tcp}"

View file

@ -82,9 +82,11 @@ for try in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do
burst 10.53.0.3 a $try
# fetches-per-server is at 400, but at 20qps against a lame server,
# we'll reach 200 at the tenth second, and the quota should have been
# tuned to less than that by then.
# tuned to less than that by then. Allow a small margin above 200
# to absorb the extra latency introduced by the resolver's TCP
# fallback after repeated UDP timeouts.
[ $try -le 5 ] && low=$((try * 10))
stat 10.53.0.3 20 200 || ret=1
stat 10.53.0.3 20 250 || ret=1
[ $ret -eq 1 ] && break
sleep 1
done
@ -125,7 +127,7 @@ ret=0
sendcmd 10.53.0.4 send-responses "enable"
for try in 1 2 3 4 5; do
burst 10.53.0.3 b $try
stat 10.53.0.3 0 200 || ret=1
stat 10.53.0.3 0 250 || ret=1
[ $ret -eq 1 ] && break
sleep 1
done

View file

@ -1394,23 +1394,16 @@ status=$((status + ret))
sleep 2
# Check that if we don't have stale data for a domain name, we will
# not answer anything until the resolver query timeout.
n=$((n + 1))
echo_i "check notincache.example TXT times out (max-stale-ttl default) ($n)"
ret=0
$DIG -p ${PORT} +tries=1 +timeout=3 @10.53.0.3 notfound.example TXT >dig.out.test$n 2>&1 && ret=1
grep "timed out" dig.out.test$n >/dev/null || ret=1
grep ";; no servers could be reached" dig.out.test$n >/dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
# Note: the "notincache.example TXT times out" step (the original test
# 120) has been moved to the pytest suite in serve_stale_tcp/, since
# the resolver now legitimately escalates to TCP after repeated UDP
# timeouts and the perl mock ans2 only listens on UDP.
echo_i "sending queries for tests $((n + 1))-$((n + 4))..."
$DIG -p ${PORT} @10.53.0.3 data.example TXT >dig.out.test$((n + 1)) &
$DIG -p ${PORT} @10.53.0.3 othertype.example CAA >dig.out.test$((n + 2)) &
$DIG -p ${PORT} @10.53.0.3 nodata.example TXT >dig.out.test$((n + 3)) &
$DIG -p ${PORT} @10.53.0.3 nxdomain.example TXT >dig.out.test$((n + 4)) &
$DIG -p ${PORT} @10.53.0.3 notfound.example TXT >dig.out.test$((n + 5)) &
wait
@ -1452,18 +1445,9 @@ grep "ANSWER: 0," dig.out.test$n >/dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
# The notfound.example check is different than nxdomain.example because
# we didn't send a prime query to add notfound.example to the cache.
# Independently, EDE 22 is sent as the authoritative server doesn't respond.
n=$((n + 1))
echo_i "check notfound.example TXT (max-stale-ttl default) ($n)"
ret=0
grep "status: SERVFAIL" dig.out.test$n >/dev/null || ret=1
grep "EDE: 22 (No Reachable Authority)" dig.out.test$n >/dev/null || ret=1
grep "EDE: 3 (Stale Answer)" dig.out.test$n >/dev/null && ret=1
grep "ANSWER: 0," dig.out.test$n >/dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
# Note: the "notfound.example TXT" SERVFAIL+EDE 22 step (the original
# test 125) has been moved to the pytest suite in serve_stale_tcp/;
# see the comment above where test 120 was removed.
#
# Now test server with serve-stale answers disabled.
@ -1922,10 +1906,19 @@ grep -F "#!TXT" ns5/named.stats.$n.cachedb >/dev/null && ret=1
status=$((status + ret))
if [ $ret != 0 ]; then echo_i "failed"; fi
#############################################
# Test for stale-answer-client-timeout off. #
#############################################
echo_i "test stale-answer-client-timeout (off)"
# Ask 10.53.0.3 for version.bind TXT in class CH, saving dig's output to
# dig.out.test$n.  Returns 1 if dig fails or if the saved output does not
# contain "status: NOERROR".
check_server_responds() {
$DIG -p ${PORT} @10.53.0.3 version.bind txt ch >dig.out.test$n || return 1
grep "status: NOERROR" dig.out.test$n >/dev/null || return 1
}
##############################################################
# Test for stale-answer-client-timeout off and CNAME record. #
##############################################################
# The standalone "stale-answer-client-timeout off" test (the original
# test 163) has been moved to the pytest suite in serve_stale_tcp/;
# see the comment where test 120 was removed. Its configuration
# (named3.conf) is still used as the base for the CNAME case below.
echo_i "test stale-answer-client-timeout (0) and CNAME record"
n=$((n + 1))
echo_i "updating ns3/named3.conf ($n)"
@ -1941,14 +1934,6 @@ rndc_reload ns3 10.53.0.3
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
# Send a query, auth server is disabled, we will enable it after a while in
# order to receive an answer before resolver-query-timeout expires. Since
# stale-answer-client-timeout is disabled we must receive an answer from
# authoritative server.
echo_i "sending query for test $((n + 2))"
$DIG -p ${PORT} @10.53.0.3 data.example TXT >dig.out.test$((n + 2)) &
sleep 3
n=$((n + 1))
echo_i "enable responses from authoritative server ($n)"
ret=0
@ -1958,28 +1943,6 @@ grep "TXT.\"1\"" dig.out.test$n >/dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
# Wait until dig is done.
wait
n=$((n + 1))
echo_i "check data.example TXT comes from authoritative server (stale-answer-client-timeout off) ($n)"
grep "status: NOERROR" dig.out.test$n >/dev/null || ret=1
grep "EDE" dig.out.test$n >/dev/null && ret=1
grep "ANSWER: 1," dig.out.test$n >/dev/null || ret=1
grep "data\.example\..*[12].*IN.*TXT.*A text record with a 2 second ttl" dig.out.test$n >/dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
# Ask 10.53.0.3 for version.bind TXT in class CH, saving dig's output to
# dig.out.test$n.  Returns 1 if dig fails or if the saved output does not
# contain "status: NOERROR".
check_server_responds() {
$DIG -p ${PORT} @10.53.0.3 version.bind txt ch >dig.out.test$n || return 1
grep "status: NOERROR" dig.out.test$n >/dev/null || return 1
}
##############################################################
# Test for stale-answer-client-timeout off and CNAME record. #
##############################################################
echo_i "test stale-answer-client-timeout (0) and CNAME record"
n=$((n + 1))
echo_i "prime cache shortttl.cname.example (stale-answer-client-timeout off) ($n)"
ret=0

View file

@ -0,0 +1,22 @@
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
from isctest.asyncserver import ControllableAsyncDnsServer, ToggleResponsesCommand
def main() -> None:
    """Run a controllable DNS server with ToggleResponsesCommand installed."""
    control_server = ControllableAsyncDnsServer()
    toggle_command = ToggleResponsesCommand()
    control_server.install_control_command(toggle_command)
    control_server.run()


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,15 @@
; Copyright (C) Internet Systems Consortium, Inc. ("ISC")
;
; SPDX-License-Identifier: MPL-2.0
;
; This Source Code Form is subject to the terms of the Mozilla Public
; License, v. 2.0. If a copy of the MPL was not distributed with this
; file, you can obtain one at https://mozilla.org/MPL/2.0/.
;
; See the COPYRIGHT file distributed with this work for additional
; information regarding copyright ownership.
@ 300 SOA ns.example. root.example. 1 3600 1800 604800 300
@ 300 NS ns.example.
ns.example. 300 A 10.53.0.3
data 2 TXT "A text record with a 2 second ttl"

View file

@ -0,0 +1,40 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
key rndc_key {
secret "1234abcd8765";
algorithm @DEFAULT_HMAC@;
};
controls {
inet 10.53.0.1 port @CONTROLPORT@ allow { any; } keys { rndc_key; };
};
options {
port @PORT@;
pid-file "named.pid";
listen-on { 10.53.0.1; };
listen-on-v6 { none; };
query-source address 10.53.0.1;
notify-source 10.53.0.1;
transfer-source 10.53.0.1;
recursion no;
dnssec-validation no;
};
zone "." {
type primary;
file "root.db";
};

View file

@ -0,0 +1,16 @@
; Copyright (C) Internet Systems Consortium, Inc. ("ISC")
;
; SPDX-License-Identifier: MPL-2.0
;
; This Source Code Form is subject to the terms of the Mozilla Public
; License, v. 2.0. If a copy of the MPL was not distributed with this
; file, you can obtain one at https://mozilla.org/MPL/2.0/.
;
; See the COPYRIGHT file distributed with this work for additional
; information regarding copyright ownership.
. 300 SOA . . 0 0 0 0 0
. 300 NS ns.nil.
ns.nil. 300 A 10.53.0.1
example. 300 NS ns.example.
ns.example. 300 A 10.53.0.3

View file

@ -0,0 +1,48 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
key rndc_key {
secret "1234abcd8765";
algorithm @DEFAULT_HMAC@;
};
controls {
inet 10.53.0.2 port @CONTROLPORT@ allow { any; } keys { rndc_key; };
};
options {
port @PORT@;
pid-file "named.pid";
listen-on { 10.53.0.2; };
listen-on-v6 { none; };
query-source address 10.53.0.2;
notify-source 10.53.0.2;
transfer-source 10.53.0.2;
recursion yes;
dnssec-validation no;
qname-minimization off;
stale-answer-enable yes;
stale-cache-enable yes;
stale-answer-ttl 3;
stale-refresh-time 0;
max-stale-ttl 3600;
stale-answer-client-timeout off;
};
zone "." {
type hint;
file "../../_common/root.hint";
};

View file

@ -0,0 +1,108 @@
# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
#
# SPDX-License-Identifier: MPL-2.0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at https://mozilla.org/MPL/2.0/.
#
# See the COPYRIGHT file distributed with this work for additional
# information regarding copyright ownership.
import threading
import time
import dns.edns
import dns.exception
import dns.name
import dns.rdataclass
import dns.rdatatype
import pytest
import isctest
pytestmark = pytest.mark.extra_artifacts(
[
"ans*/ans.run",
]
)
def _toggle(mode: str) -> None:
    """
    Send a "<mode>.send-responses._control." TXT query to 10.53.0.3.

    The query goes out over UDP with attempts=1 and the reply is not
    inspected.
    """
    control_name = f"{mode}.send-responses._control."
    control_query = isctest.query.create(control_name, "TXT", dnssec=False)
    isctest.query.udp(control_query, "10.53.0.3", attempts=1)
def test_no_stale_data_times_out():
    """Verify the resolver does not answer until the query timeout.

    With the authoritative server unresponsive and the queried name
    absent from the cache, the client query must time out rather than
    receive a fast SERVFAIL (the original test 120 in
    serve_stale/tests.sh).
    """
    _toggle("disable")
    query = isctest.query.create("notincache.example.", "TXT", dnssec=False)

    sent_at = time.monotonic()
    with pytest.raises(dns.exception.Timeout):
        isctest.query.udp(query, "10.53.0.2", timeout=3, attempts=1)
    elapsed = time.monotonic() - sent_at

    # The timeout must not have been raised before the 3 second client
    # timeout elapsed.
    assert elapsed >= 3
def test_servfail_with_ede22():
    """Verify SERVFAIL carries EDE 22 (and not EDE 3) when auth is unreachable.

    With the authoritative server unresponsive and no cached data to
    serve stale, the resolver must return SERVFAIL with EDE 22 (No
    Reachable Authority), must not attach EDE 3 (Stale Answer), and must
    return an empty answer section (the original test 125 in
    serve_stale/tests.sh).
    """
    _toggle("disable")
    query = isctest.query.create("notfound.example.", "TXT", dnssec=False)
    response = isctest.query.udp(query, "10.53.0.2", timeout=15, attempts=1)

    isctest.check.servfail(response)
    isctest.check.ede(response, dns.edns.EDECode.NO_REACHABLE_AUTHORITY)

    # Fail on the first option that is an EDE with the Stale Answer code.
    for opt in response.options:
        is_stale_answer_ede = (
            opt.otype == dns.edns.OptionType.EDE
            and opt.code == dns.edns.EDECode.STALE_ANSWER
        )
        assert (
            not is_stale_answer_ede
        ), "unexpected stale-answer EDE in SERVFAIL response"

    assert len(response.answer) == 0
def test_authoritative_answer_after_reenable():
    """Verify the resolver waits for auth to recover instead of failing fast.

    The cache is primed, the TTL is allowed to expire, the authoritative
    server is disabled, and a query is issued; a timer re-enables the
    authoritative server while that query is in flight.  The response
    must be NOERROR with no EDE attached and must contain the TXT record,
    not a stale answer or SERVFAIL (the original test 163 in
    serve_stale/tests.sh).
    """
    _toggle("enable")
    query = isctest.query.create("data.example.", "TXT", dnssec=False)
    priming = isctest.query.udp(query, "10.53.0.2", timeout=5)
    isctest.check.noerror(priming)

    # Sleep past the record's 2 second TTL so the cached answer expires.
    time.sleep(3)

    _toggle("disable")
    # Schedule the re-enable one second after the timer starts, so that it
    # fires while the query below is still outstanding.
    reenable = threading.Timer(1.0, _toggle, args=("enable",))
    reenable.start()
    try:
        response = isctest.query.udp(query, "10.53.0.2", timeout=15, attempts=1)
    finally:
        # Always wait for the timer thread, even if the query raised.
        reenable.join()

    isctest.check.noerror(response)
    isctest.check.noede(response)

    owner = dns.name.from_text("data.example.")
    txt_rrset = response.find_rrset(
        response.answer, owner, dns.rdataclass.IN, dns.rdatatype.TXT
    )
    assert "A text record with a 2 second ttl" in str(txt_rrset[0])

View file

@ -122,18 +122,24 @@ n=$((n + 1))
ret=0
echo_i "verifying active sockets output in named.stats ($n)"
nsock1nstat=$(grep "UDP/IPv4 sockets active" $last_stats | awk '{print $1}')
[ $((nsock1nstat - nsock0nstat)) -eq 1 ] || ret=1
# After repeated UDP timeouts to the same authoritative server, the
# resolver switches to TCP, so the in-flight socket may be either UDP
# or TCP. Require at least one extra active socket of either kind.
nsock1udp=$(grep "UDP/IPv4 sockets active" $last_stats | awk '{print $1}')
nsock1tcp=$(grep "TCP/IPv4 sockets active" $last_stats | awk '{print $1}')
[ $((${nsock1udp:-0} + ${nsock1tcp:-0} - nsock0nstat)) -ge 1 ] || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
# there should be 1 UDP and no TCP queries. As the TCP counter is zero
# no status line is emitted.
# There should be 1 query in progress. After repeated UDP timeouts the
# resolver switches to TCP, so depending on which retry attempt the
# snapshot captures the query may be counted as either UDP or TCP.
ret=0
echo_i "verifying queries in progress in named.stats ($n)"
grep "1 UDP queries in progress" $last_stats >/dev/null || ret=1
grep "TCP queries in progress" $last_stats >/dev/null && ret=1
udp_in_progress=$(awk '/UDP queries in progress/ {print $1}' $last_stats)
tcp_in_progress=$(awk '/TCP queries in progress/ {print $1}' $last_stats)
[ $((${udp_in_progress:-0} + ${tcp_in_progress:-0})) -eq 1 ] || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))

View file

@ -2033,6 +2033,9 @@ fctx_setretryinterval(fetchctx_t *fctx, unsigned int rtt) {
isc_interval_set(&fctx->interval, seconds, us * NS_PER_US);
}
static struct tried *
triededns(fetchctx_t *fctx, isc_sockaddr_t *address);
static isc_result_t
fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
unsigned int options) {
@ -2126,6 +2129,23 @@ fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo,
}
}
/*
* If this server has already been tried at least twice in this
* fetch context after the previous attempt timed out, force TCP
* for this attempt. The decision must be made here, before the
* dispatch type is chosen below, so that the dispatch and the
* DNS_FETCHOPT_TCP flag agree.
*/
if (fctx->timeout && fctx->timeouts >= 2U &&
(options & DNS_FETCHOPT_NOEDNS0) == 0 &&
(options & DNS_FETCHOPT_TCP) == 0)
{
struct tried *tried = triededns(fctx, &sockaddr);
if (tried != NULL && tried->count >= 2U) {
options |= DNS_FETCHOPT_TCP;
}
}
/*
* Allow an additional second for the kernel to resend the SYN
* (or SYN without ECN in the case of stupid firewalls blocking
@ -2551,35 +2571,20 @@ resquery_send(resquery_t *query) {
query->options |= DNS_FETCHOPT_NOEDNS0;
}
if (fctx->timeout && (query->options & DNS_FETCHOPT_NOEDNS0) == 0) {
if (fctx->timeout && (query->options & DNS_FETCHOPT_NOEDNS0) == 0 &&
(query->options & DNS_FETCHOPT_TCP) == 0)
{
isc_sockaddr_t *sockaddr = &query->addrinfo->sockaddr;
struct tried *tried;
struct tried *tried = triededns(fctx, sockaddr);
/*
* If this is the first timeout for this server in this
* fetch context, try setting EDNS UDP buffer size to
* the largest UDP response size we have seen from this
* server so far.
*
* If this server has already timed out twice or more in
* this fetch context, force TCP.
*/
if ((tried = triededns(fctx, sockaddr)) != NULL) {
if (tried->count == 1U) {
hint = dns_adb_getudpsize(fctx->adb,
query->addrinfo);
} else if (tried->count >= 2U) {
if ((query->options & DNS_FETCHOPT_TCP) == 0) {
/*
* Inform the ADB that we're ending a
* UDP fetch, and turn the query into
* a TCP query.
*/
dns_adb_endudpfetch(fctx->adb,
query->addrinfo);
query->options |= DNS_FETCHOPT_TCP;
}
}
if (tried != NULL && tried->count == 1U) {
hint = dns_adb_getudpsize(fctx->adb, query->addrinfo);
}
}
fctx->timeout = false;
@ -4351,6 +4356,8 @@ fctx_try(fetchctx_t *fctx, bool retrying) {
FCTX_ATTR_SET(fctx, FCTX_ATTR_ADDRWAIT);
return;
default:
dns_ede_add(&fctx->edectx, DNS_EDE_NOREACHABLEAUTH,
NULL);
goto done;
}
@ -4368,6 +4375,8 @@ fctx_try(fetchctx_t *fctx, bool retrying) {
*/
if (addrinfo == NULL) {
result = DNS_R_SERVFAIL;
dns_ede_add(&fctx->edectx, DNS_EDE_NOREACHABLEAUTH,
NULL);
goto done;
}
}

View file

@ -7375,10 +7375,10 @@ query_usestale(query_ctx_t *qctx, isc_result_t result) {
}
/*
* Start the stale-refresh-time window in case there was a
* resolver query timeout.
* Start the stale-refresh-time window as there appears
* to have been a resolver query failure.
*/
if (qctx->resuming && result == ISC_R_TIMEDOUT) {
if (qctx->resuming) {
qctx->client->query.dboptions |= DNS_DBFIND_STALESTART;
}
return true;