Test resolver statistics when responses time out

Add a test to check that timed-out responses do not skew the
normal response statistics counters, and that they do update the
timeouts counter.

(cherry picked from commit 0c7fa8d572)
This commit is contained in:
Aram Sargsyan 2025-03-06 14:37:09 +00:00
parent df0fe93fa5
commit 006130a35c
3 changed files with 42 additions and 2 deletions

View file

@ -74,6 +74,10 @@ sub handleQuery {
$packet->push("answer", new Net::DNS::RR($qname . " 300 A 10.53.0.3"));
} elsif ($qname eq "nodata.example.net") {
# Do not add a SOA RRset.
} elsif ($qname eq "noresponse.example.net") {
# Do not respond.
print "RESPONSE:\n";
return "";
} elsif ($qname eq "nxdomain.example.net") {
# Do not add a SOA RRset.
$packet->header->rcode(NXDOMAIN);
@ -185,8 +189,12 @@ for (;;) {
print "TCP request\n";
my $result = handleQuery($buf);
$len = length($result);
$conn->syswrite(pack("n", $len), 2);
$n = $conn->syswrite($result, $len);
if ($len != 0) {
$conn->syswrite(pack("n", $len), 2);
$n = $conn->syswrite($result, $len);
} else {
$n = 0;
}
print " Sent: $n chars via TCP\n";
}
$conn->close;

View file

@ -85,6 +85,37 @@ if [ -x "${RESOLVE}" ]; then
status=$((status + ret))
fi
rndccmd 10.53.0.1 stats || ret=1 # Get the responses, RTT and timeout statistics before the following timeout tests
grep -F 'responses received' ns1/named.stats >ns1/named.stats.responses-before || true
grep -F 'queries with RTT' ns1/named.stats >ns1/named.stats.rtt-before || true
grep -F 'query timeouts' ns1/named.stats >ns1/named.stats.timeouts-before || true
mv ns1/named.stats ns1/named.stats-before
# Checking if the "hung fetch" timer kicks in and interrupts a non-responsive query.
n=$((n + 1))
echo_i "checking no response timeout handling ($n)"
ret=0
dig_with_opts +tcp +tries=1 +timeout=15 noresponse.example.net @10.53.0.1 a >dig.out.ns1.test${n} || ret=1
grep -F "status: SERVFAIL" dig.out.ns1.test${n} >/dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
echo_i "checking that the timeout didn't skew the resolver responses counters and did update the timeout counter ($n)"
ret=0
rndccmd 10.53.0.1 stats || ret=1
grep -F 'responses received' ns1/named.stats >ns1/named.stats.responses-after || true
grep -F 'queries with RTT' ns1/named.stats >ns1/named.stats.rtt-after || true
grep -F 'query timeouts' ns1/named.stats >ns1/named.stats.timeouts-after || true
mv ns1/named.stats ns1/named.stats-after
diff ns1/named.stats.responses-before ns1/named.stats.responses-after >/dev/null || ret=1
diff ns1/named.stats.rtt-before ns1/named.stats.rtt-after >/dev/null || ret=1
# The following check is disabled in this branch, because TCP timeouts don't
# work well here, and instead the "hung fetch" timer interrupts the query.
#diff ns1/named.stats.timeouts-before ns1/named.stats.timeouts-after >/dev/null && ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status + ret))
n=$((n + 1))
echo_i "checking handling of bogus referrals ($n)"
# If the server has the "INSIST(!external)" bug, this query will kill it.

View file

@ -22,6 +22,7 @@ pytestmark = pytest.mark.extra_artifacts(
"resolve.out.*",
"ans*/ans.run",
"ans*/query.log",
"ns1/named.stats*",
"ns4/tld.db",
"ns5/trusted.conf",
"ns6/K*",