DiG: use the same retry and fail-over logic for different failure types

DiG implements different logic in the `recv_done()` callback function
when processing a failure:

1. For a timed-out query, it applies the "retries" logic first and then,
   when the retries fail, fails over to the next server.

2. For an EOF (end-of-file, or unexpected disconnect) error, it tries to
   make a single retry attempt (even if the user has requested more
   retries) and then, when that fails, fails over to the next server.

3. For other types of failures, DiG does not apply the "retries" logic,
   and tries to fail over to the next server (again, even if the user
   has requested to make retries).

Simplify the code by applying the same logic as in (1), i.e. retry first
and then fail over, to the different types of failures in `recv_done()`.

(cherry picked from commit abfd0d363f4f495de00dc4bd41f74726370369ba)
This commit is contained in:
Aram Sargsyan 2022-06-15 13:41:10 +00:00
parent 6efb73d4bb
commit 29057d9911
6 changed files with 27 additions and 84 deletions

View file

@ -811,7 +811,6 @@ clone_lookup(dig_lookup_t *lookold, bool servers) {
looknew->done_as_is = lookold->done_as_is;
looknew->dscp = lookold->dscp;
looknew->rrcomments = lookold->rrcomments;
looknew->eoferr = lookold->eoferr;
if (lookold->ecs_addr != NULL) {
size_t len = sizeof(isc_sockaddr_t);
@ -3367,8 +3366,7 @@ force_next(dig_query_t *query) {
dighost_error("no response from %s\n", buf);
} else {
printf("%s", l->cmdline);
dighost_error("connection timed out; "
"no servers could be reached\n");
dighost_error("no servers could be reached\n");
}
if (exitcode < 9) {
@ -3382,28 +3380,6 @@ force_next(dig_query_t *query) {
UNLOCK_LOOKUP;
}
/*%
* Called when a peer closes a TCP socket prematurely.
*
* If the lookup has not recorded an EOF error yet (eoferr == 0) and has
* more than one retry left, one retry is consumed, the lookup is requeued
* and the requeued lookup's eoferr counter is incremented. Otherwise,
* exitcode is set to 9.
*/
static void
requeue_or_update_exitcode(dig_lookup_t *lookup) {
if (lookup->eoferr == 0U && lookup->retries > 1) {
/* Consume one retry before requeueing the lookup. */
--lookup->retries;
/*
* Peer closed the connection prematurely for the first time
* for this lookup. Try again, keeping track of this failure.
*/
dig_lookup_t *requeued_lookup = requeue_lookup(lookup, true);
requeued_lookup->eoferr++;
} else {
/*
* Peer closed the connection prematurely and it happened
* previously for this lookup. Indicate an error.
*/
exitcode = 9;
}
}
/*%
* For transfers that involve multiple recvs (XFR's in particular),
* launch the next recv.
@ -3968,7 +3944,13 @@ recv_done(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
}
}
if (eresult == ISC_R_TIMEDOUT) {
if (eresult != ISC_R_SUCCESS) {
char sockstr[ISC_SOCKADDR_FORMATSIZE];
isc_sockaddr_format(&query->sockaddr, sockstr, sizeof(sockstr));
dighost_warning("communications error to %s: %s", sockstr,
isc_result_totext(eresult));
if (l->retries > 1 && !l->tcp_mode) {
dig_query_t *newq = NULL;
@ -4039,8 +4021,8 @@ recv_done(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
* and cancel the lookup.
*/
printf("%s", l->cmdline);
dighost_error("connection timed out; "
"no servers could be reached\n");
dighost_error("no servers could be reached\n");
if (exitcode < 9) {
exitcode = 9;
}
@ -4051,52 +4033,6 @@ recv_done(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
goto cancel_lookup;
}
} else if (eresult != ISC_R_SUCCESS) {
dig_query_t *next = ISC_LIST_NEXT(query, link);
char sockstr[ISC_SOCKADDR_FORMATSIZE];
isc_sockaddr_format(&query->sockaddr, sockstr, sizeof(sockstr));
/*
* There was a communication error with the current query,
* go to the next query, if there is one.
*/
if (next != NULL) {
if (l->current_query == query) {
query_detach(&l->current_query);
}
if (l->current_query == NULL) {
debug("starting next query %p", next);
if (l->tcp_mode) {
start_tcp(next);
} else {
start_udp(next);
}
}
if (check_if_queries_done(l, query)) {
goto cancel_lookup;
}
goto detach_query;
}
/*
* Otherwise, print an error message and cancel the
* lookup.
*/
dighost_error("communications error to %s: %s\n", sockstr,
isc_result_totext(eresult));
if (keep != NULL) {
isc_nmhandle_detach(&keep);
}
if (eresult == ISC_R_EOF) {
requeue_or_update_exitcode(l);
} else if (exitcode < 9) {
exitcode = 9;
}
goto cancel_lookup;
}
isc_buffer_init(&b, region->base, region->length);

View file

@ -169,7 +169,6 @@ struct dig_lookup {
unsigned int ednsflags;
dns_opcode_t opcode;
int rrcomments;
unsigned int eoferr;
uint16_t qid;
struct {
bool http_plain;

View file

@ -161,7 +161,8 @@ grep "status: NOERROR" dig.out.3.${t} > /dev/null 2>&1 || ret=1
$DIG -p ${PORT} soa example. \
@10.53.0.2 -b 10.53.0.8 > dig.out.4.${t}
grep "status: NOERROR" dig.out.4.${t} > /dev/null 2>&1 && ret=1
grep "connection timed out" dig.out.4.${t} > /dev/null 2>&1 || ret=1
grep "timed out" dig.out.4.${t} > /dev/null 2>&1 || ret=1
grep ";; no servers could be reached" dig.out.4.${t} > /dev/null 2>&1 || ret=1
[ $ret -eq 0 ] || echo_i "failed"
status=`expr $status + $ret`

View file

@ -106,7 +106,8 @@ n=`expr $n + 1`
echo_i "checking drop edns server setup ($n)"
ret=0
$DIG $DIGOPTS +edns @10.53.0.2 dropedns soa > dig.out.1.test$n && ret=1
grep "connection timed out; no servers could be reached" dig.out.1.test$n > /dev/null || ret=1
grep "timed out" dig.out.1.test$n > /dev/null || ret=1
grep ";; no servers could be reached" dig.out.1.test$n > /dev/null || ret=1
$DIG $DIGOPTS +noedns @10.53.0.2 dropedns soa > dig.out.2.test$n || ret=1
grep "status: NOERROR" dig.out.2.test$n > /dev/null || ret=1
grep "EDNS: version:" dig.out.2.test$n > /dev/null && ret=1
@ -114,7 +115,8 @@ $DIG $DIGOPTS +noedns +tcp @10.53.0.2 dropedns soa > dig.out.3.test$n || ret=1
grep "status: NOERROR" dig.out.3.test$n > /dev/null || ret=1
grep "EDNS: version:" dig.out.3.test$n > /dev/null && ret=1
$DIG $DIGOPTS +edns +tcp @10.53.0.2 dropedns soa > dig.out.4.test$n && ret=1
grep "connection timed out; no servers could be reached" dig.out.4.test$n > /dev/null || ret=1
grep "timed out" dig.out.4.test$n > /dev/null || ret=1
grep ";; no servers could be reached" dig.out.4.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
@ -129,7 +131,8 @@ n=`expr $n + 1`
echo_i "checking drop edns + no tcp server setup ($n)"
ret=0
$DIG $DIGOPTS +edns @10.53.0.3 dropedns-notcp soa > dig.out.1.test$n && ret=1
grep "connection timed out; no servers could be reached" dig.out.1.test$n > /dev/null || ret=1
grep "timed out" dig.out.1.test$n > /dev/null || ret=1
grep ";; no servers could be reached" dig.out.1.test$n > /dev/null || ret=1
$DIG $DIGOPTS +noedns +tcp @10.53.0.3 dropedns-notcp soa > dig.out.2.test$n && ret=1
grep "connection refused" dig.out.2.test$n > /dev/null || ret=1
$DIG $DIGOPTS +noedns @10.53.0.3 dropedns-notcp soa > dig.out.3.test$n || ret=1
@ -192,7 +195,8 @@ $DIG $DIGOPTS +edns +tcp @10.53.0.6 edns512 txt > dig.out.2.test$n || ret=1
grep "status: NOERROR" dig.out.2.test$n > /dev/null || ret=1
grep "EDNS: version:" dig.out.2.test$n > /dev/null || ret=1
$DIG $DIGOPTS +edns +dnssec @10.53.0.6 edns512 txt > dig.out.3.test$n && ret=1
grep "connection timed out; no servers could be reached" dig.out.3.test$n > /dev/null || ret=1
grep "timed out" dig.out.3.test$n > /dev/null || ret=1
grep ";; no servers could be reached" dig.out.3.test$n > /dev/null || ret=1
$DIG $DIGOPTS +edns +dnssec +bufsize=512 +ignore @10.53.0.6 edns512 soa > dig.out.4.test$n || ret=1
grep "status: NOERROR" dig.out.4.test$n > /dev/null || ret=1
grep "EDNS: version:" dig.out.4.test$n > /dev/null || ret=1
@ -216,7 +220,8 @@ grep "EDNS: version:" dig.out.1.test$n > /dev/null || ret=1
$DIG $DIGOPTS +edns +tcp @10.53.0.7 edns512-notcp soa > dig.out.2.test$n && ret=1
grep "connection refused" dig.out.2.test$n > /dev/null || ret=1
$DIG $DIGOPTS +edns +dnssec @10.53.0.7 edns512-notcp soa > dig.out.3.test$n && ret=1
grep "connection timed out; no servers could be reached" dig.out.3.test$n > /dev/null || ret=1
grep "timed out" dig.out.3.test$n > /dev/null || ret=1
grep ";; no servers could be reached" dig.out.3.test$n > /dev/null || ret=1
$DIG $DIGOPTS +edns +dnssec +bufsize=512 +ignore @10.53.0.7 edns512-notcp soa > dig.out.4.test$n || ret=1
grep "status: NOERROR" dig.out.4.test$n > /dev/null || ret=1
grep "EDNS: version:" dig.out.4.test$n > /dev/null || ret=1

View file

@ -423,7 +423,7 @@ here () {
}
# check dropped response
DROPPED='^;; connection timed out; no servers could be reached'
DROPPED='^;; no servers could be reached'
drop () {
make_dignm
digcmd $* >$DIGNM

View file

@ -1146,7 +1146,8 @@ n=$((n+1))
echo_i "check notincache.example times out (max-stale-ttl default) ($n)"
ret=0
$DIG -p ${PORT} +tries=1 +timeout=3 @10.53.0.3 notfound.example TXT > dig.out.test$n 2>&1
grep "connection timed out" dig.out.test$n > /dev/null || ret=1
grep "timed out" dig.out.test$n > /dev/null || ret=1
grep ";; no servers could be reached" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status+ret))
@ -1781,7 +1782,8 @@ n=$((n+1))
echo_i "check not in cache longttl.example times out (stale-answer-client-timeout 1.8) ($n)"
ret=0
wait_for_log 4 "longttl.example client timeout, stale answer unavailable" ns3/named.run || ret=1
grep "connection timed out" dig.out.test$n > /dev/null || ret=1
grep "timed out" dig.out.test$n > /dev/null || ret=1
grep ";; no servers could be reached" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=$((status+ret))