diff --git a/CHANGES b/CHANGES index 77094587cb..f696edc7e8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +5832. [bug] When a query timed out or failed with another type of + socket error, dig did not try to perform the lookup + using the other servers, if any were specified. [GL #3128] + 5831. [bug] When resending a UDP request in the result of a timeout, the recv_done() function in dighost.c was prepending the new query into the loookup's queries list instead diff --git a/bin/dig/dighost.c b/bin/dig/dighost.c index 360e8a3b97..88d355f100 100644 --- a/bin/dig/dighost.c +++ b/bin/dig/dighost.c @@ -3280,9 +3280,9 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { } if (l->retries > 1) { + l->retries--; debug("making new TCP request, %d tries left", l->retries); - l->retries--; requeue_lookup(l, true); next = NULL; } else if ((l->current_query != NULL) && @@ -3622,50 +3622,119 @@ recv_done(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, TIME_NOW(&query->time_recv); } - if (eresult == ISC_R_TIMEDOUT && !l->tcp_mode && l->retries > 1) { - dig_query_t *newq = NULL; - - l->retries--; - debug("making new UDP request, %d tries left", l->retries); - newq = new_query(l, query->servname, query->userarg); - - ISC_LIST_INSERTAFTER(l->q, query, newq, link); - if (l->current_query == query) { - query_detach(&l->current_query); - } - start_udp(newq); - goto detach_query; - } - if ((!l->pending && !l->ns_search_only) || atomic_load(&cancel_now)) { debug("no longer pending. Got %s", isc_result_totext(eresult)); goto next_lookup; } - if (eresult != ISC_R_SUCCESS) { + if (eresult == ISC_R_TIMEDOUT) { + if (l->retries > 1 && !l->tcp_mode) { + dig_query_t *newq = NULL; + + /* + * For UDP, insert a copy of the current query just + * after itself in the list, and start it to retry the + * request. 
+ */ + newq = new_query(l, query->servname, query->userarg); + ISC_LIST_INSERTAFTER(l->q, query, newq, link); + if (l->current_query == query) { + query_detach(&l->current_query); + } + if (l->current_query == NULL) { + l->retries--; + debug("making new UDP request, %d tries left", + l->retries); + start_udp(newq); + } + + goto detach_query; + } else if (l->retries > 1 && l->tcp_mode) { + /* + * For TCP, we have to requeue the whole lookup, see + * the comments above the start_tcp() function. + */ + l->retries--; + debug("making new TCP request, %d tries left", + l->retries); + requeue_lookup(l, true); + + if (keep != NULL) { + isc_nmhandle_detach(&keep); + } + + goto cancel_lookup; + } else { + dig_query_t *next = ISC_LIST_NEXT(query, link); + + /* + * No retries left, go to the next query, if there is + * one. + */ + if (next != NULL) { + if (l->current_query == query) { + query_detach(&l->current_query); + } + if (l->current_query == NULL) { + debug("starting next query %p", next); + if (l->tcp_mode) { + start_tcp(next); + } else { + start_udp(next); + } + } + goto detach_query; + } + + /* + * Otherwise, print the cmdline and an error message, + * and cancel the lookup. 
+ */ + printf("%s", l->cmdline); + dighost_error("connection timed out; " + "no servers could be reached\n"); + if (exitcode < 9) { + exitcode = 9; + } + + if (keep != NULL) { + isc_nmhandle_detach(&keep); + } + + goto cancel_lookup; + } + } else if (eresult != ISC_R_SUCCESS) { + dig_query_t *next = ISC_LIST_NEXT(query, link); char sockstr[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_format(&query->sockaddr, sockstr, sizeof(sockstr)); - if (eresult == ISC_R_TIMEDOUT) { - if (l->retries > 1) { - debug("making new TCP request, %d tries left", - l->retries); - l->retries--; - requeue_lookup(l, true); - } else { - printf("%s", l->cmdline); - dighost_error("connection timed out; " - "no servers could be reached\n"); - if (exitcode < 9) { - exitcode = 9; + /* + * There was a communication error with the current query, + * go to the next query, if there is one. + */ + if (next != NULL) { + if (l->current_query == query) { + query_detach(&l->current_query); + } + if (l->current_query == NULL) { + debug("starting next query %p", next); + if (l->tcp_mode) { + start_tcp(next); + } else { + start_udp(next); } } - } else { - dighost_error("communications error to %s: %s\n", - sockstr, isc_result_totext(eresult)); + goto detach_query; } + /* + * Otherwise, print an error message and cancel the + * lookup. 
+ */ + dighost_error("communications error to %s: %s\n", sockstr, + isc_result_totext(eresult)); + if (keep != NULL) { isc_nmhandle_detach(&keep); } diff --git a/bin/tests/system/digdelv/tests.sh b/bin/tests/system/digdelv/tests.sh index f7e852a1c9..7e34527288 100644 --- a/bin/tests/system/digdelv/tests.sh +++ b/bin/tests/system/digdelv/tests.sh @@ -1014,6 +1014,24 @@ if [ -x "$DIG" ] ; then grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1 if [ $ret -ne 0 ]; then echo_i "failed"; fi status=$((status+ret)) + + n=$((n+1)) + echo_i "check that dig tries the next server after a connection error ($n)" + ret=0 + dig_with_opts -d @10.53.0.99 @10.53.0.3 a.example > dig.out.test$n 2>&1 || ret=1 + grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 + if [ $ret -ne 0 ]; then echo_i "failed"; fi + status=$((status+ret)) + + n=$((n+1)) + echo_i "check that dig tries the next server after timeouts ($n)" + # Ask ans4 to not respond to queries + echo "//" | sendcmd 10.53.0.4 + ret=0 + dig_with_opts -d @10.53.0.4 @10.53.0.3 a.example > dig.out.test$n 2>&1 || ret=1 + grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 + if [ $ret -ne 0 ]; then echo_i "failed"; fi + status=$((status+ret)) else echo_i "$DIG is needed, so skipping these dig tests" fi