From 992997967bb5104429afb7e7701b8859ea0c47b6 Mon Sep 17 00:00:00 2001 From: Aram Sargsyan Date: Fri, 11 Mar 2022 19:37:27 +0000 Subject: [PATCH] After dig request errors, try to use other servers when they exist When timing-out or having other types of socket errors during a query, dig isn't trying to perform the lookup using other servers which exist in the lookup's queries list. After configured amount of timeout retries, or after a socket error, check if there are other queries/servers in the lookup's queries list, and start the next one if it exists, instead of unconditionally failing. (cherry picked from commit bc203d6082821626d854712d6faa10e6cd825884) --- bin/dig/dighost.c | 133 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 101 insertions(+), 32 deletions(-) diff --git a/bin/dig/dighost.c b/bin/dig/dighost.c index 360e8a3b97..88d355f100 100644 --- a/bin/dig/dighost.c +++ b/bin/dig/dighost.c @@ -3280,9 +3280,9 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { } if (l->retries > 1) { + l->retries--; debug("making new TCP request, %d tries left", l->retries); - l->retries--; requeue_lookup(l, true); next = NULL; } else if ((l->current_query != NULL) && @@ -3622,50 +3622,119 @@ recv_done(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, TIME_NOW(&query->time_recv); } - if (eresult == ISC_R_TIMEDOUT && !l->tcp_mode && l->retries > 1) { - dig_query_t *newq = NULL; - - l->retries--; - debug("making new UDP request, %d tries left", l->retries); - newq = new_query(l, query->servname, query->userarg); - - ISC_LIST_INSERTAFTER(l->q, query, newq, link); - if (l->current_query == query) { - query_detach(&l->current_query); - } - start_udp(newq); - goto detach_query; - } - if ((!l->pending && !l->ns_search_only) || atomic_load(&cancel_now)) { debug("no longer pending. Got %s", isc_result_totext(eresult)); goto next_lookup; } - if (eresult != ISC_R_SUCCESS) { + if (eresult == ISC_R_TIMEDOUT) { + if (l->retries > 1 && !l->tcp_mode) { + dig_query_t *newq = NULL; + + /* + * For UDP, insert a copy of the current query just + * after itself in the list, and start it to retry the + * request. + */ + newq = new_query(l, query->servname, query->userarg); + ISC_LIST_INSERTAFTER(l->q, query, newq, link); + if (l->current_query == query) { + query_detach(&l->current_query); + } + if (l->current_query == NULL) { + l->retries--; + debug("making new UDP request, %d tries left", + l->retries); + start_udp(newq); + } + + goto detach_query; + } else if (l->retries > 1 && l->tcp_mode) { + /* + * For TCP, we have to requeue the whole lookup, see + * the comments above the start_tcp() function. + */ + l->retries--; + debug("making new TCP request, %d tries left", + l->retries); + requeue_lookup(l, true); + + if (keep != NULL) { + isc_nmhandle_detach(&keep); + } + + goto cancel_lookup; + } else { + dig_query_t *next = ISC_LIST_NEXT(query, link); + + /* + * No retries left, go to the next query, if there is + * one. + */ + if (next != NULL) { + if (l->current_query == query) { + query_detach(&l->current_query); + } + if (l->current_query == NULL) { + debug("starting next query %p", next); + if (l->tcp_mode) { + start_tcp(next); + } else { + start_udp(next); + } + } + goto detach_query; + } + + /* + * Otherwise, print the cmdline and an error message, + * and cancel the lookup. + */ + printf("%s", l->cmdline); + dighost_error("connection timed out; " + "no servers could be reached\n"); + if (exitcode < 9) { + exitcode = 9; + } + + if (keep != NULL) { + isc_nmhandle_detach(&keep); + } + + goto cancel_lookup; + } + } else if (eresult != ISC_R_SUCCESS) { + dig_query_t *next = ISC_LIST_NEXT(query, link); char sockstr[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_format(&query->sockaddr, sockstr, sizeof(sockstr)); - if (eresult == ISC_R_TIMEDOUT) { - if (l->retries > 1) { - debug("making new TCP request, %d tries left", - l->retries); - l->retries--; - requeue_lookup(l, true); - } else { - printf("%s", l->cmdline); - dighost_error("connection timed out; " - "no servers could be reached\n"); - if (exitcode < 9) { - exitcode = 9; + /* + * There was a communication error with the current query, + * go to the next query, if there is one. + */ + if (next != NULL) { + if (l->current_query == query) { + query_detach(&l->current_query); + } + if (l->current_query == NULL) { + debug("starting next query %p", next); + if (l->tcp_mode) { + start_tcp(next); + } else { + start_udp(next); } } - } else { - dighost_error("communications error to %s: %s\n", - sockstr, isc_result_totext(eresult)); + goto detach_query; } + /* + * Otherwise, print an error message and cancel the + * lookup. + */ + dighost_error("communications error to %s: %s\n", sockstr, + isc_result_totext(eresult)); + if (keep != NULL) { isc_nmhandle_detach(&keep); }