From 24847cfd3e30f9c88c5a6e7c54ab8f067f3315de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Mon, 19 Dec 2022 13:01:38 +0100 Subject: [PATCH 1/8] Backport ISC_REFCOUNT{,_TRACE}_{DECL,IMPL} macros Backport macros that can be used to implement generic attach, detach, ref, and unref functions, so they don't have to be repeated over and over in each unit that uses reference counting. --- lib/isc/include/isc/refcount.h | 92 ++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/lib/isc/include/isc/refcount.h b/lib/isc/include/isc/refcount.h index 37f05b9cdd..6b1f7cdece 100644 --- a/lib/isc/include/isc/refcount.h +++ b/lib/isc/include/isc/refcount.h @@ -149,4 +149,96 @@ isc_refcount_decrement(isc_refcount_t *target) { ISC_INSIST(_refs > 0); \ } while (0) +#define ISC_REFCOUNT_TRACE_DECL(name) \ + name##_t *name##__ref(name##_t *ptr, const char *func, \ + const char *file, unsigned int line); \ + void name##__unref(name##_t *ptr, const char *func, const char *file, \ + unsigned int line); \ + void name##__attach(name##_t *ptr, name##_t **ptrp, const char *func, \ + const char *file, unsigned int line); \ + void name##__detach(name##_t **ptrp, const char *func, \ + const char *file, unsigned int line) + +#define ISC_REFCOUNT_TRACE_IMPL(name, destroy) \ + name##_t *name##__ref(name##_t *ptr, const char *func, \ + const char *file, unsigned int line) { \ + REQUIRE(ptr != NULL); \ + uint_fast32_t refs = \ + isc_refcount_increment(&ptr->references) + 1; \ + fprintf(stderr, \ + "%s:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", \ + __func__, func, file, line, ptr, refs); \ + return (ptr); \ + } \ + \ + void name##__unref(name##_t *ptr, const char *func, const char *file, \ + unsigned int line) { \ + REQUIRE(ptr != NULL); \ + uint_fast32_t refs = \ + isc_refcount_decrement(&ptr->references) - 1; \ + if (refs == 0) { \ + destroy(ptr); \ + } \ + fprintf(stderr, \ + "%s:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", \ + __func__, func, file, line, ptr, refs); \ + } \ + void name##__attach(name##_t *ptr, name##_t **ptrp, const char *func, \ + const char *file, unsigned int line) { \ + REQUIRE(ptrp != NULL && *ptrp == NULL); \ + uint_fast32_t refs = \ + isc_refcount_increment(&ptr->references) + 1; \ + fprintf(stderr, \ + "%s:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", \ + __func__, func, file, line, ptr, refs); \ + *ptrp = ptr; \ + } \ + \ + void name##__detach(name##_t **ptrp, const char *func, \ + const char *file, unsigned int line) { \ + REQUIRE(ptrp != NULL && *ptrp != NULL); \ + name##_t *ptr = *ptrp; \ + *ptrp = NULL; \ + uint_fast32_t refs = \ + isc_refcount_decrement(&ptr->references) - 1; \ + if (refs == 0) { \ + destroy(ptr); \ + } \ + fprintf(stderr, \ + "%s:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", \ + __func__, func, file, line, ptr, refs); \ + } + +#define ISC_REFCOUNT_DECL(name) \ + name##_t *name##_ref(name##_t *ptr); \ + void name##_unref(name##_t *ptr); \ + void name##_attach(name##_t *ptr, name##_t **ptrp); \ + void name##_detach(name##_t **ptrp) + +#define ISC_REFCOUNT_IMPL(name, destroy) \ + name##_t *name##_ref(name##_t *ptr) { \ + REQUIRE(ptr != NULL); \ + isc_refcount_increment(&ptr->references); \ + return (ptr); \ + } \ + \ + void name##_unref(name##_t *ptr) { \ + REQUIRE(ptr != NULL); \ + if (isc_refcount_decrement(&ptr->references) == 1) { \ + destroy(ptr); \ + } \ + } \ + void name##_attach(name##_t *ptr, name##_t **ptrp) { \ + REQUIRE(ptrp != NULL && *ptrp == NULL); \ + name##_ref(ptr); \ + *ptrp = ptr; \ + } \ + \ + void name##_detach(name##_t **ptrp) { \ + REQUIRE(ptrp != NULL && *ptrp != NULL); \ + name##_t *ptr = *ptrp; \ + *ptrp = NULL; \ + name##_unref(ptr); \ + } + ISC_LANG_ENDDECLS From 5cc12ab92cc1e84f629c13887c50e0a1ed39b6c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Wed, 30 Nov 2022 17:58:35 +0100 Subject: [PATCH 2/8] Fix the thread safety in the dns_dispatch unit The dispatches are not thread-bound, and used freely between various threads (see the dns_resolver and dns_request units for details). This refactoring make sure that all non-const dns_dispatch_t and dns_dispentry_t members are accessed under a lock, and both object now track their internal state (NONE, CONNECTING, CONNECTED, CANCELED) instead of guessing the state from the state of various struct members. During the refactoring, the artificial limit DNS_DISPATCH_SOCKSQUOTA on UDP sockets per dispatch was removed as the limiting needs to happen and happens on in dns_resolver and limiting the number of UDP sockets artificially in dispatch could lead to unpredictable behaviour in case one dispatch has the limit exhausted by others are idle. The TCP artificial limit of DNS_DISPATCH_MAXREQUESTS makes even less sense as the TCP connections are only reused in the dns_request API that's not a heavy user of the outgoing connections. As a side note, the fact that UDP and TCP dispatch pretends to be same thing, but in fact the connected UDP is handled from dns_dispentry_t and dns_dispatch_t acts as a broker, but connected TCP is handled from dns_dispatch_t and dns_dispatchmgr_t acts as a broker doesn't really help the clarity of this unit. This refactoring kept to API almost same - only dns_dispatch_cancel() and dns_dispatch_done() were merged into dns_dispatch_done() as we need to cancel active netmgr handles in any case to not leave dangling connections around. The functions handling UDP and TCP have been mostly split to their matching counterparts and the dns_dispatch_ functions are now thing wrappers that call _dispatch_ based on the socket type. More debugging-level logging was added to the unit to accomodate for this fact. (cherry picked from commit 6f317f27ea57a96cfb67d7b02fcb8aa21be37371) --- lib/dns/dispatch.c | 1619 ++++++++++++++++++-------------- lib/dns/include/dns/dispatch.h | 84 +- lib/dns/request.c | 46 +- lib/dns/resolver.c | 2 +- 4 files changed, 990 insertions(+), 761 deletions(-) diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 115cc78476..faa52fdc2b 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -70,11 +70,19 @@ struct dns_dispatchmgr { unsigned int nv6ports; /*%< # of available ports for IPv4 */ }; +typedef enum { + DNS_DISPATCHSTATE_NONE = 0UL, + DNS_DISPATCHSTATE_CONNECTING, + DNS_DISPATCHSTATE_CONNECTED, + DNS_DISPATCHSTATE_CANCELED, +} dns_dispatchstate_t; + struct dns_dispentry { unsigned int magic; isc_refcount_t references; dns_dispatch_t *disp; isc_nmhandle_t *handle; /*%< netmgr handle for UDP connection */ + dns_dispatchstate_t state; unsigned int bucket; unsigned int retries; unsigned int timeout; @@ -87,26 +95,14 @@ struct dns_dispentry { dispatch_cb_t sent; dispatch_cb_t response; void *arg; - bool canceled; + bool reading; + isc_result_t result; ISC_LINK(dns_dispentry_t) link; ISC_LINK(dns_dispentry_t) alink; ISC_LINK(dns_dispentry_t) plink; ISC_LINK(dns_dispentry_t) rlink; }; -/*% - * Fixed UDP buffer size. - */ -#ifndef DNS_DISPATCH_UDPBUFSIZE -#define DNS_DISPATCH_UDPBUFSIZE 4096 -#endif /* ifndef DNS_DISPATCH_UDPBUFSIZE */ - -typedef enum { - DNS_DISPATCHSTATE_NONE = 0UL, - DNS_DISPATCHSTATE_CONNECTING, - DNS_DISPATCHSTATE_CONNECTED -} dns_dispatchstate_t; - struct dns_dispatch { /* Unlocked. */ unsigned int magic; /*%< magic */ @@ -122,17 +118,15 @@ struct dns_dispatch { /* Locked by "lock". */ isc_mutex_t lock; /*%< locks all below */ isc_socktype_t socktype; - atomic_uint_fast32_t tcpstate; - atomic_bool tcpreading; + dns_dispatchstate_t state; isc_refcount_t references; - unsigned int shutdown_out : 1; + + bool reading; dns_displist_t pending; dns_displist_t active; - unsigned int nsockets; - unsigned int requests; /*%< how many requests we have */ - unsigned int tcpbuffers; /*%< allocated buffers */ + unsigned int requests; /*%< how many requests we have */ unsigned int timedout; }; @@ -152,26 +146,6 @@ struct dns_dispatch { #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r') #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC) -/*% - * Quota to control the number of UDP dispatch sockets. If a dispatch has - * more than the quota of sockets, new queries will purge oldest ones, so - * that a massive number of outstanding queries won't prevent subsequent - * queries (especially if the older ones take longer time and result in - * timeout). - */ -#ifndef DNS_DISPATCH_SOCKSQUOTA -#define DNS_DISPATCH_SOCKSQUOTA 3072 -#endif /* ifndef DNS_DISPATCH_SOCKSQUOTA */ - -/*% - * Quota to control the number of concurrent requests that can be handled - * by each TCP dispatch. (UDP dispatches do not currently support socket - * sharing.) - */ -#ifndef DNS_DISPATCH_MAXREQUESTS -#define DNS_DISPATCH_MAXREQUESTS 32768 -#endif /* ifndef DNS_DISPATCH_MAXREQUESTS */ - /*% * Number of buckets in the QID hash table, and the value to * increment the QID by when attempting to avoid collisions. @@ -185,6 +159,20 @@ struct dns_dispatch { #define DNS_QID_INCREMENT 16433 #endif /* ifndef DNS_QID_INCREMENT */ +#if DNS_DISPATCH_TRACE +#define dns_dispentry_ref(ptr) \ + dns_dispentry__ref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispentry_unref(ptr) \ + dns_dispentry__unref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispentry_attach(ptr, ptrp) \ + dns_dispentry__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define dns_dispentry_detach(ptrp) \ + dns_dispentry__detach(ptrp, __func__, __FILE__, __LINE__) +ISC_REFCOUNT_TRACE_DECL(dns_dispentry); +#else +ISC_REFCOUNT_DECL(dns_dispentry); +#endif + /* * Statics. */ @@ -200,10 +188,13 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, static void tcp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, void *arg); +static void +tcp_recv_done(dns_dispentry_t *resp, isc_result_t eresult, + isc_region_t *region); static uint32_t dns_hash(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t); static void -dispatch_free(dns_dispatch_t **dispp); +dispentry_cancel(dns_dispentry_t *resp, isc_result_t result); static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, dns_dispatch_t **dispp); @@ -212,12 +203,48 @@ qid_allocate(dns_dispatchmgr_t *mgr, dns_qid_t **qidp); static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); static void -startrecv(isc_nmhandle_t *handle, dns_dispatch_t *disp, dns_dispentry_t *resp); -void -dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, int32_t timeout); +udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp); +static void +tcp_startrecv(isc_nmhandle_t *handle, dns_dispatch_t *disp, + dns_dispentry_t *resp); +static void +tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, + int32_t timeout); +static void +udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout); #define LVL(x) ISC_LOG_DEBUG(x) +static const char * +socktype2str(dns_dispentry_t *resp) { + dns_dispatch_t *disp = resp->disp; + + switch (disp->socktype) { + case isc_socktype_udp: + return ("UDP"); + case isc_socktype_tcp: + return ("TCP"); + default: + return (""); + } +} + +static const char * +state2str(dns_dispatchstate_t state) { + switch (state) { + case DNS_DISPATCHSTATE_NONE: + return ("none"); + case DNS_DISPATCHSTATE_CONNECTING: + return ("connecting"); + case DNS_DISPATCHSTATE_CONNECTED: + return ("connected"); + case DNS_DISPATCHSTATE_CANCELED: + return ("canceled"); + default: + return (""); + } +} + static void mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) ISC_FORMAT_PRINTF(3, 4); @@ -262,13 +289,20 @@ static void dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) { char msgbuf[2048]; va_list ap; + int r; if (!isc_log_wouldlog(dns_lctx, level)) { return; } va_start(ap, fmt); - vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); + r = vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); + if (r < 0) { + msgbuf[0] = '\0'; + } else if ((unsigned int)r >= sizeof(msgbuf)) { + /* Truncated */ + msgbuf[sizeof(msgbuf) - 1] = '\0'; + } va_end(ap); isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, @@ -276,6 +310,34 @@ dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) { msgbuf); } +static void +dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...) + ISC_FORMAT_PRINTF(3, 4); + +static void +dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...) { + char msgbuf[2048]; + va_list ap; + int r; + + if (!isc_log_wouldlog(dns_lctx, level)) { + return; + } + + va_start(ap, fmt); + r = vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); + if (r < 0) { + msgbuf[0] = '\0'; + } else if ((unsigned int)r >= sizeof(msgbuf)) { + /* Truncated */ + msgbuf[sizeof(msgbuf) - 1] = '\0'; + } + va_end(ap); + + dispatch_log(resp->disp, level, "%s response %p: %s", + socktype2str(resp), resp, msgbuf); +} + /* * Return a hash of the destination and message id. */ @@ -320,8 +382,6 @@ setup_socket(dns_dispatch_t *disp, dns_dispentry_t *resp, return (ISC_R_ADDRNOTAVAIL); } - disp->nsockets++; - resp->local = disp->local; resp->peer = *dest; @@ -334,26 +394,6 @@ setup_socket(dns_dispatch_t *disp, dns_dispentry_t *resp, return (ISC_R_SUCCESS); } -/*% - * Deactivate the socket for a dispatch entry. - * The dispatch must be locked. - */ -static void -deactivate_dispentry(dns_dispatch_t *disp, dns_dispentry_t *resp) { - if (ISC_LINK_LINKED(resp, alink)) { - ISC_LIST_UNLINK(disp->active, resp, alink); - } - - if (resp->handle != NULL) { - INSIST(disp->socktype == isc_socktype_udp); - - isc_nm_cancelread(resp->handle); - isc_nmhandle_detach(&resp->handle); - } - - disp->nsockets--; -} - /* * Find an entry for query ID 'id', socket address 'dest', and port number * 'port'. @@ -381,55 +421,48 @@ entry_search(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, return (NULL); } -static void -dispentry_attach(dns_dispentry_t *resp, dns_dispentry_t **respp) { - REQUIRE(VALID_RESPONSE(resp)); - REQUIRE(respp != NULL && *respp == NULL); - - isc_refcount_increment(&resp->references); - - *respp = resp; -} - static void dispentry_destroy(dns_dispentry_t *resp) { dns_dispatch_t *disp = resp->disp; - resp->magic = 0; + /* + * We need to call this from here in case there's an external event that + * shuts down our dispatch (like ISC_R_SHUTTINGDOWN). + */ + dispentry_cancel(resp, ISC_R_CANCELED); - if (ISC_LINK_LINKED(resp, plink)) { - ISC_LIST_UNLINK(disp->pending, resp, plink); - } - - INSIST(!ISC_LINK_LINKED(resp, alink)); - INSIST(!ISC_LINK_LINKED(resp, rlink)); - - if (resp->handle != NULL) { - isc_nmhandle_detach(&resp->handle); - } + LOCK(&disp->lock); + INSIST(disp->requests > 0); + disp->requests--; + UNLOCK(&disp->lock); isc_refcount_destroy(&resp->references); + resp->magic = 0; + + INSIST(!ISC_LINK_LINKED(resp, link)); + INSIST(!ISC_LINK_LINKED(resp, plink)); + INSIST(!ISC_LINK_LINKED(resp, alink)); + INSIST(!ISC_LINK_LINKED(resp, rlink)); + + dispentry_log(resp, LVL(90), "destroying"); + + if (resp->handle != NULL) { + dispentry_log(resp, LVL(90), "detaching handle %p from %p", + resp->handle, &resp->handle); + isc_nmhandle_detach(&resp->handle); + } + isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); - dns_dispatch_detach(&disp); + dns_dispatch_detach(&disp); /* DISPATCH001 */ } -static void -dispentry_detach(dns_dispentry_t **respp) { - dns_dispentry_t *resp = NULL; - uint_fast32_t ref; - - REQUIRE(respp != NULL && VALID_RESPONSE(*respp)); - - resp = *respp; - *respp = NULL; - - ref = isc_refcount_decrement(&resp->references); - if (ref == 1) { - dispentry_destroy(resp); - } -} +#if DNS_DISPATCH_TRACE +ISC_REFCOUNT_TRACE_IMPL(dns_dispentry, dispentry_destroy); +#else +ISC_REFCOUNT_IMPL(dns_dispentry, dispentry_destroy); +#endif /* * How long in milliseconds has it been since this dispentry @@ -473,7 +506,7 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, unsigned int flags; isc_sockaddr_t peer; isc_netaddr_t netaddr; - int match, timeout; + int match, timeout = 0; dispatch_cb_t response = NULL; REQUIRE(VALID_RESPONSE(resp)); @@ -482,20 +515,24 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, disp = resp->disp; LOCK(&disp->lock); + INSIST(resp->reading); + resp->reading = false; - dispatch_log(disp, LVL(90), "UDP response %p:%s:requests %d", resp, - isc_result_totext(eresult), disp->requests); + response = resp->response; - /* - * The resp may have been deactivated by shutdown; if - * so, we can skip the response callback. - */ - if (ISC_LINK_LINKED(resp, alink)) { - response = resp->response; - } else if (eresult == ISC_R_SUCCESS) { + if (resp->state == DNS_DISPATCHSTATE_CANCELED) { + /* + * Nobody is interested in the callback if the response + * has been canceled already. Detach from the response + * and the handle. + */ + response = NULL; eresult = ISC_R_CANCELED; } + dispentry_log(resp, LVL(90), "read callback:%s, requests %d", + isc_result_totext(eresult), disp->requests); + if (eresult != ISC_R_SUCCESS) { /* * This is most likely a network error on a connected @@ -521,8 +558,8 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, char netaddrstr[ISC_NETADDR_FORMATSIZE]; isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr)); - dispatch_log(disp, LVL(10), "blackholed packet from %s", - netaddrstr); + dispentry_log(resp, LVL(10), + "blackholed packet from %s", netaddrstr); } goto next; } @@ -535,13 +572,16 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, isc_buffer_add(&source, region->length); dres = dns_message_peekheader(&source, &id, &flags); if (dres != ISC_R_SUCCESS) { - dispatch_log(disp, LVL(10), "got garbage packet"); + char netaddrstr[ISC_NETADDR_FORMATSIZE]; + isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr)); + dispentry_log(resp, LVL(10), "got garbage packet from %s", + netaddrstr); goto next; } - dispatch_log(disp, LVL(92), - "got valid DNS message header, /QR %c, id %u", - (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); + dispentry_log(resp, LVL(92), + "got valid DNS message header, /QR %c, id %u", + (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); /* * Look at the message flags. If it's a query, ignore it. @@ -554,7 +594,7 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, * The QID and the address must match the expected ones. */ if (resp->id != id || !isc_sockaddr_equal(&peer, &resp->peer)) { - dispatch_log(disp, LVL(90), "response doesn't match"); + dispentry_log(resp, LVL(90), "response doesn't match"); inc_stats(disp->mgr, dns_resstatscounter_mismatch); goto next; } @@ -585,26 +625,25 @@ next: * proper response to arrive until the original timeout fires. */ response = NULL; - dispatch_getnext(disp, resp, resp->timeout - dispentry_runtime(resp)); + udp_dispatch_getnext(resp, timeout); done: UNLOCK(&disp->lock); if (response != NULL) { + dispentry_log(resp, LVL(90), "UDP read callback on %p: %s", + handle, isc_result_totext(eresult)); response(eresult, region, resp->arg); } - dispentry_detach(&resp); + dns_dispentry_detach(&resp); /* DISPENTRY003 */ } static isc_result_t -tcp_recv_timeout(dns_dispatch_t *disp, dns_dispentry_t **respp) { - dns_dispentry_t *resp = ISC_LIST_HEAD(disp->active); +tcp_recv_oldest(dns_dispatch_t *disp, dns_dispentry_t **respp) { + dns_dispentry_t *resp = NULL; + resp = ISC_LIST_HEAD(disp->active); if (resp != NULL) { - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - ISC_LIST_UNLINK(disp->active, resp, alink); - ISC_LIST_APPEND(disp->active, resp, alink); - disp->timedout++; *respp = resp; @@ -624,7 +663,7 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, isc_result_t result = ISC_R_SUCCESS; dns_dispentry_t *resp = NULL; - dispatch_log(disp, LVL(90), "success, length == %d, addr = %p", + dispatch_log(disp, LVL(90), "TCP read success, length == %d, addr = %p", region->length, region->base); /* @@ -659,30 +698,15 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, LOCK(&qid->lock); resp = entry_search(qid, peer, id, disp->localport, bucket); if (resp != NULL) { - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - *respp = resp; - } else if (disp->timedout > 0) { - /* There was active query that timed-out before */ - disp->timedout--; - - resp = ISC_LIST_HEAD(disp->active); - if (resp != NULL) { - /* - * It's a DNS response, but didn't match any outstanding - * queries. It's possible we would have timed out by - * now, but non-matching responses prevented it, so we - * check the age of the oldest active resp. - */ - int timeout = resp->timeout - dispentry_runtime(resp); - if (timeout <= 0) { - result = tcp_recv_timeout(disp, respp); - } + if (resp->reading) { + *respp = resp; } else { - result = ISC_R_NOTFOUND; + /* We already got our DNS message. */ + result = ISC_R_UNEXPECTED; } } else { /* We are not expecting this DNS message */ - result = ISC_R_UNEXPECTED; + result = ISC_R_NOTFOUND; } dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", bucket, isc_result_totext(result)); @@ -692,7 +716,19 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, } static void -tcp_recv_shutdown(dns_dispatch_t *disp, dns_displist_t *resps) { +tcp_recv_add(dns_displist_t *resps, dns_dispentry_t *resp, + isc_result_t result) { + dns_dispentry_ref(resp); /* DISPENTRY009 */ + ISC_LIST_UNLINK(resp->disp->active, resp, alink); + ISC_LIST_APPEND(*resps, resp, rlink); + INSIST(resp->reading); + resp->reading = false; + resp->result = result; +} + +static void +tcp_recv_shutdown(dns_dispatch_t *disp, dns_displist_t *resps, + isc_result_t result) { dns_dispentry_t *resp = NULL, *next = NULL; /* @@ -700,29 +736,28 @@ tcp_recv_shutdown(dns_dispatch_t *disp, dns_displist_t *resps) { */ for (resp = ISC_LIST_HEAD(disp->active); resp != NULL; resp = next) { next = ISC_LIST_NEXT(resp, alink); - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - ISC_LIST_UNLINK(disp->active, resp, alink); - ISC_LIST_APPEND(*resps, resp, rlink); + tcp_recv_add(resps, resp, result); } } static void tcp_recv_done(dns_dispentry_t *resp, isc_result_t eresult, isc_region_t *region) { + dispentry_log(resp, LVL(90), "read callback: %s", + isc_result_totext(eresult)); + resp->response(eresult, region, resp->arg); - dispentry_detach(&resp); + dns_dispentry_detach(&resp); /* DISPENTRY009 */ } static void -tcp_recv_cancelall(dns_displist_t *resps, isc_region_t *region, - isc_result_t result) { +tcp_recv_processall(dns_displist_t *resps, isc_region_t *region) { dns_dispentry_t *resp = NULL, *next = NULL; for (resp = ISC_LIST_HEAD(*resps); resp != NULL; resp = next) { next = ISC_LIST_NEXT(resp, rlink); ISC_LIST_UNLINK(*resps, resp, rlink); - resp->response(result, region, resp->arg); - dispentry_detach(&resp); + tcp_recv_done(resp, resp->result, region); } } @@ -732,8 +767,6 @@ tcp_recv_cancelall(dns_displist_t *resps, isc_region_t *region, * If I/O result == CANCELED, EOF, or error, notify everyone as the * various queues drain. * - * If query, restart. - * * If response: * Allocate event, fill in details. * If cannot allocate, restart. @@ -749,25 +782,82 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, dns_qid_t *qid = NULL; char buf[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_t peer; - dns_displist_t resps; + dns_displist_t resps = ISC_LIST_INITIALIZER; REQUIRE(VALID_DISPATCH(disp)); - atomic_store(&disp->tcpreading, false); - qid = disp->mgr->qid; - ISC_LIST_INIT(resps); - LOCK(&disp->lock); + INSIST(disp->reading); + disp->reading = false; - dispatch_log(disp, LVL(90), "TCP read:%s:requests %d, buffers %d", - isc_result_totext(result), disp->requests, - disp->tcpbuffers); + dispatch_log(disp, LVL(90), "TCP read:%s:requests %u", + isc_result_totext(result), disp->requests); peer = isc_nmhandle_peeraddr(handle); + /* + * Phase 1: Process timeout and success. + */ switch (result) { + case ISC_R_TIMEDOUT: + /* + * Time out the oldest response in the active queue. + */ + result = tcp_recv_oldest(disp, &resp); + break; + case ISC_R_SUCCESS: + /* We got an answer */ + result = tcp_recv_success(disp, region, qid, &peer, &resp); + break; + + default: + break; + } + + if (resp != NULL) { + tcp_recv_add(&resps, resp, result); + } + + /* + * Phase 2: Look if we timed out before. + */ + + if (result == ISC_R_NOTFOUND) { + if (disp->timedout > 0) { + /* There was active query that timed-out before */ + disp->timedout--; + } else { + result = ISC_R_UNEXPECTED; + } + } + + /* + * Phase 3: Trigger timeouts. It's possible that the responses would + * have been timedout out already, but non-matching TCP reads have + * prevented this. + */ + dns_dispentry_t *next = NULL; + for (resp = ISC_LIST_HEAD(disp->active); resp != NULL; resp = next) { + next = ISC_LIST_NEXT(resp, alink); + + /* FIXME: dispentry_runtime is always 0 for TCP */ + int timeout = resp->timeout - dispentry_runtime(resp); + if (timeout <= 0) { + tcp_recv_add(&resps, resp, ISC_R_TIMEDOUT); + } + } + + /* + * Phase 4: log if we errored out. + */ + switch (result) { + case ISC_R_SUCCESS: + case ISC_R_TIMEDOUT: + case ISC_R_NOTFOUND: + break; + case ISC_R_SHUTTINGDOWN: case ISC_R_CANCELED: case ISC_R_EOF: @@ -775,68 +865,33 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, isc_sockaddr_format(&peer, buf, sizeof(buf)); dispatch_log(disp, LVL(90), "shutting down TCP: %s: %s", buf, isc_result_totext(result)); - tcp_recv_shutdown(disp, &resps); + tcp_recv_shutdown(disp, &resps, result); break; - - case ISC_R_TIMEDOUT: - /* - * Time out the oldest response in the active queue, - * and move it to the end. (We don't remove it from the - * active queue immediately, though, because the callback - * might decide to keep waiting and leave it active.) - */ - result = tcp_recv_timeout(disp, &resp); - break; - - case ISC_R_SUCCESS: - /* We got an answer */ - result = tcp_recv_success(disp, region, qid, &peer, &resp); - if (result != ISC_R_UNEXPECTED) { - /* - * It's a valid DNS response, which may or may not - * have matched an outstanding query. - */ - - dispatch_getnext(disp, NULL, -1); - break; - } - - /* Got an invalid DNS response, terminate the connection */ - FALLTHROUGH; default: isc_sockaddr_format(&peer, buf, sizeof(buf)); dispatch_log(disp, ISC_LOG_ERROR, "shutting down due to TCP " "receive error: %s: %s", buf, isc_result_totext(result)); - tcp_recv_shutdown(disp, &resps); + tcp_recv_shutdown(disp, &resps, result); break; } + /* + * Phase 5: Resume reading if there are still active responses + */ + if (!ISC_LIST_EMPTY(disp->active)) { + tcp_startrecv(NULL, disp, ISC_LIST_HEAD(disp->active)); + } + UNLOCK(&disp->lock); - switch (result) { - case ISC_R_SUCCESS: - case ISC_R_TIMEDOUT: - /* - * Either we found a matching response, or we timed out - * and canceled the oldest resp. - */ - INSIST(resp != NULL); - tcp_recv_done(resp, result, region); - break; - case ISC_R_NOTFOUND: - /* - * Either we got a response that didn't match any active - * resps, or we timed out but there *were* no active resps. - */ - break; - default: - /* We're being shut down; cancel all outstanding resps. */ - tcp_recv_cancelall(&resps, region, result); - } + /* + * Phase 6: Process all scheduled callbacks. + */ + tcp_recv_processall(&resps, region); - dns_dispatch_detach(&disp); + dns_dispatch_detach(&disp); /* DISPATCH002 */ } /*% @@ -919,6 +974,10 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t)); *mgr = (dns_dispatchmgr_t){ .magic = 0 }; +#if DNS_DISPATCH_TRACE + fprintf(stderr, "dns_dispatchmgr__init:%s:%s:%d:%p->references = 1\n", + __func__, __FILE__, __LINE__, mgr); +#endif isc_refcount_init(&mgr->references, 1); isc_mem_attach(mctx, &mgr->mctx); @@ -943,31 +1002,11 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, return (ISC_R_SUCCESS); } -void -dns_dispatchmgr_attach(dns_dispatchmgr_t *mgr, dns_dispatchmgr_t **mgrp) { - REQUIRE(VALID_DISPATCHMGR(mgr)); - REQUIRE(mgrp != NULL && *mgrp == NULL); - - isc_refcount_increment(&mgr->references); - - *mgrp = mgr; -} - -void -dns_dispatchmgr_detach(dns_dispatchmgr_t **mgrp) { - dns_dispatchmgr_t *mgr = NULL; - uint_fast32_t ref; - - REQUIRE(mgrp != NULL && VALID_DISPATCHMGR(*mgrp)); - - mgr = *mgrp; - *mgrp = NULL; - - ref = isc_refcount_decrement(&mgr->references); - if (ref == 1) { - dispatchmgr_destroy(mgr); - } -} +#if DNS_DISPATCH_TRACE +ISC_REFCOUNT_TRACE_IMPL(dns_dispatchmgr, dispatchmgr_destroy); +#else +ISC_REFCOUNT_IMPL(dns_dispatchmgr, dispatchmgr_destroy); +#endif void dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) { @@ -1089,46 +1128,25 @@ dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, */ disp = isc_mem_get(mgr->mctx, sizeof(*disp)); - *disp = (dns_dispatch_t){ .socktype = type }; + *disp = (dns_dispatch_t){ + .socktype = type, + .link = ISC_LINK_INITIALIZER, + .active = ISC_LIST_INITIALIZER, + .pending = ISC_LIST_INITIALIZER, + .magic = DISPATCH_MAGIC, + }; dns_dispatchmgr_attach(mgr, &disp->mgr); - isc_refcount_init(&disp->references, 1); - ISC_LINK_INIT(disp, link); - ISC_LIST_INIT(disp->active); - ISC_LIST_INIT(disp->pending); - +#if DNS_DISPATCH_TRACE + fprintf(stderr, "dns_dispatch__init:%s:%s:%d:%p->references = 1\n", + __func__, __FILE__, __LINE__, disp); +#endif + isc_refcount_init(&disp->references, 1); /* DISPATCH000 */ isc_mutex_init(&disp->lock); - disp->magic = DISPATCH_MAGIC; - *dispp = disp; } -/* - * MUST be unlocked, and not used by anything. - */ -static void -dispatch_free(dns_dispatch_t **dispp) { - dns_dispatch_t *disp = NULL; - dns_dispatchmgr_t *mgr = NULL; - - REQUIRE(VALID_DISPATCH(*dispp)); - disp = *dispp; - *dispp = NULL; - - disp->magic = 0; - - mgr = disp->mgr; - REQUIRE(VALID_DISPATCHMGR(mgr)); - - INSIST(disp->requests == 0); - INSIST(ISC_LIST_EMPTY(disp->active)); - - isc_mutex_destroy(&disp->lock); - - isc_mem_put(mgr->mctx, disp, sizeof(*disp)); -} - isc_result_t dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, const isc_sockaddr_t *destaddr, isc_dscp_t dscp, @@ -1163,8 +1181,17 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, ISC_LIST_APPEND(mgr->list, disp, link); UNLOCK(&mgr->lock); - mgr_log(mgr, LVL(90), "dns_dispatch_createtcp: created TCP dispatch %p", - disp); + if (isc_log_wouldlog(dns_lctx, 90)) { + char addrbuf[ISC_SOCKADDR_FORMATSIZE]; + + isc_sockaddr_format(localaddr, addrbuf, + ISC_SOCKADDR_FORMATSIZE); + + mgr_log(mgr, LVL(90), + "dns_dispatch_createtcp: created TCP dispatch %p for " + "%s", + disp, addrbuf); + } *dispp = disp; return (ISC_R_SUCCESS); @@ -1172,15 +1199,13 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, isc_result_t dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, - const isc_sockaddr_t *localaddr, bool *connected, - dns_dispatch_t **dispp) { + const isc_sockaddr_t *localaddr, dns_dispatch_t **dispp) { dns_dispatch_t *disp_connected = NULL; dns_dispatch_t *disp_fallback = NULL; isc_result_t result = ISC_R_NOTFOUND; REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(destaddr != NULL); - REQUIRE(connected != NULL); REQUIRE(dispp != NULL && *dispp == NULL); LOCK(&mgr->lock); @@ -1212,40 +1237,52 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, (localaddr == NULL || isc_sockaddr_eqaddr(localaddr, &sockname))) { - if (atomic_load(&disp->tcpstate) == - DNS_DISPATCHSTATE_CONNECTED) - { - /* We found connected dispatch */ - disp_connected = disp; - UNLOCK(&disp->lock); + switch (disp->state) { + case DNS_DISPATCHSTATE_CONNECTED: + /* We found a connected dispatch */ + dns_dispatch_attach(disp, &disp_connected); break; + case DNS_DISPATCHSTATE_NONE: + case DNS_DISPATCHSTATE_CONNECTING: + /* We found "a" dispatch, store it for later */ + if (disp_fallback == NULL) { + dns_dispatch_attach(disp, + &disp_fallback); + } + break; + case DNS_DISPATCHSTATE_CANCELED: + /* + * We found a canceled dispatch, help its + * removal from the list, and skip it. + */ + ISC_LIST_UNLINK(disp->mgr->list, disp, link); + break; + default: + UNREACHABLE(); } - - /* We found "a" dispatch, store it for later */ - if (disp_fallback == NULL) { - disp_fallback = disp; - } - - UNLOCK(&disp->lock); - continue; } UNLOCK(&disp->lock); + + if (disp_connected != NULL) { + break; + } } if (disp_connected != NULL) { /* We found connected dispatch */ INSIST(disp_connected->handle != NULL); - *connected = true; - dns_dispatch_attach(disp_connected, dispp); + *dispp = disp_connected; + disp_connected = NULL; result = ISC_R_SUCCESS; - } else if (disp_fallback != NULL) { - /* We found matching dispatch */ - *connected = false; - dns_dispatch_attach(disp_fallback, dispp); + if (disp_fallback != NULL) { + dns_dispatch_detach(&disp_fallback); + } + } else if (disp_fallback != NULL) { + *dispp = disp_fallback; result = ISC_R_SUCCESS; } @@ -1282,8 +1319,6 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, dns_dispatch_t *disp = NULL; isc_sockaddr_t sa_any; - dispatch_allocate(mgr, isc_socktype_udp, &disp); - /* * Check whether this address/port is available locally. */ @@ -1291,58 +1326,66 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) { result = isc_nm_checkaddr(localaddr, isc_socktype_udp); if (result != ISC_R_SUCCESS) { - goto cleanup; + return (result); } } + dispatch_allocate(mgr, isc_socktype_udp, &disp); + if (isc_log_wouldlog(dns_lctx, 90)) { char addrbuf[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_format(localaddr, addrbuf, ISC_SOCKADDR_FORMATSIZE); mgr_log(mgr, LVL(90), - "dispatch_createudp: created UDP dispatch for %s", - addrbuf); + "dispatch_createudp: created UDP dispatch %p for %s", + disp, addrbuf); } disp->local = *localaddr; /* - * Append it to the dispatcher list. + * Don't append it to the dispatcher list, we don't care about UDP, only + * TCP should be searched + * + * ISC_LIST_APPEND(mgr->list, disp, link); */ - ISC_LIST_APPEND(mgr->list, disp, link); - - mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp); *dispp = disp; return (result); - - /* - * Error returns. - */ -cleanup: - dispatch_free(&disp); - - return (result); } static void dispatch_destroy(dns_dispatch_t *disp) { dns_dispatchmgr_t *mgr = disp->mgr; + isc_refcount_destroy(&disp->references); + disp->magic = 0; + LOCK(&mgr->lock); - ISC_LIST_UNLINK(mgr->list, disp, link); + if (ISC_LINK_LINKED(disp, link)) { + ISC_LIST_UNLINK(disp->mgr->list, disp, link); + } UNLOCK(&mgr->lock); - dispatch_log(disp, LVL(90), "shutting down; detaching from handle %p", - disp->handle); + INSIST(disp->requests == 0); + INSIST(ISC_LIST_EMPTY(disp->pending)); + INSIST(ISC_LIST_EMPTY(disp->active)); - if (disp->handle != NULL) { + INSIST(!ISC_LINK_LINKED(disp, link)); + + dispatch_log(disp, LVL(90), "destroying dispatch %p", disp); + + if (disp->handle) { + dispatch_log(disp, LVL(90), "detaching TCP handle %p from %p", + disp->handle, &disp->handle); isc_nmhandle_detach(&disp->handle); } - dispatch_free(&disp); + isc_mutex_destroy(&disp->lock); + + isc_mem_put(mgr->mctx, disp, sizeof(*disp)); /* * Because dispatch uses mgr->mctx, we must detach after freeing @@ -1351,106 +1394,73 @@ dispatch_destroy(dns_dispatch_t *disp) { dns_dispatchmgr_detach(&mgr); } -void -dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) { - REQUIRE(VALID_DISPATCH(disp)); - REQUIRE(dispp != NULL && *dispp == NULL); - - isc_refcount_increment(&disp->references); - - *dispp = disp; -} - -void -dns_dispatch_detach(dns_dispatch_t **dispp) { - dns_dispatch_t *disp = NULL; - uint_fast32_t ref; - - REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp)); - - disp = *dispp; - *dispp = NULL; - - ref = isc_refcount_decrement(&disp->references); - dispatch_log(disp, LVL(90), "detach: refcount %" PRIuFAST32, ref - 1); - if (ref == 1) { - LOCK(&disp->lock); - INSIST(ISC_LIST_EMPTY(disp->pending)); - INSIST(ISC_LIST_EMPTY(disp->active)); - UNLOCK(&disp->lock); - - dispatch_destroy(disp); - } -} +#if DNS_DISPATCH_TRACE +ISC_REFCOUNT_TRACE_IMPL(dns_dispatch, dispatch_destroy); +#else +ISC_REFCOUNT_IMPL(dns_dispatch, dispatch_destroy); +#endif isc_result_t dns_dispatch_add(dns_dispatch_t *disp, unsigned int options, unsigned int timeout, const isc_sockaddr_t *dest, dispatch_cb_t connected, dispatch_cb_t sent, dispatch_cb_t response, void *arg, dns_messageid_t *idp, - dns_dispentry_t **resp) { - dns_dispentry_t *res = NULL; + dns_dispentry_t **respp) { + dns_dispentry_t *resp = NULL; dns_qid_t *qid = NULL; in_port_t localport = 0; dns_messageid_t id; unsigned int bucket; bool ok = false; int i = 0; - dispatch_cb_t oldest_response = NULL; REQUIRE(VALID_DISPATCH(disp)); REQUIRE(dest != NULL); - REQUIRE(resp != NULL && *resp == NULL); + REQUIRE(respp != NULL && *respp == NULL); REQUIRE(idp != NULL); REQUIRE(disp->socktype == isc_socktype_tcp || disp->socktype == isc_socktype_udp); + REQUIRE(connected != NULL); + REQUIRE(response != NULL); + REQUIRE(sent != NULL); LOCK(&disp->lock); - if (disp->requests >= DNS_DISPATCH_MAXREQUESTS) { + if (disp->state == DNS_DISPATCHSTATE_CANCELED) { UNLOCK(&disp->lock); - return (ISC_R_QUOTA); + return (ISC_R_CANCELED); } qid = disp->mgr->qid; - if (disp->socktype == isc_socktype_udp && - disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) - { - dns_dispentry_t *oldest = NULL; + resp = isc_mem_get(disp->mgr->mctx, sizeof(*resp)); - /* - * Kill oldest outstanding query if the number of sockets - * exceeds the quota to keep the room for new queries. - */ - oldest = ISC_LIST_HEAD(disp->active); - if (oldest != NULL) { - oldest_response = oldest->response; - inc_stats(disp->mgr, dns_resstatscounter_dispabort); - } - } + *resp = (dns_dispentry_t){ + .port = localport, + .timeout = timeout, + .peer = *dest, + .connected = connected, + .sent = sent, + .response = response, + .arg = arg, + .link = ISC_LINK_INITIALIZER, + .alink = ISC_LINK_INITIALIZER, + .plink = ISC_LINK_INITIALIZER, + .rlink = ISC_LINK_INITIALIZER, + .magic = RESPONSE_MAGIC, + }; - res = isc_mem_get(disp->mgr->mctx, sizeof(*res)); - - *res = (dns_dispentry_t){ .port = localport, - .timeout = timeout, - .peer = *dest, - .connected = connected, - .sent = sent, - .response = response, - .arg = arg }; - - isc_refcount_init(&res->references, 1); - - ISC_LINK_INIT(res, link); - ISC_LINK_INIT(res, alink); - ISC_LINK_INIT(res, plink); - ISC_LINK_INIT(res, rlink); +#if DNS_DISPATCH_TRACE + fprintf(stderr, "dns_dispentry__init:%s:%s:%d:%p->references = 1\n", + __func__, __FILE__, __LINE__, res); +#endif + isc_refcount_init(&resp->references, 1); /* DISPENTRY000 */ if (disp->socktype == isc_socktype_udp) { - isc_result_t result = setup_socket(disp, res, dest, &localport); + isc_result_t result = setup_socket(disp, resp, dest, + &localport); if (result != ISC_R_SUCCESS) { - isc_mem_put(disp->mgr->mctx, res, sizeof(*res)); + isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); UNLOCK(&disp->lock); inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); return (result); @@ -1477,383 +1487,555 @@ dns_dispatch_add(dns_dispatch_t *disp, unsigned int options, ok = true; break; } + if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { + /* When using fixed ID, we either must use it or fail */ + break; + } id += qid->qid_increment; id &= 0x0000ffff; } while (i++ < 64); + + if (ok) { + resp->id = id; + resp->bucket = bucket; + ISC_LIST_APPEND(qid->qid_table[bucket], resp, link); + } UNLOCK(&qid->lock); if (!ok) { - isc_mem_put(disp->mgr->mctx, res, sizeof(*res)); + isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); UNLOCK(&disp->lock); return (ISC_R_NOMORE); } - dns_dispatch_attach(disp, &res->disp); - - res->id = id; - res->bucket = bucket; - res->magic = RESPONSE_MAGIC; + dns_dispatch_attach(disp, &resp->disp); /* DISPATCH001 */ disp->requests++; - LOCK(&qid->lock); - ISC_LIST_APPEND(qid->qid_table[bucket], res, link); - UNLOCK(&qid->lock); - inc_stats(disp->mgr, (disp->socktype == isc_socktype_udp) ? dns_resstatscounter_disprequdp : dns_resstatscounter_dispreqtcp); - ISC_LIST_APPEND(disp->active, res, alink); - UNLOCK(&disp->lock); - if (oldest_response != NULL) { - oldest_response(ISC_R_CANCELED, NULL, res->arg); - } - *idp = id; - *resp = res; + *respp = resp; return (ISC_R_SUCCESS); } -void -dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, int32_t timeout) { - REQUIRE(timeout <= (int32_t)UINT16_MAX); - - switch (disp->socktype) { - case isc_socktype_udp: - REQUIRE(resp != NULL); - - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - if (timeout > 0) { - isc_nmhandle_settimeout(resp->handle, timeout); - } - isc_nm_read(resp->handle, udp_recv, resp); - break; - - case isc_socktype_tcp: - if (atomic_compare_exchange_strong(&disp->tcpreading, - &(bool){ false }, true)) - { - dns_dispatch_attach(disp, &(dns_dispatch_t *){ NULL }); - if (timeout > 0) { - isc_nmhandle_settimeout(disp->handle, timeout); - } - isc_nm_read(disp->handle, tcp_recv, disp); - } - break; - - default: - UNREACHABLE(); - } -} - isc_result_t dns_dispatch_getnext(dns_dispentry_t *resp) { - dns_dispatch_t *disp = NULL; - int32_t timeout; - REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); - disp = resp->disp; + dns_dispatch_t *disp = resp->disp; + isc_result_t result = ISC_R_SUCCESS; + int32_t timeout = -1; - REQUIRE(VALID_DISPATCH(disp)); - - if (disp->socktype == isc_socktype_udp) { + LOCK(&disp->lock); + switch (disp->socktype) { + case isc_socktype_udp: { timeout = resp->timeout - dispentry_runtime(resp); if (timeout <= 0) { - return (ISC_R_TIMEDOUT); + result = ISC_R_TIMEDOUT; + break; } - } else { - timeout = -1; + udp_dispatch_getnext(resp, timeout); + break; + } + case isc_socktype_tcp: + tcp_dispatch_getnext(disp, resp, timeout); + break; + default: + UNREACHABLE(); } - LOCK(&disp->lock); - dispatch_getnext(disp, resp, timeout); UNLOCK(&disp->lock); - return (ISC_R_SUCCESS); + + return (result); } -void -dns_dispatch_cancel(dns_dispentry_t **respp) { - dns_dispentry_t *resp = NULL; - dns_dispatch_t *disp = NULL; - - REQUIRE(respp != NULL); - - resp = *respp; - *respp = NULL; - +static void +udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); + REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr)); - disp = resp->disp; - resp->canceled = true; - - /* Connected UDP. */ - if (resp->handle != NULL) { - isc_nm_cancelread(resp->handle); - goto done; - } + dns_dispatch_t *disp = resp->disp; + dns_dispatchmgr_t *mgr = disp->mgr; + dns_qid_t *qid = mgr->qid; + dispatch_cb_t connected = NULL; + dispatch_cb_t response = NULL; LOCK(&disp->lock); - /* TCP pending connection. */ - if (ISC_LINK_LINKED(resp, plink)) { - dns_dispentry_t *copy = resp; + dispentry_log(resp, LVL(90), + "canceling response: %s, %s/%s (%s/%s), " + "requests %u", + isc_result_totext(result), state2str(resp->state), + resp->reading ? "reading" : "not reading", + state2str(disp->state), + disp->reading ? "reading" : "not reading", + disp->requests); - ISC_LIST_UNLINK(disp->pending, resp, plink); - if (resp->connected != NULL) { - resp->connected(ISC_R_CANCELED, NULL, resp->arg); - } - - /* - * We need to detach twice if we were pending - * connection - once to take the place of the - * detach in tcp_connected() or udp_connected() - * that we won't reach, and again later in - * dns_dispatch_done(). - */ - dispentry_detach(©); - UNLOCK(&disp->lock); - goto done; - } - - /* - * Connected TCP, or unconnected UDP. - * - * If TCP, we don't want to cancel the dispatch - * unless this is the last resp waiting. - */ if (ISC_LINK_LINKED(resp, alink)) { ISC_LIST_UNLINK(disp->active, resp, alink); - if (ISC_LIST_EMPTY(disp->active) && disp->handle != NULL) { - isc_nm_cancelread(disp->handle); - } else if (resp->response != NULL) { - resp->response(ISC_R_CANCELED, NULL, resp->arg); - } } - UNLOCK(&disp->lock); -done: - dns_dispatch_done(&resp); -} - -void -dns_dispatch_done(dns_dispentry_t **respp) { - dns_dispatchmgr_t *mgr = NULL; - dns_dispatch_t *disp = NULL; - dns_dispentry_t *resp = NULL; - dns_qid_t *qid = NULL; - - REQUIRE(respp != NULL); - - resp = *respp; - - REQUIRE(VALID_RESPONSE(resp)); - - disp = resp->disp; - - REQUIRE(VALID_DISPATCH(disp)); - - mgr = disp->mgr; - - REQUIRE(VALID_DISPATCHMGR(mgr)); - - qid = mgr->qid; - - LOCK(&disp->lock); - INSIST(disp->requests > 0); - disp->requests--; - - dec_stats(disp->mgr, (disp->socktype == isc_socktype_udp) - ? dns_resstatscounter_disprequdp - : dns_resstatscounter_dispreqtcp); - - deactivate_dispentry(disp, resp); - - LOCK(&qid->lock); - ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link); - UNLOCK(&qid->lock); - UNLOCK(&disp->lock); - - dispentry_detach(respp); -} - -/* - * disp must be locked. - */ -static void -startrecv(isc_nmhandle_t *handle, dns_dispatch_t *disp, dns_dispentry_t *resp) { - switch (disp->socktype) { - case isc_socktype_udp: - REQUIRE(resp != NULL && resp->handle == NULL); - - TIME_NOW(&resp->start); - isc_nmhandle_attach(handle, &resp->handle); - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - isc_nm_read(resp->handle, udp_recv, resp); + switch (resp->state) { + case DNS_DISPATCHSTATE_NONE: break; - case isc_socktype_tcp: - REQUIRE(disp != NULL); - - LOCK(&disp->lock); - REQUIRE(disp->handle == NULL); - atomic_compare_exchange_enforced( - &disp->tcpstate, - &(uint_fast32_t){ DNS_DISPATCHSTATE_CONNECTING }, - DNS_DISPATCHSTATE_CONNECTED); - - isc_nmhandle_attach(handle, &disp->handle); - dns_dispatch_attach(disp, &(dns_dispatch_t *){ NULL }); - isc_nm_read(disp->handle, tcp_recv, disp); - UNLOCK(&disp->lock); - + case DNS_DISPATCHSTATE_CONNECTING: + dns_dispentry_ref(resp); /* DISPENTRY008 */ + ISC_LIST_UNLINK(disp->pending, resp, plink); + connected = resp->connected; break; + case DNS_DISPATCHSTATE_CONNECTED: + if (resp->reading) { + dns_dispentry_ref(resp); /* DISPENTRY003 */ + response = resp->response; + + dispentry_log(resp, LVL(90), "canceling read on %p", + resp->handle); + isc_nm_cancelread(resp->handle); + } + break; + + case DNS_DISPATCHSTATE_CANCELED: + goto unlock; + default: UNREACHABLE(); } + + dec_stats(disp->mgr, dns_resstatscounter_disprequdp); + + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link); + UNLOCK(&qid->lock); + resp->state = DNS_DISPATCHSTATE_CANCELED; + +unlock: + UNLOCK(&disp->lock); + + if (connected) { + dispentry_log(resp, LVL(90), "connect callback: %s", + isc_result_totext(result)); + connected(result, NULL, resp->arg); + dns_dispentry_detach(&resp); /* DISPENTRY008 */ + } + if (response) { + dispentry_log(resp, LVL(90), "read callback: %s", + isc_result_totext(result)); + response(result, NULL, resp->arg); + dns_dispentry_detach(&resp); /* DISPENTRY003 */ + } +} + +static void +tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { + REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); + REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr)); + + dns_dispatch_t *disp = resp->disp; + dns_dispatchmgr_t *mgr = disp->mgr; + dns_qid_t *qid = mgr->qid; + dispatch_cb_t connected = NULL; + dns_displist_t resps = ISC_LIST_INITIALIZER; + + LOCK(&disp->lock); + dispentry_log(resp, LVL(90), + "canceling response: %s, %s/%s (%s/%s), " + "requests %u", + isc_result_totext(result), state2str(resp->state), + resp->reading ? "reading" : "not reading", + state2str(disp->state), + disp->reading ? "reading" : "not reading", + disp->requests); + + switch (resp->state) { + case DNS_DISPATCHSTATE_NONE: + break; + + case DNS_DISPATCHSTATE_CONNECTING: + ISC_LIST_UNLINK(disp->pending, resp, plink); + connected = resp->connected; + break; + + case DNS_DISPATCHSTATE_CONNECTED: + if (resp->reading) { + tcp_recv_add(&resps, resp, ISC_R_CANCELED); + } + + INSIST(!ISC_LINK_LINKED(resp, alink)); + + if (ISC_LIST_EMPTY(disp->active)) { + INSIST(disp->handle != NULL); + +#if DISPATCH_TCP_KEEPALIVE + /* + * This is an experimental code that keeps the TCP + * connection open for 1 second before it is finally + * closed. By keeping the TCP connection open, it can + * be reused by dns_request that uses + * dns_dispatch_gettcp() to join existing TCP + * connections. + * + * It is disabled for now, because it changes the + * behaviour, but I am keeping the code here for future + * reference when we improve the dns_dispatch to reuse + * the TCP connections also in the resolver. + * + * The TCP connection reuse should be seamless and not + * require any extra handling on the client side though. + */ + isc_nmhandle_cleartimeout(disp->handle); + isc_nmhandle_settimeout(disp->handle, 1000); + + if (!disp->reading) { + dispentry_log(resp, LVL(90), + "final 1 second timeout on %p", + disp->handle); + tcp_startrecv(NULL, disp, NULL); + } +#else + if (disp->reading) { + dispentry_log(resp, LVL(90), + "canceling read on %p", + disp->handle); + isc_nm_cancelread(disp->handle); + } +#endif + } + break; + + case DNS_DISPATCHSTATE_CANCELED: + goto unlock; + + default: + UNREACHABLE(); + } + + dec_stats(disp->mgr, dns_resstatscounter_dispreqtcp); + + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link); + UNLOCK(&qid->lock); + resp->state = DNS_DISPATCHSTATE_CANCELED; + +unlock: + UNLOCK(&disp->lock); + + /* + * NOTE: Calling the connected and response callbacks directly from here + * should be done asynchronously, as the dns_dispatch_done() is usually + * called directly from the response callback, so there's a slight + * chance that the call stack will get higher here, but it's mitigated + * by the ".reading" flag, so we don't ever go into a loop. + */ + + if (connected) { + dispentry_log(resp, LVL(90), "connect callback: %s", + isc_result_totext(result)); + connected(result, NULL, resp->arg); + dns_dispentry_detach(&resp); /* DISPENTRY005 */ + } + + tcp_recv_processall(&resps, NULL); +} + +static void +dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { + REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); + + dns_dispatch_t *disp = resp->disp; + + switch (disp->socktype) { + case isc_socktype_udp: + udp_dispentry_cancel(resp, result); + break; + case isc_socktype_tcp: + tcp_dispentry_cancel(resp, result); + break; + default: + UNREACHABLE(); + } +} + +void +dns_dispatch_done(dns_dispentry_t **respp) { + REQUIRE(VALID_RESPONSE(*respp)); + + dns_dispentry_t *resp = *respp; + *respp = NULL; + + dispentry_cancel(resp, ISC_R_CANCELED); + dns_dispentry_detach(&resp); /* DISPENTRY000 */ +} + +static void +udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp) { + REQUIRE(VALID_RESPONSE(resp)); + + TIME_NOW(&resp->start); + dispentry_log(resp, LVL(90), "attaching handle %p to %p", handle, + &resp->handle); + isc_nmhandle_attach(handle, &resp->handle); + dns_dispentry_ref(resp); /* DISPENTRY003 */ + dispentry_log(resp, LVL(90), "reading"); + isc_nm_read(resp->handle, udp_recv, resp); + resp->reading = true; +} + +static void +tcp_startrecv(isc_nmhandle_t *handle, dns_dispatch_t *disp, + dns_dispentry_t *resp) { + REQUIRE(VALID_DISPATCH(disp)); + REQUIRE(disp->socktype == isc_socktype_tcp); + + if (handle != NULL) { + isc_nmhandle_attach(handle, &disp->handle); + } + dns_dispatch_ref(disp); /* DISPATCH002 */ + if (resp != NULL) { + dispentry_log(resp, LVL(90), "reading from %p", disp->handle); + } else { + dispatch_log(disp, LVL(90), + "TCP reading without response from %p", + disp->handle); + } + isc_nm_read(disp->handle, tcp_recv, disp); + disp->reading = true; } static void tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { dns_dispatch_t *disp = (dns_dispatch_t *)arg; - dns_dispentry_t *resp = NULL, *next = NULL; - dns_displist_t resps; + dns_dispentry_t *resp = NULL; + dns_dispentry_t *next = NULL; + dns_displist_t resps = ISC_LIST_INITIALIZER; - dispatch_log(disp, LVL(90), "TCP connected (%p): %s", disp, - isc_result_totext(eresult)); + if (isc_log_wouldlog(dns_lctx, 90)) { + char localbuf[ISC_SOCKADDR_FORMATSIZE]; + char peerbuf[ISC_SOCKADDR_FORMATSIZE]; + if (handle != NULL) { + isc_sockaddr_t local = isc_nmhandle_localaddr(handle); + isc_sockaddr_t peer = isc_nmhandle_peeraddr(handle); - ISC_LIST_INIT(resps); + isc_sockaddr_format(&local, localbuf, + ISC_SOCKADDR_FORMATSIZE); + isc_sockaddr_format(&peer, peerbuf, + ISC_SOCKADDR_FORMATSIZE); + } else { + isc_sockaddr_format(&disp->local, localbuf, + ISC_SOCKADDR_FORMATSIZE); + isc_sockaddr_format(&disp->peer, peerbuf, + ISC_SOCKADDR_FORMATSIZE); + } + + dispatch_log(disp, LVL(90), "connected from %s to %s: %s", + localbuf, peerbuf, isc_result_totext(eresult)); + } + + LOCK(&disp->lock); + INSIST(disp->state = DNS_DISPATCHSTATE_CONNECTING); if (eresult == ISC_R_SUCCESS) { - startrecv(handle, disp, NULL); + disp->state = DNS_DISPATCHSTATE_CONNECTED; + tcp_startrecv(handle, disp, resp); + } else { + disp->state = DNS_DISPATCHSTATE_NONE; } /* * If there are pending responses, call the connect * callbacks for all of them. */ - LOCK(&disp->lock); for (resp = ISC_LIST_HEAD(disp->pending); resp != NULL; resp = next) { next = ISC_LIST_NEXT(resp, plink); ISC_LIST_UNLINK(disp->pending, resp, plink); - ISC_LIST_APPEND(resps, resp, plink); + ISC_LIST_APPEND(resps, resp, rlink); + + if (eresult == ISC_R_SUCCESS) { + resp->state = DNS_DISPATCHSTATE_CONNECTED; + ISC_LIST_APPEND(disp->active, resp, alink); + resp->reading = true; + dispentry_log(resp, LVL(90), "start reading"); + } else { + resp->state = DNS_DISPATCHSTATE_NONE; + } } UNLOCK(&disp->lock); for (resp = ISC_LIST_HEAD(resps); resp != NULL; resp = next) { - next = ISC_LIST_NEXT(resp, plink); - ISC_LIST_UNLINK(resps, resp, plink); + next = ISC_LIST_NEXT(resp, rlink); + ISC_LIST_UNLINK(resps, resp, rlink); - if (resp->connected != NULL) { - resp->connected(eresult, NULL, resp->arg); - } - dispentry_detach(&resp); + dispentry_log(resp, LVL(90), "connect callback: %s", + isc_result_totext(eresult)); + resp->connected(eresult, NULL, resp->arg); + dns_dispentry_detach(&resp); /* DISPENTRY005 */ } - dns_dispatch_detach(&disp); + dns_dispatch_detach(&disp); /* DISPATCH003 */ } +static void +udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp); + static void udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { dns_dispentry_t *resp = (dns_dispentry_t *)arg; dns_dispatch_t *disp = resp->disp; - dispatch_log(disp, LVL(90), "UDP connected (%p): %s", resp, - isc_result_totext(eresult)); + dispentry_log(resp, LVL(90), "connected: %s", + isc_result_totext(eresult)); - if (eresult == ISC_R_SUCCESS && resp->canceled) { - eresult = ISC_R_CANCELED; - } else if (eresult == ISC_R_SUCCESS) { - startrecv(handle, disp, resp); - } else if (eresult == ISC_R_ADDRINUSE) { + LOCK(&disp->lock); + + switch (resp->state) { + case DNS_DISPATCHSTATE_CANCELED: + UNLOCK(&disp->lock); + goto detach; + case DNS_DISPATCHSTATE_CONNECTING: + ISC_LIST_UNLINK(disp->pending, resp, plink); + break; + default: + UNREACHABLE(); + } + + switch (eresult) { + case ISC_R_SUCCESS: + resp->state = DNS_DISPATCHSTATE_CONNECTED; + udp_startrecv(handle, resp); + break; + case ISC_R_ADDRINUSE: { in_port_t localport = 0; isc_result_t result; /* probably a port collision; try a different one */ - disp->nsockets--; result = setup_socket(disp, resp, &resp->peer, &localport); if (result == ISC_R_SUCCESS) { - dns_dispatch_connect(resp); + UNLOCK(&disp->lock); + udp_dispatch_connect(disp, resp); goto detach; } + resp->state = DNS_DISPATCHSTATE_NONE; + break; + } + default: + resp->state = DNS_DISPATCHSTATE_NONE; + break; } - if (resp->connected != NULL) { - resp->connected(eresult, NULL, resp->arg); - } + UNLOCK(&disp->lock); + + dispentry_log(resp, LVL(90), "connect callback: %s", + isc_result_totext(eresult)); + resp->connected(eresult, NULL, resp->arg); detach: - dispentry_detach(&resp); + dns_dispentry_detach(&resp); /* DISPENTRY004 */ +} + +static void +udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { + LOCK(&disp->lock); + resp->state = DNS_DISPATCHSTATE_CONNECTING; + dns_dispentry_ref(resp); /* DISPENTRY004 */ + ISC_LIST_APPEND(disp->pending, resp, plink); + UNLOCK(&disp->lock); + isc_nm_udpconnect(disp->mgr->nm, &resp->local, &resp->peer, + udp_connected, resp, resp->timeout, 0); +} + +static isc_result_t +tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { + /* Check whether the dispatch is already connecting or connected. */ + LOCK(&disp->lock); + switch (disp->state) { + case DNS_DISPATCHSTATE_NONE: + /* First connection, continue with connecting */ + disp->state = DNS_DISPATCHSTATE_CONNECTING; + resp->state = DNS_DISPATCHSTATE_CONNECTING; + dns_dispentry_ref(resp); /* DISPENTRY005 */ + ISC_LIST_APPEND(disp->pending, resp, plink); + UNLOCK(&disp->lock); + + char localbuf[ISC_SOCKADDR_FORMATSIZE]; + char peerbuf[ISC_SOCKADDR_FORMATSIZE]; + + isc_sockaddr_format(&disp->local, localbuf, + ISC_SOCKADDR_FORMATSIZE); + isc_sockaddr_format(&disp->peer, peerbuf, + ISC_SOCKADDR_FORMATSIZE); + + dns_dispatch_ref(disp); /* DISPATCH003 */ + dispentry_log(resp, LVL(90), + "connecting from %s to %s, timeout %u", localbuf, + peerbuf, resp->timeout); + + isc_nm_tcpdnsconnect(disp->mgr->nm, &disp->local, &disp->peer, + tcp_connected, disp, resp->timeout, 0); + break; + + case DNS_DISPATCHSTATE_CONNECTING: + /* Connection pending; add resp to the list */ + resp->state = DNS_DISPATCHSTATE_CONNECTING; + dns_dispentry_ref(resp); /* DISPENTRY005 */ + ISC_LIST_APPEND(disp->pending, resp, plink); + UNLOCK(&disp->lock); + break; + + case DNS_DISPATCHSTATE_CONNECTED: + resp->state = DNS_DISPATCHSTATE_CONNECTED; + + /* Add the resp to the reading list */ + ISC_LIST_APPEND(disp->active, resp, alink); + dispentry_log(resp, LVL(90), "already connected; attaching"); + resp->reading = true; + + if (!disp->reading) { + /* Restart the reading */ + tcp_startrecv(NULL, disp, resp); + } + + UNLOCK(&disp->lock); + /* We are already connected; call the connected cb */ + dispentry_log(resp, LVL(90), "connect callback: %s", + isc_result_totext(ISC_R_SUCCESS)); + resp->connected(ISC_R_SUCCESS, NULL, resp->arg); + break; + + default: + UNREACHABLE(); + } + + return (ISC_R_SUCCESS); } isc_result_t dns_dispatch_connect(dns_dispentry_t *resp) { - dns_dispatch_t *disp = NULL; - uint_fast32_t tcpstate = DNS_DISPATCHSTATE_NONE; - REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); - disp = resp->disp; - - /* This will be detached once we've connected. */ - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); + dns_dispatch_t *disp = resp->disp; switch (disp->socktype) { case isc_socktype_tcp: - /* - * Check whether the dispatch is already connecting - * or connected. - */ - atomic_compare_exchange_strong(&disp->tcpstate, - (uint_fast32_t *)&tcpstate, - DNS_DISPATCHSTATE_CONNECTING); - switch (tcpstate) { - case DNS_DISPATCHSTATE_NONE: - /* First connection, continue with connecting */ - LOCK(&disp->lock); - ISC_LIST_APPEND(disp->pending, resp, plink); - UNLOCK(&disp->lock); - dns_dispatch_attach(disp, &(dns_dispatch_t *){ NULL }); - isc_nm_tcpdnsconnect(disp->mgr->nm, &disp->local, - &disp->peer, tcp_connected, disp, - resp->timeout, 0); - break; - - case DNS_DISPATCHSTATE_CONNECTING: - /* Connection pending; add resp to the list */ - LOCK(&disp->lock); - ISC_LIST_APPEND(disp->pending, resp, plink); - UNLOCK(&disp->lock); - break; - - case DNS_DISPATCHSTATE_CONNECTED: - /* We are already connected; call the connected cb */ - if (resp->connected != NULL) { - resp->connected(ISC_R_SUCCESS, NULL, resp->arg); - } - dispentry_detach(&resp); - break; - - default: - UNREACHABLE(); - } - - break; + return (tcp_dispatch_connect(disp, resp)); case isc_socktype_udp: - isc_nm_udpconnect(disp->mgr->nm, &resp->local, &resp->peer, - udp_connected, resp, resp->timeout, 0); - break; + udp_dispatch_connect(disp, resp); + return (ISC_R_SUCCESS); default: - return (ISC_R_NOTIMPLEMENTED); + UNREACHABLE(); } - - return (ISC_R_SUCCESS); } static void @@ -1862,36 +2044,98 @@ send_done(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { REQUIRE(VALID_RESPONSE(resp)); + dns_dispatch_t *disp = resp->disp; + + REQUIRE(VALID_DISPATCH(disp)); + + dispentry_log(resp, LVL(90), "sent: %s", isc_result_totext(result)); + resp->sent(result, NULL, resp->arg); if (result != ISC_R_SUCCESS) { - isc_nm_cancelread(handle); + dispentry_cancel(resp, result); } - dispentry_detach(&resp); + dns_dispentry_detach(&resp); /* DISPENTRY007 */ + isc_nmhandle_detach(&handle); +} + +static void +tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, + int32_t timeout) { + REQUIRE(timeout <= INT16_MAX); + + if (disp->reading) { + return; + } + + if (timeout > 0) { + isc_nmhandle_settimeout(disp->handle, timeout); + } + + dispentry_log(resp, LVL(90), "continue reading"); + + dns_dispatch_ref(disp); /* DISPATCH002 */ + isc_nm_read(disp->handle, tcp_recv, disp); + disp->reading = true; + + ISC_LIST_APPEND(disp->active, resp, alink); + resp->reading = true; +} + +static void +udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout) { + REQUIRE(timeout <= INT16_MAX); + + if (resp->reading) { + return; + } + + if (timeout > 0) { + isc_nmhandle_settimeout(resp->handle, timeout); + } + + dispentry_log(resp, LVL(90), "continue reading"); + + dns_dispentry_ref(resp); /* DISPENTRY003 */ + isc_nm_read(resp->handle, udp_recv, resp); + resp->reading = true; } void dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout) { - dns_dispatch_t *disp = NULL; - REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); - disp = resp->disp; + dns_dispatch_t *disp = resp->disp; - REQUIRE(VALID_DISPATCH(disp)); + LOCK(&disp->lock); + switch (disp->socktype) { + case isc_socktype_udp: { + udp_dispatch_getnext(resp, timeout); + break; + } + case isc_socktype_tcp: + INSIST(disp->timedout > 0); + disp->timedout--; + tcp_dispatch_getnext(disp, resp, timeout); + break; + default: + UNREACHABLE(); + } - dispatch_getnext(disp, resp, timeout); + UNLOCK(&disp->lock); } void dns_dispatch_send(dns_dispentry_t *resp, isc_region_t *r, isc_dscp_t dscp) { - isc_nmhandle_t *handle = NULL; - REQUIRE(VALID_RESPONSE(resp)); - + REQUIRE(VALID_DISPATCH(resp->disp)); UNUSED(dscp); + dns_dispatch_t *disp = resp->disp; + isc_nmhandle_t *sendhandle = NULL; + #if 0 /* XXX: no DSCP support */ if (dscp == -1) { @@ -1906,14 +2150,19 @@ dns_dispatch_send(dns_dispentry_t *resp, isc_region_t *r, isc_dscp_t dscp) { } #endif - if (resp->disp->socktype == isc_socktype_tcp) { - handle = resp->disp->handle; - } else { - handle = resp->handle; + dispentry_log(resp, LVL(90), "sending"); + switch (disp->socktype) { + case isc_socktype_udp: + isc_nmhandle_attach(resp->handle, &sendhandle); + break; + case isc_socktype_tcp: + isc_nmhandle_attach(disp->handle, &sendhandle); + break; + default: + UNREACHABLE(); } - - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - isc_nm_send(handle, r, send_done, resp); + dns_dispentry_ref(resp); /* DISPENTRY007 */ + isc_nm_send(sendhandle, r, send_done, resp); } isc_result_t @@ -1931,19 +2180,21 @@ dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) { isc_result_t dns_dispentry_getlocaladdress(dns_dispentry_t *resp, isc_sockaddr_t *addrp) { REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); REQUIRE(addrp != NULL); - if (resp->disp->socktype == isc_socktype_tcp) { - *addrp = resp->disp->local; - return (ISC_R_SUCCESS); - } + dns_dispatch_t *disp = resp->disp; - if (resp->handle != NULL) { + switch (disp->socktype) { + case isc_socktype_tcp: + *addrp = disp->local; + return (ISC_R_SUCCESS); + case isc_socktype_udp: *addrp = isc_nmhandle_localaddr(resp->handle); return (ISC_R_SUCCESS); + default: + UNREACHABLE(); } - - return (ISC_R_NOTIMPLEMENTED); } dns_dispatch_t * @@ -1990,7 +2241,7 @@ dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, isc_mem_attach(mctx, &dset->mctx); dset->dispatches[0] = NULL; - dns_dispatch_attach(source, &dset->dispatches[0]); + dns_dispatch_attach(source, &dset->dispatches[0]); /* DISPATCH004 */ LOCK(&mgr->lock); for (i = 1; i < n; i++) { @@ -2011,7 +2262,7 @@ fail: UNLOCK(&mgr->lock); for (j = 0; j < i; j++) { - dns_dispatch_detach(&(dset->dispatches[j])); + dns_dispatch_detach(&(dset->dispatches[j])); /* DISPATCH004 */ } isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n); if (dset->mctx == mctx) { @@ -2033,7 +2284,7 @@ dns_dispatchset_destroy(dns_dispatchset_t **dsetp) { dset = *dsetp; *dsetp = NULL; for (i = 0; i < dset->ndisp; i++) { - dns_dispatch_detach(&(dset->dispatches[i])); + dns_dispatch_detach(&(dset->dispatches[i])); /* DISPATCH004 */ } isc_mem_put(dset->mctx, dset->dispatches, sizeof(dns_dispatch_t *) * dset->ndisp); diff --git a/lib/dns/include/dns/dispatch.h b/lib/dns/include/dns/dispatch.h index b82a46c5a9..bcf5b97db4 100644 --- a/lib/dns/include/dns/dispatch.h +++ b/lib/dns/include/dns/dispatch.h @@ -53,10 +53,13 @@ #include #include #include +#include #include #include +#undef DNS_DISPATCH_TRACE + ISC_LANG_BEGINDECLS /*% @@ -94,25 +97,22 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, dns_dispatchmgr_t **mgrp); *\li anything else -- failure */ -void -dns_dispatchmgr_attach(dns_dispatchmgr_t *mgr, dns_dispatchmgr_t **mgrp); -/*%< - * Attach to a dispatch manger. - * - * Requires: - *\li is valid. - * - *\li mgrp != NULL && *mgrp == NULL - */ +#if DNS_DISPATCH_TRACE +#define dns_dispatchmgr_ref(ptr) \ + dns_dispatchmgr__ref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispatchmgr_unref(ptr) \ + dns_dispatchmgr__unref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispatchmgr_attach(ptr, ptrp) \ + dns_dispatchmgr__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define dns_dispatchmgr_detach(ptrp) \ + dns_dispatchmgr__detach(ptrp, __func__, __FILE__, __LINE__) +ISC_REFCOUNT_TRACE_DECL(dns_dispatchmgr); +#else +ISC_REFCOUNT_DECL(dns_dispatchmgr); +#endif -void -dns_dispatchmgr_detach(dns_dispatchmgr_t **mgrp); /*%< - * Detach from the dispatch manager, and destroy it if no references - * remain. - * - * Requires: - *\li mgrp != NULL && *mgrp is a valid dispatchmgr. + * Attach/Detach to a dispatch manager. */ void @@ -201,10 +201,21 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, *\li Anything else -- failure. */ -void -dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp); +#if DNS_DISPATCH_TRACE +#define dns_dispatch_ref(ptr) \ + dns_dispatch__ref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispatch_unref(ptr) \ + dns_dispatch__unref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispatch_attach(ptr, ptrp) \ + dns_dispatch__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define dns_dispatch_detach(ptrp) \ + dns_dispatch__detach(ptrp, __func__, __FILE__, __LINE__) +ISC_REFCOUNT_TRACE_DECL(dns_dispatch); +#else +ISC_REFCOUNT_DECL(dns_dispatch); +#endif /*%< - * Attach to a dispatch handle. + * Attach/Detach to a dispatch handle. * * Requires: *\li disp is valid. @@ -212,15 +223,6 @@ dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp); *\li dispp != NULL && *dispp == NULL */ -void -dns_dispatch_detach(dns_dispatch_t **dispp); -/*%< - * Detaches from the dispatch. - * - * Requires: - *\li dispp != NULL and *dispp be a valid dispatch. - */ - isc_result_t dns_dispatch_connect(dns_dispentry_t *resp); /*%< @@ -253,11 +255,9 @@ dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout); isc_result_t dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, - const isc_sockaddr_t *localaddr, bool *connected, - dns_dispatch_t **dispp); + const isc_sockaddr_t *localaddr, dns_dispatch_t **dispp); /* - * Attempt to connect to a existing TCP connection (connection completed - * if connected == NULL). + * Attempt to connect to a existing TCP connection. */ typedef void (*dispatch_cb_t)(isc_result_t eresult, isc_region_t *region, @@ -307,22 +307,10 @@ dns_dispatch_add(dns_dispatch_t *disp, unsigned int options, void dns_dispatch_done(dns_dispentry_t **respp); -/*%< - * Disconnects a dispatch response entry from its dispatch and shuts it - * down. This is called when the dispatch is complete; use - * dns_dispatch_cancel() if it is still pending. - * - * Requires: - *\li "resp" != NULL and "*resp" contain a value previously allocated - * by dns_dispatch_add(); - */ +/*< + * Disconnect a dispatch response entry from its dispatch, cancel all + * pending connects and reads in a dispatch entry and shut it down. -void -dns_dispatch_cancel(dns_dispentry_t **respp); -/*%< - * Cancel all pending connects and reads in a dispatch entry, - * then call dns_dispatch_done(). This is used when the caller - * cancels a dispatch response before it has completed. * * Requires: *\li "resp" != NULL and "*resp" contain a value previously allocated diff --git a/lib/dns/request.c b/lib/dns/request.c index 075ac10d81..e52039107a 100644 --- a/lib/dns/request.c +++ b/lib/dns/request.c @@ -400,20 +400,18 @@ isblackholed(dns_dispatchmgr_t *dispatchmgr, const isc_sockaddr_t *destaddr) { static isc_result_t tcp_dispatch(bool newtcp, dns_requestmgr_t *requestmgr, const isc_sockaddr_t *srcaddr, const isc_sockaddr_t *destaddr, - isc_dscp_t dscp, bool *connected, dns_dispatch_t **dispatchp) { + isc_dscp_t dscp, dns_dispatch_t **dispatchp) { isc_result_t result; if (!newtcp) { result = dns_dispatch_gettcp(requestmgr->dispatchmgr, destaddr, - srcaddr, connected, dispatchp); + srcaddr, dispatchp); if (result == ISC_R_SUCCESS) { char peer[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_format(destaddr, peer, sizeof(peer)); req_log(ISC_LOG_DEBUG(1), - "attached to %s TCP " - "connection to %s", - *connected ? "existing" : "pending", peer); + "attached to TCP connection to %s", peer); return (result); } } @@ -455,12 +453,12 @@ udp_dispatch(dns_requestmgr_t *requestmgr, const isc_sockaddr_t *srcaddr, static isc_result_t get_dispatch(bool tcp, bool newtcp, dns_requestmgr_t *requestmgr, const isc_sockaddr_t *srcaddr, const isc_sockaddr_t *destaddr, - isc_dscp_t dscp, bool *connected, dns_dispatch_t **dispatchp) { + isc_dscp_t dscp, dns_dispatch_t **dispatchp) { isc_result_t result; if (tcp) { result = tcp_dispatch(newtcp, requestmgr, srcaddr, destaddr, - dscp, connected, dispatchp); + dscp, dispatchp); } else { result = udp_dispatch(requestmgr, srcaddr, destaddr, dispatchp); } @@ -482,7 +480,6 @@ dns_request_createraw(dns_requestmgr_t *requestmgr, isc_buffer_t *msgbuf, bool tcp = false; bool newtcp = false; isc_region_t r; - bool connected = false; unsigned int dispopt = 0; REQUIRE(VALID_REQUESTMGR(requestmgr)); @@ -556,7 +553,7 @@ dns_request_createraw(dns_requestmgr_t *requestmgr, isc_buffer_t *msgbuf, again: result = get_dispatch(tcp, newtcp, requestmgr, srcaddr, destaddr, dscp, - &connected, &request->dispatch); + &request->dispatch); if (result != ISC_R_SUCCESS) { goto detach; } @@ -573,7 +570,6 @@ again: if (result != ISC_R_SUCCESS) { if ((options & DNS_REQUESTOPT_FIXEDID) != 0 && !newtcp) { newtcp = true; - connected = false; dns_dispatch_detach(&request->dispatch); goto again; } @@ -593,21 +589,15 @@ again: UNLOCK(&requestmgr->lock); request->destaddr = *destaddr; - if (tcp && connected) { - req_send(request); - /* no need to call req_connected(), detach here */ - req_detach(&(dns_request_t *){ request }); - } else { - request->flags |= DNS_REQUEST_F_CONNECTING; - if (tcp) { - request->flags |= DNS_REQUEST_F_TCP; - } + request->flags |= DNS_REQUEST_F_CONNECTING; + if (tcp) { + request->flags |= DNS_REQUEST_F_TCP; + } - result = dns_dispatch_connect(request->dispentry); - if (result != ISC_R_SUCCESS) { - goto unlink; - } + result = dns_dispatch_connect(request->dispentry); + if (result != ISC_R_SUCCESS) { + goto unlink; } req_log(ISC_LOG_DEBUG(3), "dns_request_createraw: request %p", request); @@ -718,7 +708,7 @@ dns_request_create(dns_requestmgr_t *requestmgr, dns_message_t *message, again: result = get_dispatch(tcp, false, requestmgr, srcaddr, destaddr, dscp, - &connected, &request->dispatch); + &request->dispatch); if (result != ISC_R_SUCCESS) { goto detach; } @@ -903,7 +893,7 @@ request_cancel(dns_request_t *request) { request->flags &= ~DNS_REQUEST_F_CONNECTING; if (request->dispentry != NULL) { - dns_dispatch_cancel(&request->dispentry); + dns_dispatch_done(&request->dispentry); } dns_dispatch_detach(&request->dispatch); @@ -1061,13 +1051,13 @@ static void req_response(isc_result_t result, isc_region_t *region, void *arg) { dns_request_t *request = (dns_request_t *)arg; - req_log(ISC_LOG_DEBUG(3), "req_response: request %p: %s", request, - isc_result_totext(result)); - if (result == ISC_R_CANCELED) { return; } + req_log(ISC_LOG_DEBUG(3), "req_response: request %p: %s", request, + isc_result_totext(result)); + if (result == ISC_R_TIMEDOUT) { LOCK(&request->requestmgr->locks[request->hash]); if (request->udpcount != 0) { diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 7ab22e7869..6f95acd9b2 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -1499,7 +1499,7 @@ fctx_cancelquery(resquery_t **queryp, isc_time_t *finish, bool no_response, * exist, cancel them. */ if (query->dispentry != NULL) { - dns_dispatch_cancel(&query->dispentry); + dns_dispatch_done(&query->dispentry); } LOCK(&fctx->res->buckets[fctx->bucketnum].lock); From 3edccaf0c7477cbc5f0589dd36323687e428694c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ondr=CC=8Cej=20Sury=CC=81?= Date: Mon, 19 Dec 2022 14:26:31 +0100 Subject: [PATCH 3/8] Ignore TCP dispatches in DNS_DISPATCHSTATE_NONE state The TCP dispatches in DNS_DISPATCHSTATE_NONE could be either very fresh or those could be dispatches that failed connecting to the destination. Ignore them when trying to connect to an existing TCP dispatch via dns_dispatch_gettcp(). --- lib/dns/dispatch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index faa52fdc2b..97e7fc9191 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -1238,11 +1238,13 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, isc_sockaddr_eqaddr(localaddr, &sockname))) { switch (disp->state) { + case DNS_DISPATCHSTATE_NONE: + /* Dispatch in indeterminate state */ + break; case DNS_DISPATCHSTATE_CONNECTED: /* We found a connected dispatch */ dns_dispatch_attach(disp, &disp_connected); break; - case DNS_DISPATCHSTATE_NONE: case DNS_DISPATCHSTATE_CONNECTING: /* We found "a" dispatch, store it for later */ if (disp_fallback == NULL) { From b714033731027e78e3140e398b0fd3fec536f270 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ondr=CC=8Cej=20Sury=CC=81?= Date: Mon, 19 Dec 2022 17:28:39 +0100 Subject: [PATCH 4/8] Don't remove dispatches in CANCELED state from the list In dns_dispatch_gettcp(), we can't remove canceled dispatches from the mgr->list because ISC_LIST_NEXT() would fail in the next iteration. --- lib/dns/dispatch.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 97e7fc9191..cf426f32eb 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -1239,7 +1239,7 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, { switch (disp->state) { case DNS_DISPATCHSTATE_NONE: - /* Dispatch in indeterminate state */ + /* Dispatch in indeterminate state, skip it */ break; case DNS_DISPATCHSTATE_CONNECTED: /* We found a connected dispatch */ @@ -1254,10 +1254,8 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, break; case DNS_DISPATCHSTATE_CANCELED: /* - * We found a canceled dispatch, help its - * removal from the list, and skip it. + * We found a canceled dispatch, skip it. */ - ISC_LIST_UNLINK(disp->mgr->list, disp, link); break; default: UNREACHABLE(); From 3f4970da1a915d3ba83830abf5e4bf6e576a3cbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ondr=CC=8Cej=20Sury=CC=81?= Date: Mon, 19 Dec 2022 18:17:46 +0100 Subject: [PATCH 5/8] Fix assignment vs comparison typo in tcp_connected() In tcp_connected() a typo has turned a DbC check into an assignment breaking the state machine and making the dns_dispatch_gettcp() try to attach to dispatch in process of destruction. --- lib/dns/dispatch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index cf426f32eb..3fbdb7f148 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -1842,7 +1842,7 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { } LOCK(&disp->lock); - INSIST(disp->state = DNS_DISPATCHSTATE_CONNECTING); + INSIST(disp->state == DNS_DISPATCHSTATE_CONNECTING); if (eresult == ISC_R_SUCCESS) { disp->state = DNS_DISPATCHSTATE_CONNECTED; From 87ad3ecaf0a9a3bc0df91f4ba7a6c185ea4fe248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Tue, 20 Dec 2022 06:11:26 +0100 Subject: [PATCH 6/8] Ignore TCP dispatches that have zero references The TCP dispatches are removed from the dispatchmgr->list in the dispatch_destroy() and there's a brief period of time where dns_dispatch_gettcp() can find a dispatch in connected state that's being destroyed. Set the dispatch state to DNS_DISPATCHSTATE_NONE in the TCP connection callback if there are no responses waiting, and ignore TCP dispatches with zero references in dns_dispatch_gettcp(). (cherry picked from commit 3fac4ca57ee5d38b220a77e8089ac7e4bdb8cef2) --- lib/dns/dispatch.c | 64 +++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 3fbdb7f148..af796eebd3 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -738,6 +738,7 @@ tcp_recv_shutdown(dns_dispatch_t *disp, dns_displist_t *resps, next = ISC_LIST_NEXT(resp, alink); tcp_recv_add(resps, resp, result); } + disp->state = DNS_DISPATCHSTATE_CANCELED; } static void @@ -1232,34 +1233,42 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, * 2. destination address is same * 3. local address is either NULL or same */ - if (disp->socktype == isc_socktype_tcp && - isc_sockaddr_equal(destaddr, &peeraddr) && - (localaddr == NULL || - isc_sockaddr_eqaddr(localaddr, &sockname))) + if (disp->socktype != isc_socktype_tcp || + !isc_sockaddr_equal(destaddr, &peeraddr) || + (localaddr != NULL && + !isc_sockaddr_eqaddr(localaddr, &sockname))) { - switch (disp->state) { - case DNS_DISPATCHSTATE_NONE: - /* Dispatch in indeterminate state, skip it */ + UNLOCK(&disp->lock); + continue; + } + + switch (disp->state) { + case DNS_DISPATCHSTATE_NONE: + /* A dispatch in indeterminate state, skip it */ + break; + case DNS_DISPATCHSTATE_CONNECTED: + if (ISC_LIST_EMPTY(disp->active)) { + /* Ignore dispatch with no responses */ break; - case DNS_DISPATCHSTATE_CONNECTED: - /* We found a connected dispatch */ - dns_dispatch_attach(disp, &disp_connected); - break; - case DNS_DISPATCHSTATE_CONNECTING: - /* We found "a" dispatch, store it for later */ - if (disp_fallback == NULL) { - dns_dispatch_attach(disp, - &disp_fallback); - } - break; - case DNS_DISPATCHSTATE_CANCELED: - /* - * We found a canceled dispatch, skip it. - */ - break; - default: - UNREACHABLE(); } + /* We found a connected dispatch */ + dns_dispatch_attach(disp, &disp_connected); + break; + case DNS_DISPATCHSTATE_CONNECTING: + if (ISC_LIST_EMPTY(disp->pending)) { + /* Ignore dispatch with no responses */ + break; + } + /* We found "a" dispatch, store it for later */ + if (disp_fallback == NULL) { + dns_dispatch_attach(disp, &disp_fallback); + } + break; + case DNS_DISPATCHSTATE_CANCELED: + /* A canceled dispatch, skip it. */ + break; + default: + UNREACHABLE(); } UNLOCK(&disp->lock); @@ -1844,7 +1853,10 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { LOCK(&disp->lock); INSIST(disp->state == DNS_DISPATCHSTATE_CONNECTING); - if (eresult == ISC_R_SUCCESS) { + if (ISC_LIST_EMPTY(disp->pending)) { + /* All responses have been canceled */ + disp->state = DNS_DISPATCHSTATE_CANCELED; + } else if (eresult == ISC_R_SUCCESS) { disp->state = DNS_DISPATCHSTATE_CONNECTED; tcp_startrecv(handle, disp, resp); } else { From be3cf85cfe8b22b6e0fe75ed139d0df021b950c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Tue, 20 Dec 2022 08:39:36 +0100 Subject: [PATCH 7/8] Call the connected dns_dispatch callback asynchronously The dns_request code is very sensitive about calling the connected and deadlocks when the timing is "right" in several places. Move the call to the connected callback to the (udp|tcp)_connected() functions, so they are called asynchronously instead of directly from the (udp|tcp)_dispentry_cancel() functions. (cherry picked from commit 9dd8deaf018cf7f93dc1d2ba08cc35764e76cc0f) --- lib/dns/dispatch.c | 85 ++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 49 deletions(-) diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index af796eebd3..31b80c329a 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -1574,7 +1574,6 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dns_dispatch_t *disp = resp->disp; dns_dispatchmgr_t *mgr = disp->mgr; dns_qid_t *qid = mgr->qid; - dispatch_cb_t connected = NULL; dispatch_cb_t response = NULL; LOCK(&disp->lock); @@ -1596,9 +1595,6 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { break; case DNS_DISPATCHSTATE_CONNECTING: - dns_dispentry_ref(resp); /* DISPENTRY008 */ - ISC_LIST_UNLINK(disp->pending, resp, plink); - connected = resp->connected; break; case DNS_DISPATCHSTATE_CONNECTED: @@ -1629,12 +1625,6 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { unlock: UNLOCK(&disp->lock); - if (connected) { - dispentry_log(resp, LVL(90), "connect callback: %s", - isc_result_totext(result)); - connected(result, NULL, resp->arg); - dns_dispentry_detach(&resp); /* DISPENTRY008 */ - } if (response) { dispentry_log(resp, LVL(90), "read callback: %s", isc_result_totext(result)); @@ -1652,7 +1642,6 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dns_dispatch_t *disp = resp->disp; dns_dispatchmgr_t *mgr = disp->mgr; dns_qid_t *qid = mgr->qid; - dispatch_cb_t connected = NULL; dns_displist_t resps = ISC_LIST_INITIALIZER; LOCK(&disp->lock); @@ -1670,8 +1659,6 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { break; case DNS_DISPATCHSTATE_CONNECTING: - ISC_LIST_UNLINK(disp->pending, resp, plink); - connected = resp->connected; break; case DNS_DISPATCHSTATE_CONNECTED: @@ -1739,20 +1726,13 @@ unlock: UNLOCK(&disp->lock); /* - * NOTE: Calling the connected and response callbacks directly from here - * should be done asynchronously, as the dns_dispatch_done() is usually - * called directly from the response callback, so there's a slight - * chance that the call stack will get higher here, but it's mitigated - * by the ".reading" flag, so we don't ever go into a loop. + * NOTE: Calling the response callback directly from here should be done + * asynchronously, as the dns_dispatch_done() is usually called directly + * from the response callback, so there's a slight chance that the call + * stack will get higher here, but it's mitigated by the ".reading" + * flag, so we don't ever go into a loop. */ - if (connected) { - dispentry_log(resp, LVL(90), "connect callback: %s", - isc_result_totext(result)); - connected(result, NULL, resp->arg); - dns_dispentry_detach(&resp); /* DISPENTRY005 */ - } - tcp_recv_processall(&resps, NULL); } @@ -1853,7 +1833,29 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { LOCK(&disp->lock); INSIST(disp->state == DNS_DISPATCHSTATE_CONNECTING); - if (ISC_LIST_EMPTY(disp->pending)) { + /* + * If there are pending responses, call the connect + * callbacks for all of them. + */ + for (resp = ISC_LIST_HEAD(disp->pending); resp != NULL; resp = next) { + next = ISC_LIST_NEXT(resp, plink); + ISC_LIST_UNLINK(disp->pending, resp, plink); + ISC_LIST_APPEND(resps, resp, rlink); + resp->result = eresult; + + if (resp->state == DNS_DISPATCHSTATE_CANCELED) { + resp->result = ISC_R_CANCELED; + } else if (eresult == ISC_R_SUCCESS) { + resp->state = DNS_DISPATCHSTATE_CONNECTED; + ISC_LIST_APPEND(disp->active, resp, alink); + resp->reading = true; + dispentry_log(resp, LVL(90), "start reading"); + } else { + resp->state = DNS_DISPATCHSTATE_NONE; + } + } + + if (ISC_LIST_EMPTY(disp->active)) { /* All responses have been canceled */ disp->state = DNS_DISPATCHSTATE_CANCELED; } else if (eresult == ISC_R_SUCCESS) { @@ -1863,24 +1865,6 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { disp->state = DNS_DISPATCHSTATE_NONE; } - /* - * If there are pending responses, call the connect - * callbacks for all of them. - */ - for (resp = ISC_LIST_HEAD(disp->pending); resp != NULL; resp = next) { - next = ISC_LIST_NEXT(resp, plink); - ISC_LIST_UNLINK(disp->pending, resp, plink); - ISC_LIST_APPEND(resps, resp, rlink); - - if (eresult == ISC_R_SUCCESS) { - resp->state = DNS_DISPATCHSTATE_CONNECTED; - ISC_LIST_APPEND(disp->active, resp, alink); - resp->reading = true; - dispentry_log(resp, LVL(90), "start reading"); - } else { - resp->state = DNS_DISPATCHSTATE_NONE; - } - } UNLOCK(&disp->lock); for (resp = ISC_LIST_HEAD(resps); resp != NULL; resp = next) { @@ -1888,8 +1872,8 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { ISC_LIST_UNLINK(resps, resp, rlink); dispentry_log(resp, LVL(90), "connect callback: %s", - isc_result_totext(eresult)); - resp->connected(eresult, NULL, resp->arg); + isc_result_totext(resp->result)); + resp->connected(resp->result, NULL, resp->arg); dns_dispentry_detach(&resp); /* DISPENTRY005 */ } @@ -1911,8 +1895,9 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { switch (resp->state) { case DNS_DISPATCHSTATE_CANCELED: - UNLOCK(&disp->lock); - goto detach; + eresult = ISC_R_CANCELED; + ISC_LIST_UNLINK(disp->pending, resp, plink); + goto unlock; case DNS_DISPATCHSTATE_CONNECTING: ISC_LIST_UNLINK(disp->pending, resp, plink); break; @@ -1921,6 +1906,8 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { } switch (eresult) { + case ISC_R_CANCELED: + break; case ISC_R_SUCCESS: resp->state = DNS_DISPATCHSTATE_CONNECTED; udp_startrecv(handle, resp); @@ -1943,7 +1930,7 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { resp->state = DNS_DISPATCHSTATE_NONE; break; } - +unlock: UNLOCK(&disp->lock); dispentry_log(resp, LVL(90), "connect callback: %s", From 3492dc2f0e41133edacec4b26c8ec4f3f3ae722b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Wed, 30 Nov 2022 18:48:06 +0100 Subject: [PATCH 8/8] Add CHANGES and release note for [GL #3178] and [GL #3636] (cherry picked from commit 2df311eb214baa999badb5d425e9bc349316ba6d) --- CHANGES | 3 +++ doc/notes/notes-current.rst | 3 +++ 2 files changed, 6 insertions(+) diff --git a/CHANGES b/CHANGES index d425956fb3..f713777143 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +6051. [bug] Improve thread safety in the dns_dispatch unit. + [GL #3178] [GL #3636] + 6050. [bug] Changes to the RPZ response-policy min-update-interval and add-soa options now take effect as expected when named is reconfigured. [GL #3740] diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index 7b925d0526..0f4f0e8c01 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -49,6 +49,9 @@ Bug Fixes structures used for "housekeeping") and exclude recently used (<= 10 seconds) ADB names and entries from the overmem memory cleaner. :gl:`#3739` +- Fix a rare assertion failure in the outgoing TCP DNS connection handling. + :gl:`#3178` :gl:`#3636` + Known Issues ~~~~~~~~~~~~