diff --git a/bin/tests/system/cookie/tests.sh b/bin/tests/system/cookie/tests.sh index 72bd193aa5..f8f2e73fa5 100755 --- a/bin/tests/system/cookie/tests.sh +++ b/bin/tests/system/cookie/tests.sh @@ -221,7 +221,7 @@ nextpart ns8/named.run > /dev/null $DIG $DIGOPTS +cookie soa from-no-cookie-server.example @10.53.0.8 > dig.out.test$n grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 wait_for_log_peek 3 "missing required cookie from 10.53.0.7#" ns8/named.run || ret=1 -wait_for_log_peek 3 "TCP connected" ns8/named.run || ret=1 +wait_for_log_peek 3 "connected from" ns8/named.run || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=`expr $status + $ret` diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 246d0acdd8..fc326970f3 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -72,11 +72,19 @@ struct dns_dispatchmgr { unsigned int nv6ports; /*%< # of available ports for IPv4 */ }; +typedef enum { + DNS_DISPATCHSTATE_NONE = 0UL, + DNS_DISPATCHSTATE_CONNECTING, + DNS_DISPATCHSTATE_CONNECTED, + DNS_DISPATCHSTATE_CANCELED, +} dns_dispatchstate_t; + struct dns_dispentry { unsigned int magic; isc_refcount_t references; dns_dispatch_t *disp; isc_nmhandle_t *handle; /*%< netmgr handle for UDP connection */ + dns_dispatchstate_t state; dns_transport_t *transport; isc_tlsctx_cache_t *tlsctx_cache; unsigned int bucket; @@ -91,26 +99,14 @@ struct dns_dispentry { dispatch_cb_t sent; dispatch_cb_t response; void *arg; - bool canceled; + bool reading; + isc_result_t result; ISC_LINK(dns_dispentry_t) link; ISC_LINK(dns_dispentry_t) alink; ISC_LINK(dns_dispentry_t) plink; ISC_LINK(dns_dispentry_t) rlink; }; -/*% - * Fixed UDP buffer size. - */ -#ifndef DNS_DISPATCH_UDPBUFSIZE -#define DNS_DISPATCH_UDPBUFSIZE 4096 -#endif /* ifndef DNS_DISPATCH_UDPBUFSIZE */ - -typedef enum { - DNS_DISPATCHSTATE_NONE = 0UL, - DNS_DISPATCHSTATE_CONNECTING, - DNS_DISPATCHSTATE_CONNECTED -} dns_dispatchstate_t; - struct dns_dispatch { /* Unlocked. */ unsigned int magic; /*%< magic */ @@ -126,17 +122,15 @@ struct dns_dispatch { /* Locked by "lock". */ isc_mutex_t lock; /*%< locks all below */ isc_socktype_t socktype; - atomic_uint_fast32_t tcpstate; - atomic_bool tcpreading; + dns_dispatchstate_t state; isc_refcount_t references; - unsigned int shutdown_out : 1; + + bool reading; dns_displist_t pending; dns_displist_t active; - unsigned int nsockets; - unsigned int requests; /*%< how many requests we have */ - unsigned int tcpbuffers; /*%< allocated buffers */ + unsigned int requests; /*%< how many requests we have */ unsigned int timedout; }; @@ -156,26 +150,6 @@ struct dns_dispatch { #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r') #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC) -/*% - * Quota to control the number of UDP dispatch sockets. If a dispatch has - * more than the quota of sockets, new queries will purge oldest ones, so - * that a massive number of outstanding queries won't prevent subsequent - * queries (especially if the older ones take longer time and result in - * timeout). - */ -#ifndef DNS_DISPATCH_SOCKSQUOTA -#define DNS_DISPATCH_SOCKSQUOTA 3072 -#endif /* ifndef DNS_DISPATCH_SOCKSQUOTA */ - -/*% - * Quota to control the number of concurrent requests that can be handled - * by each TCP dispatch. (UDP dispatches do not currently support socket - * sharing.) - */ -#ifndef DNS_DISPATCH_MAXREQUESTS -#define DNS_DISPATCH_MAXREQUESTS 32768 -#endif /* ifndef DNS_DISPATCH_MAXREQUESTS */ - /*% * Number of buckets in the QID hash table, and the value to * increment the QID by when attempting to avoid collisions. @@ -189,6 +163,20 @@ struct dns_dispatch { #define DNS_QID_INCREMENT 16433 #endif /* ifndef DNS_QID_INCREMENT */ +#if DNS_DISPATCH_TRACE +#define dns_dispentry_ref(ptr) \ + dns_dispentry__ref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispentry_unref(ptr) \ + dns_dispentry__unref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispentry_attach(ptr, ptrp) \ + dns_dispentry__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define dns_dispentry_detach(ptrp) \ + dns_dispentry__detach(ptrp, __func__, __FILE__, __LINE__) +ISC_REFCOUNT_TRACE_DECL(dns_dispentry); +#else +ISC_REFCOUNT_DECL(dns_dispentry); +#endif + /* * Statics. */ @@ -204,10 +192,13 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, static void tcp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, void *arg); +static void +tcp_recv_done(dns_dispentry_t *resp, isc_result_t eresult, + isc_region_t *region); static uint32_t dns_hash(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t); static void -dispatch_free(dns_dispatch_t **dispp); +dispentry_cancel(dns_dispentry_t *resp, isc_result_t result); static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, dns_dispatch_t **dispp); @@ -216,12 +207,62 @@ qid_allocate(dns_dispatchmgr_t *mgr, dns_qid_t **qidp); static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); static void -startrecv(isc_nmhandle_t *handle, dns_dispatch_t *disp, dns_dispentry_t *resp); -void -dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, int32_t timeout); +udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp); +static void +tcp_startrecv(isc_nmhandle_t *handle, dns_dispatch_t *disp, + dns_dispentry_t *resp); +static void +tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, + int32_t timeout); +static void +udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout); #define LVL(x) ISC_LOG_DEBUG(x) +static const char * +socktype2str(dns_dispentry_t *resp) { + dns_transport_type_t transport_type = DNS_TRANSPORT_UDP; + dns_dispatch_t *disp = resp->disp; + + if (disp->socktype == isc_socktype_tcp) { + if (resp->transport != NULL) { + transport_type = + dns_transport_get_type(resp->transport); + } else { + transport_type = DNS_TRANSPORT_TCP; + } + } + + switch (transport_type) { + case DNS_TRANSPORT_UDP: + return ("UDP"); + case DNS_TRANSPORT_TCP: + return ("TCP"); + case DNS_TRANSPORT_TLS: + return ("TLS"); + case DNS_TRANSPORT_HTTP: + return ("HTTP"); + default: + return (""); + } +} + +static const char * +state2str(dns_dispatchstate_t state) { + switch (state) { + case DNS_DISPATCHSTATE_NONE: + return ("none"); + case DNS_DISPATCHSTATE_CONNECTING: + return ("connecting"); + case DNS_DISPATCHSTATE_CONNECTED: + return ("connected"); + case DNS_DISPATCHSTATE_CANCELED: + return ("canceled"); + default: + return (""); + } +} + static void mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) ISC_FORMAT_PRINTF(3, 4); @@ -266,13 +307,20 @@ static void dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) { char msgbuf[2048]; va_list ap; + int r; if (!isc_log_wouldlog(dns_lctx, level)) { return; } va_start(ap, fmt); - vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); + r = vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); + if (r < 0) { + msgbuf[0] = '\0'; + } else if ((unsigned int)r >= sizeof(msgbuf)) { + /* Truncated */ + msgbuf[sizeof(msgbuf) - 1] = '\0'; + } va_end(ap); isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, @@ -280,6 +328,34 @@ dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) { msgbuf); } +static void +dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...) + ISC_FORMAT_PRINTF(3, 4); + +static void +dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...) { + char msgbuf[2048]; + va_list ap; + int r; + + if (!isc_log_wouldlog(dns_lctx, level)) { + return; + } + + va_start(ap, fmt); + r = vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); + if (r < 0) { + msgbuf[0] = '\0'; + } else if ((unsigned int)r >= sizeof(msgbuf)) { + /* Truncated */ + msgbuf[sizeof(msgbuf) - 1] = '\0'; + } + va_end(ap); + + dispatch_log(resp->disp, level, "%s response %p: %s", + socktype2str(resp), resp, msgbuf); +} + /* * Return a hash of the destination and message id. */ @@ -324,8 +400,6 @@ setup_socket(dns_dispatch_t *disp, dns_dispentry_t *resp, return (ISC_R_ADDRNOTAVAIL); } - disp->nsockets++; - resp->local = disp->local; resp->peer = *dest; @@ -338,26 +412,6 @@ setup_socket(dns_dispatch_t *disp, dns_dispentry_t *resp, return (ISC_R_SUCCESS); } -/*% - * Deactivate the socket for a dispatch entry. - * The dispatch must be locked. - */ -static void -deactivate_dispentry(dns_dispatch_t *disp, dns_dispentry_t *resp) { - if (ISC_LINK_LINKED(resp, alink)) { - ISC_LIST_UNLINK(disp->active, resp, alink); - } - - if (resp->handle != NULL) { - INSIST(disp->socktype == isc_socktype_udp); - - isc_nm_cancelread(resp->handle); - isc_nmhandle_detach(&resp->handle); - } - - disp->nsockets--; -} - /* * Find an entry for query ID 'id', socket address 'dest', and port number * 'port'. @@ -385,30 +439,35 @@ entry_search(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, return (NULL); } -static void -dispentry_attach(dns_dispentry_t *resp, dns_dispentry_t **respp) { - REQUIRE(VALID_RESPONSE(resp)); - REQUIRE(respp != NULL && *respp == NULL); - - isc_refcount_increment(&resp->references); - - *respp = resp; -} - static void dispentry_destroy(dns_dispentry_t *resp) { dns_dispatch_t *disp = resp->disp; + /* + * We need to call this from here in case there's an external event that + * shuts down our dispatch (like ISC_R_SHUTTINGDOWN). + */ + dispentry_cancel(resp, ISC_R_CANCELED); + + LOCK(&disp->lock); + INSIST(disp->requests > 0); + disp->requests--; + UNLOCK(&disp->lock); + + isc_refcount_destroy(&resp->references); + resp->magic = 0; - if (ISC_LINK_LINKED(resp, plink)) { - ISC_LIST_UNLINK(disp->pending, resp, plink); - } - + INSIST(!ISC_LINK_LINKED(resp, link)); + INSIST(!ISC_LINK_LINKED(resp, plink)); INSIST(!ISC_LINK_LINKED(resp, alink)); INSIST(!ISC_LINK_LINKED(resp, rlink)); + dispentry_log(resp, LVL(90), "destroying"); + if (resp->handle != NULL) { + dispentry_log(resp, LVL(90), "detaching handle %p from %p", + resp->handle, &resp->handle); isc_nmhandle_detach(&resp->handle); } @@ -420,28 +479,16 @@ dispentry_destroy(dns_dispentry_t *resp) { dns_transport_detach(&resp->transport); } - isc_refcount_destroy(&resp->references); - isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); - dns_dispatch_detach(&disp); + dns_dispatch_detach(&disp); /* DISPATCH001 */ } -static void -dispentry_detach(dns_dispentry_t **respp) { - dns_dispentry_t *resp = NULL; - uint_fast32_t ref; - - REQUIRE(respp != NULL && VALID_RESPONSE(*respp)); - - resp = *respp; - *respp = NULL; - - ref = isc_refcount_decrement(&resp->references); - if (ref == 1) { - dispentry_destroy(resp); - } -} +#if DNS_DISPATCH_TRACE +ISC_REFCOUNT_TRACE_IMPL(dns_dispentry, dispentry_destroy); +#else +ISC_REFCOUNT_IMPL(dns_dispentry, dispentry_destroy); +#endif /* * How long in milliseconds has it been since this dispentry @@ -485,7 +532,7 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, unsigned int flags; isc_sockaddr_t peer; isc_netaddr_t netaddr; - int match, timeout; + int match, timeout = 0; dispatch_cb_t response = NULL; REQUIRE(VALID_RESPONSE(resp)); @@ -494,20 +541,24 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, disp = resp->disp; LOCK(&disp->lock); + INSIST(resp->reading); + resp->reading = false; - dispatch_log(disp, LVL(90), "UDP response %p:%s:requests %d", resp, - isc_result_totext(eresult), disp->requests); + response = resp->response; - /* - * The resp may have been deactivated by shutdown; if - * so, we can skip the response callback. - */ - if (ISC_LINK_LINKED(resp, alink)) { - response = resp->response; - } else if (eresult == ISC_R_SUCCESS) { + if (resp->state == DNS_DISPATCHSTATE_CANCELED) { + /* + * Nobody is interested in the callback if the response + * has been canceled already. Detach from the response + * and the handle. + */ + response = NULL; eresult = ISC_R_CANCELED; } + dispentry_log(resp, LVL(90), "read callback:%s, requests %d", + isc_result_totext(eresult), disp->requests); + if (eresult != ISC_R_SUCCESS) { /* * This is most likely a network error on a connected @@ -533,8 +584,8 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, char netaddrstr[ISC_NETADDR_FORMATSIZE]; isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr)); - dispatch_log(disp, LVL(10), "blackholed packet from %s", - netaddrstr); + dispentry_log(resp, LVL(10), + "blackholed packet from %s", netaddrstr); } goto next; } @@ -547,13 +598,16 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, isc_buffer_add(&source, region->length); dres = dns_message_peekheader(&source, &id, &flags); if (dres != ISC_R_SUCCESS) { - dispatch_log(disp, LVL(10), "got garbage packet"); + char netaddrstr[ISC_NETADDR_FORMATSIZE]; + isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr)); + dispentry_log(resp, LVL(10), "got garbage packet from %s", + netaddrstr); goto next; } - dispatch_log(disp, LVL(92), - "got valid DNS message header, /QR %c, id %u", - (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); + dispentry_log(resp, LVL(92), + "got valid DNS message header, /QR %c, id %u", + (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); /* * Look at the message flags. If it's a query, ignore it. @@ -566,7 +620,7 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, * The QID and the address must match the expected ones. */ if (resp->id != id || !isc_sockaddr_equal(&peer, &resp->peer)) { - dispatch_log(disp, LVL(90), "response doesn't match"); + dispentry_log(resp, LVL(90), "response doesn't match"); inc_stats(disp->mgr, dns_resstatscounter_mismatch); goto next; } @@ -597,26 +651,25 @@ next: * proper response to arrive until the original timeout fires. */ response = NULL; - dispatch_getnext(disp, resp, resp->timeout - dispentry_runtime(resp)); + udp_dispatch_getnext(resp, timeout); done: UNLOCK(&disp->lock); if (response != NULL) { + dispentry_log(resp, LVL(90), "UDP read callback on %p: %s", + handle, isc_result_totext(eresult)); response(eresult, region, resp->arg); } - dispentry_detach(&resp); + dns_dispentry_detach(&resp); /* DISPENTRY003 */ } static isc_result_t -tcp_recv_timeout(dns_dispatch_t *disp, dns_dispentry_t **respp) { - dns_dispentry_t *resp = ISC_LIST_HEAD(disp->active); +tcp_recv_oldest(dns_dispatch_t *disp, dns_dispentry_t **respp) { + dns_dispentry_t *resp = NULL; + resp = ISC_LIST_HEAD(disp->active); if (resp != NULL) { - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - ISC_LIST_UNLINK(disp->active, resp, alink); - ISC_LIST_APPEND(disp->active, resp, alink); - disp->timedout++; *respp = resp; @@ -636,7 +689,7 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, isc_result_t result = ISC_R_SUCCESS; dns_dispentry_t *resp = NULL; - dispatch_log(disp, LVL(90), "success, length == %d, addr = %p", + dispatch_log(disp, LVL(90), "TCP read success, length == %d, addr = %p", region->length, region->base); /* @@ -671,30 +724,15 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, LOCK(&qid->lock); resp = entry_search(qid, peer, id, disp->localport, bucket); if (resp != NULL) { - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - *respp = resp; - } else if (disp->timedout > 0) { - /* There was active query that timed-out before */ - disp->timedout--; - - resp = ISC_LIST_HEAD(disp->active); - if (resp != NULL) { - /* - * It's a DNS response, but didn't match any outstanding - * queries. It's possible we would have timed out by - * now, but non-matching responses prevented it, so we - * check the age of the oldest active resp. - */ - int timeout = resp->timeout - dispentry_runtime(resp); - if (timeout <= 0) { - result = tcp_recv_timeout(disp, respp); - } + if (resp->reading) { + *respp = resp; } else { - result = ISC_R_NOTFOUND; + /* We already got our DNS message. */ + result = ISC_R_UNEXPECTED; } } else { /* We are not expecting this DNS message */ - result = ISC_R_UNEXPECTED; + result = ISC_R_NOTFOUND; } dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", bucket, isc_result_totext(result)); @@ -704,7 +742,19 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, } static void -tcp_recv_shutdown(dns_dispatch_t *disp, dns_displist_t *resps) { +tcp_recv_add(dns_displist_t *resps, dns_dispentry_t *resp, + isc_result_t result) { + dns_dispentry_ref(resp); /* DISPENTRY009 */ + ISC_LIST_UNLINK(resp->disp->active, resp, alink); + ISC_LIST_APPEND(*resps, resp, rlink); + INSIST(resp->reading); + resp->reading = false; + resp->result = result; +} + +static void +tcp_recv_shutdown(dns_dispatch_t *disp, dns_displist_t *resps, + isc_result_t result) { dns_dispentry_t *resp = NULL, *next = NULL; /* @@ -712,29 +762,28 @@ tcp_recv_shutdown(dns_dispatch_t *disp, dns_displist_t *resps) { */ for (resp = ISC_LIST_HEAD(disp->active); resp != NULL; resp = next) { next = ISC_LIST_NEXT(resp, alink); - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - ISC_LIST_UNLINK(disp->active, resp, alink); - ISC_LIST_APPEND(*resps, resp, rlink); + tcp_recv_add(resps, resp, result); } } static void tcp_recv_done(dns_dispentry_t *resp, isc_result_t eresult, isc_region_t *region) { + dispentry_log(resp, LVL(90), "read callback: %s", + isc_result_totext(eresult)); + resp->response(eresult, region, resp->arg); - dispentry_detach(&resp); + dns_dispentry_detach(&resp); /* DISPENTRY009 */ } static void -tcp_recv_cancelall(dns_displist_t *resps, isc_region_t *region, - isc_result_t result) { +tcp_recv_processall(dns_displist_t *resps, isc_region_t *region) { dns_dispentry_t *resp = NULL, *next = NULL; for (resp = ISC_LIST_HEAD(*resps); resp != NULL; resp = next) { next = ISC_LIST_NEXT(resp, rlink); ISC_LIST_UNLINK(*resps, resp, rlink); - resp->response(result, region, resp->arg); - dispentry_detach(&resp); + tcp_recv_done(resp, resp->result, region); } } @@ -744,8 +793,6 @@ tcp_recv_cancelall(dns_displist_t *resps, isc_region_t *region, * If I/O result == CANCELED, EOF, or error, notify everyone as the * various queues drain. * - * If query, restart. - * * If response: * Allocate event, fill in details. * If cannot allocate, restart. @@ -761,25 +808,82 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, dns_qid_t *qid = NULL; char buf[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_t peer; - dns_displist_t resps; + dns_displist_t resps = ISC_LIST_INITIALIZER; REQUIRE(VALID_DISPATCH(disp)); - atomic_store(&disp->tcpreading, false); - qid = disp->mgr->qid; - ISC_LIST_INIT(resps); - LOCK(&disp->lock); + INSIST(disp->reading); + disp->reading = false; - dispatch_log(disp, LVL(90), "TCP read:%s:requests %d, buffers %d", - isc_result_totext(result), disp->requests, - disp->tcpbuffers); + dispatch_log(disp, LVL(90), "TCP read:%s:requests %u", + isc_result_totext(result), disp->requests); peer = isc_nmhandle_peeraddr(handle); + /* + * Phase 1: Process timeout and success. + */ switch (result) { + case ISC_R_TIMEDOUT: + /* + * Time out the oldest response in the active queue. + */ + result = tcp_recv_oldest(disp, &resp); + break; + case ISC_R_SUCCESS: + /* We got an answer */ + result = tcp_recv_success(disp, region, qid, &peer, &resp); + break; + + default: + break; + } + + if (resp != NULL) { + tcp_recv_add(&resps, resp, result); + } + + /* + * Phase 2: Look if we timed out before. + */ + + if (result == ISC_R_NOTFOUND) { + if (disp->timedout > 0) { + /* There was active query that timed-out before */ + disp->timedout--; + } else { + result = ISC_R_UNEXPECTED; + } + } + + /* + * Phase 3: Trigger timeouts. It's possible that the responses would + * have been timedout out already, but non-matching TCP reads have + * prevented this. + */ + dns_dispentry_t *next = NULL; + for (resp = ISC_LIST_HEAD(disp->active); resp != NULL; resp = next) { + next = ISC_LIST_NEXT(resp, alink); + + /* FIXME: dispentry_runtime is always 0 for TCP */ + int timeout = resp->timeout - dispentry_runtime(resp); + if (timeout <= 0) { + tcp_recv_add(&resps, resp, ISC_R_TIMEDOUT); + } + } + + /* + * Phase 4: log if we errored out. + */ + switch (result) { + case ISC_R_SUCCESS: + case ISC_R_TIMEDOUT: + case ISC_R_NOTFOUND: + break; + case ISC_R_SHUTTINGDOWN: case ISC_R_CANCELED: case ISC_R_EOF: @@ -787,68 +891,33 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, isc_sockaddr_format(&peer, buf, sizeof(buf)); dispatch_log(disp, LVL(90), "shutting down TCP: %s: %s", buf, isc_result_totext(result)); - tcp_recv_shutdown(disp, &resps); + tcp_recv_shutdown(disp, &resps, result); break; - - case ISC_R_TIMEDOUT: - /* - * Time out the oldest response in the active queue, - * and move it to the end. (We don't remove it from the - * active queue immediately, though, because the callback - * might decide to keep waiting and leave it active.) - */ - result = tcp_recv_timeout(disp, &resp); - break; - - case ISC_R_SUCCESS: - /* We got an answer */ - result = tcp_recv_success(disp, region, qid, &peer, &resp); - if (result != ISC_R_UNEXPECTED) { - /* - * It's a valid DNS response, which may or may not - * have matched an outstanding query. - */ - - dispatch_getnext(disp, NULL, -1); - break; - } - - /* Got an invalid DNS response, terminate the connection */ - FALLTHROUGH; default: isc_sockaddr_format(&peer, buf, sizeof(buf)); dispatch_log(disp, ISC_LOG_ERROR, "shutting down due to TCP " "receive error: %s: %s", buf, isc_result_totext(result)); - tcp_recv_shutdown(disp, &resps); + tcp_recv_shutdown(disp, &resps, result); break; } + /* + * Phase 5: Resume reading if there are still active responses + */ + if (!ISC_LIST_EMPTY(disp->active)) { + tcp_startrecv(NULL, disp, ISC_LIST_HEAD(disp->active)); + } + UNLOCK(&disp->lock); - switch (result) { - case ISC_R_SUCCESS: - case ISC_R_TIMEDOUT: - /* - * Either we found a matching response, or we timed out - * and canceled the oldest resp. - */ - INSIST(resp != NULL); - tcp_recv_done(resp, result, region); - break; - case ISC_R_NOTFOUND: - /* - * Either we got a response that didn't match any active - * resps, or we timed out but there *were* no active resps. - */ - break; - default: - /* We're being shut down; cancel all outstanding resps. */ - tcp_recv_cancelall(&resps, region, result); - } + /* + * Phase 6: Process all scheduled callbacks. + */ + tcp_recv_processall(&resps, region); - dns_dispatch_detach(&disp); + dns_dispatch_detach(&disp); /* DISPATCH002 */ } /*% @@ -931,6 +1000,10 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t)); *mgr = (dns_dispatchmgr_t){ .magic = 0 }; +#if DNS_DISPATCH_TRACE + fprintf(stderr, "dns_dispatchmgr__init:%s:%s:%d:%p->references = 1\n", + __func__, __FILE__, __LINE__, mgr); +#endif isc_refcount_init(&mgr->references, 1); isc_mem_attach(mctx, &mgr->mctx); @@ -955,31 +1028,11 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, return (ISC_R_SUCCESS); } -void -dns_dispatchmgr_attach(dns_dispatchmgr_t *mgr, dns_dispatchmgr_t **mgrp) { - REQUIRE(VALID_DISPATCHMGR(mgr)); - REQUIRE(mgrp != NULL && *mgrp == NULL); - - isc_refcount_increment(&mgr->references); - - *mgrp = mgr; -} - -void -dns_dispatchmgr_detach(dns_dispatchmgr_t **mgrp) { - dns_dispatchmgr_t *mgr = NULL; - uint_fast32_t ref; - - REQUIRE(mgrp != NULL && VALID_DISPATCHMGR(*mgrp)); - - mgr = *mgrp; - *mgrp = NULL; - - ref = isc_refcount_decrement(&mgr->references); - if (ref == 1) { - dispatchmgr_destroy(mgr); - } -} +#if DNS_DISPATCH_TRACE +ISC_REFCOUNT_TRACE_IMPL(dns_dispatchmgr, dispatchmgr_destroy); +#else +ISC_REFCOUNT_IMPL(dns_dispatchmgr, dispatchmgr_destroy); +#endif void dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) { @@ -1101,46 +1154,25 @@ dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, */ disp = isc_mem_get(mgr->mctx, sizeof(*disp)); - *disp = (dns_dispatch_t){ .socktype = type }; + *disp = (dns_dispatch_t){ + .socktype = type, + .link = ISC_LINK_INITIALIZER, + .active = ISC_LIST_INITIALIZER, + .pending = ISC_LIST_INITIALIZER, + .magic = DISPATCH_MAGIC, + }; dns_dispatchmgr_attach(mgr, &disp->mgr); - isc_refcount_init(&disp->references, 1); - ISC_LINK_INIT(disp, link); - ISC_LIST_INIT(disp->active); - ISC_LIST_INIT(disp->pending); - +#if DNS_DISPATCH_TRACE + fprintf(stderr, "dns_dispatch__init:%s:%s:%d:%p->references = 1\n", + __func__, __FILE__, __LINE__, disp); +#endif + isc_refcount_init(&disp->references, 1); /* DISPATCH000 */ isc_mutex_init(&disp->lock); - disp->magic = DISPATCH_MAGIC; - *dispp = disp; } -/* - * MUST be unlocked, and not used by anything. - */ -static void -dispatch_free(dns_dispatch_t **dispp) { - dns_dispatch_t *disp = NULL; - dns_dispatchmgr_t *mgr = NULL; - - REQUIRE(VALID_DISPATCH(*dispp)); - disp = *dispp; - *dispp = NULL; - - disp->magic = 0; - - mgr = disp->mgr; - REQUIRE(VALID_DISPATCHMGR(mgr)); - - INSIST(disp->requests == 0); - INSIST(ISC_LIST_EMPTY(disp->active)); - - isc_mutex_destroy(&disp->lock); - - isc_mem_put(mgr->mctx, disp, sizeof(*disp)); -} - isc_result_t dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, const isc_sockaddr_t *destaddr, isc_dscp_t dscp, @@ -1175,8 +1207,17 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, ISC_LIST_APPEND(mgr->list, disp, link); UNLOCK(&mgr->lock); - mgr_log(mgr, LVL(90), "dns_dispatch_createtcp: created TCP dispatch %p", - disp); + if (isc_log_wouldlog(dns_lctx, 90)) { + char addrbuf[ISC_SOCKADDR_FORMATSIZE]; + + isc_sockaddr_format(localaddr, addrbuf, + ISC_SOCKADDR_FORMATSIZE); + + mgr_log(mgr, LVL(90), + "dns_dispatch_createtcp: created TCP dispatch %p for " + "%s", + disp, addrbuf); + } *dispp = disp; return (ISC_R_SUCCESS); @@ -1184,15 +1225,13 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, isc_result_t dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, - const isc_sockaddr_t *localaddr, bool *connected, - dns_dispatch_t **dispp) { + const isc_sockaddr_t *localaddr, dns_dispatch_t **dispp) { dns_dispatch_t *disp_connected = NULL; dns_dispatch_t *disp_fallback = NULL; isc_result_t result = ISC_R_NOTFOUND; REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(destaddr != NULL); - REQUIRE(connected != NULL); REQUIRE(dispp != NULL && *dispp == NULL); LOCK(&mgr->lock); @@ -1224,40 +1263,52 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, (localaddr == NULL || isc_sockaddr_eqaddr(localaddr, &sockname))) { - if (atomic_load(&disp->tcpstate) == - DNS_DISPATCHSTATE_CONNECTED) - { - /* We found connected dispatch */ - disp_connected = disp; - UNLOCK(&disp->lock); + switch (disp->state) { + case DNS_DISPATCHSTATE_CONNECTED: + /* We found a connected dispatch */ + dns_dispatch_attach(disp, &disp_connected); break; + case DNS_DISPATCHSTATE_NONE: + case DNS_DISPATCHSTATE_CONNECTING: + /* We found "a" dispatch, store it for later */ + if (disp_fallback == NULL) { + dns_dispatch_attach(disp, + &disp_fallback); + } + break; + case DNS_DISPATCHSTATE_CANCELED: + /* + * We found a canceled dispatch, help its + * removal from the list, and skip it. + */ + ISC_LIST_UNLINK(disp->mgr->list, disp, link); + break; + default: + UNREACHABLE(); } - - /* We found "a" dispatch, store it for later */ - if (disp_fallback == NULL) { - disp_fallback = disp; - } - - UNLOCK(&disp->lock); - continue; } UNLOCK(&disp->lock); + + if (disp_connected != NULL) { + break; + } } if (disp_connected != NULL) { /* We found connected dispatch */ INSIST(disp_connected->handle != NULL); - *connected = true; - dns_dispatch_attach(disp_connected, dispp); + *dispp = disp_connected; + disp_connected = NULL; result = ISC_R_SUCCESS; - } else if (disp_fallback != NULL) { - /* We found matching dispatch */ - *connected = false; - dns_dispatch_attach(disp_fallback, dispp); + if (disp_fallback != NULL) { + dns_dispatch_detach(&disp_fallback); + } + } else if (disp_fallback != NULL) { + *dispp = disp_fallback; result = ISC_R_SUCCESS; } @@ -1294,8 +1345,6 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, dns_dispatch_t *disp = NULL; isc_sockaddr_t sa_any; - dispatch_allocate(mgr, isc_socktype_udp, &disp); - /* * Check whether this address/port is available locally. */ @@ -1303,58 +1352,66 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) { result = isc_nm_checkaddr(localaddr, isc_socktype_udp); if (result != ISC_R_SUCCESS) { - goto cleanup; + return (result); } } + dispatch_allocate(mgr, isc_socktype_udp, &disp); + if (isc_log_wouldlog(dns_lctx, 90)) { char addrbuf[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_format(localaddr, addrbuf, ISC_SOCKADDR_FORMATSIZE); mgr_log(mgr, LVL(90), - "dispatch_createudp: created UDP dispatch for %s", - addrbuf); + "dispatch_createudp: created UDP dispatch %p for %s", + disp, addrbuf); } disp->local = *localaddr; /* - * Append it to the dispatcher list. + * Don't append it to the dispatcher list, we don't care about UDP, only + * TCP should be searched + * + * ISC_LIST_APPEND(mgr->list, disp, link); */ - ISC_LIST_APPEND(mgr->list, disp, link); - - mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp); *dispp = disp; return (result); - - /* - * Error returns. - */ -cleanup: - dispatch_free(&disp); - - return (result); } static void dispatch_destroy(dns_dispatch_t *disp) { dns_dispatchmgr_t *mgr = disp->mgr; + isc_refcount_destroy(&disp->references); + disp->magic = 0; + LOCK(&mgr->lock); - ISC_LIST_UNLINK(mgr->list, disp, link); + if (ISC_LINK_LINKED(disp, link)) { + ISC_LIST_UNLINK(disp->mgr->list, disp, link); + } UNLOCK(&mgr->lock); - dispatch_log(disp, LVL(90), "shutting down; detaching from handle %p", - disp->handle); + INSIST(disp->requests == 0); + INSIST(ISC_LIST_EMPTY(disp->pending)); + INSIST(ISC_LIST_EMPTY(disp->active)); - if (disp->handle != NULL) { + INSIST(!ISC_LINK_LINKED(disp, link)); + + dispatch_log(disp, LVL(90), "destroying dispatch %p", disp); + + if (disp->handle) { + dispatch_log(disp, LVL(90), "detaching TCP handle %p from %p", + disp->handle, &disp->handle); isc_nmhandle_detach(&disp->handle); } - dispatch_free(&disp); + isc_mutex_destroy(&disp->lock); + + isc_mem_put(mgr->mctx, disp, sizeof(*disp)); /* * Because dispatch uses mgr->mctx, we must detach after freeing @@ -1363,37 +1420,11 @@ dispatch_destroy(dns_dispatch_t *disp) { dns_dispatchmgr_detach(&mgr); } -void -dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) { - REQUIRE(VALID_DISPATCH(disp)); - REQUIRE(dispp != NULL && *dispp == NULL); - - isc_refcount_increment(&disp->references); - - *dispp = disp; -} - -void -dns_dispatch_detach(dns_dispatch_t **dispp) { - dns_dispatch_t *disp = NULL; - uint_fast32_t ref; - - REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp)); - - disp = *dispp; - *dispp = NULL; - - ref = isc_refcount_decrement(&disp->references); - dispatch_log(disp, LVL(90), "detach: refcount %" PRIuFAST32, ref - 1); - if (ref == 1) { - LOCK(&disp->lock); - INSIST(ISC_LIST_EMPTY(disp->pending)); - INSIST(ISC_LIST_EMPTY(disp->active)); - UNLOCK(&disp->lock); - - dispatch_destroy(disp); - } -} +#if DNS_DISPATCH_TRACE +ISC_REFCOUNT_TRACE_IMPL(dns_dispatch, dispatch_destroy); +#else +ISC_REFCOUNT_IMPL(dns_dispatch, dispatch_destroy); +#endif isc_result_t dns_dispatch_add(dns_dispatch_t *disp, unsigned int options, @@ -1401,69 +1432,62 @@ dns_dispatch_add(dns_dispatch_t *disp, unsigned int options, dns_transport_t *transport, isc_tlsctx_cache_t *tlsctx_cache, dispatch_cb_t connected, dispatch_cb_t sent, dispatch_cb_t response, void *arg, dns_messageid_t *idp, - dns_dispentry_t **resp) { - dns_dispentry_t *res = NULL; + dns_dispentry_t **respp) { + dns_dispentry_t *resp = NULL; dns_qid_t *qid = NULL; in_port_t localport = 0; dns_messageid_t id; unsigned int bucket; bool ok = false; int i = 0; - dispatch_cb_t oldest_response = NULL; REQUIRE(VALID_DISPATCH(disp)); REQUIRE(dest != NULL); - REQUIRE(resp != NULL && *resp == NULL); + REQUIRE(respp != NULL && *respp == NULL); REQUIRE(idp != NULL); REQUIRE(disp->socktype == isc_socktype_tcp || disp->socktype == isc_socktype_udp); + REQUIRE(connected != NULL); + REQUIRE(response != NULL); + REQUIRE(sent != NULL); LOCK(&disp->lock); - if (disp->requests >= DNS_DISPATCH_MAXREQUESTS) { + if (disp->state == DNS_DISPATCHSTATE_CANCELED) { UNLOCK(&disp->lock); - return (ISC_R_QUOTA); + return (ISC_R_CANCELED); } qid = disp->mgr->qid; - if (disp->socktype == isc_socktype_udp && - disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) - { - dns_dispentry_t *oldest = NULL; + resp = isc_mem_get(disp->mgr->mctx, sizeof(*resp)); - /* - * Kill oldest outstanding query if the number of sockets - * exceeds the quota to keep the room for new queries. - */ - oldest = ISC_LIST_HEAD(disp->active); - if (oldest != NULL) { - oldest_response = oldest->response; - inc_stats(disp->mgr, dns_resstatscounter_dispabort); - } - } + *resp = (dns_dispentry_t){ + .port = localport, + .timeout = timeout, + .peer = *dest, + .connected = connected, + .sent = sent, + .response = response, + .arg = arg, + .link = ISC_LINK_INITIALIZER, + .alink = ISC_LINK_INITIALIZER, + .plink = ISC_LINK_INITIALIZER, + .rlink = ISC_LINK_INITIALIZER, + .magic = RESPONSE_MAGIC, + }; - res = isc_mem_get(disp->mgr->mctx, sizeof(*res)); - - *res = (dns_dispentry_t){ .port = localport, - .timeout = timeout, - .peer = *dest, - .connected = connected, - .sent = sent, - .response = response, - .arg = arg }; - - isc_refcount_init(&res->references, 1); - - ISC_LINK_INIT(res, link); - ISC_LINK_INIT(res, alink); - ISC_LINK_INIT(res, plink); - ISC_LINK_INIT(res, rlink); +#if DNS_DISPATCH_TRACE + fprintf(stderr, "dns_dispentry__init:%s:%s:%d:%p->references = 1\n", + __func__, __FILE__, __LINE__, res); +#endif + isc_refcount_init(&resp->references, 1); /* DISPENTRY000 */ if (disp->socktype == isc_socktype_udp) { - isc_result_t result = setup_socket(disp, res, dest, &localport); + isc_result_t result = setup_socket(disp, resp, dest, + &localport); if (result != ISC_R_SUCCESS) { - isc_mem_put(disp->mgr->mctx, res, sizeof(*res)); + isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); UNLOCK(&disp->lock); inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); return (result); @@ -1490,347 +1514,486 @@ dns_dispatch_add(dns_dispatch_t *disp, unsigned int options, ok = true; break; } + if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { + /* When using fixed ID, we either must use it or fail */ + break; + } id += qid->qid_increment; id &= 0x0000ffff; } while (i++ < 64); + + if (ok) { + resp->id = id; + resp->bucket = bucket; + ISC_LIST_APPEND(qid->qid_table[bucket], resp, link); + } UNLOCK(&qid->lock); if (!ok) { - isc_mem_put(disp->mgr->mctx, res, sizeof(*res)); + isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); UNLOCK(&disp->lock); return (ISC_R_NOMORE); } if (transport != NULL) { - dns_transport_attach(transport, &res->transport); + dns_transport_attach(transport, &resp->transport); } if (tlsctx_cache != NULL) { - isc_tlsctx_cache_attach(tlsctx_cache, &res->tlsctx_cache); + isc_tlsctx_cache_attach(tlsctx_cache, &resp->tlsctx_cache); } - dns_dispatch_attach(disp, &res->disp); - - res->id = id; - res->bucket = bucket; - res->magic = RESPONSE_MAGIC; + dns_dispatch_attach(disp, &resp->disp); /* DISPATCH001 */ disp->requests++; - LOCK(&qid->lock); - ISC_LIST_APPEND(qid->qid_table[bucket], res, link); - UNLOCK(&qid->lock); - inc_stats(disp->mgr, (disp->socktype == isc_socktype_udp) ? dns_resstatscounter_disprequdp : dns_resstatscounter_dispreqtcp); - ISC_LIST_APPEND(disp->active, res, alink); - UNLOCK(&disp->lock); - if (oldest_response != NULL) { - oldest_response(ISC_R_CANCELED, NULL, res->arg); - } - *idp = id; - *resp = res; + *respp = resp; return (ISC_R_SUCCESS); } -void -dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, int32_t timeout) { - REQUIRE(timeout <= (int32_t)UINT16_MAX); - - switch (disp->socktype) { - case isc_socktype_udp: - REQUIRE(resp != NULL); - - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - if (timeout > 0) { - isc_nmhandle_settimeout(resp->handle, timeout); - } - isc_nm_read(resp->handle, udp_recv, resp); - break; - - case isc_socktype_tcp: - if (atomic_compare_exchange_strong(&disp->tcpreading, - &(bool){ false }, true)) - { - dns_dispatch_attach(disp, &(dns_dispatch_t *){ NULL }); - if (timeout > 0) { - isc_nmhandle_settimeout(disp->handle, timeout); - } - isc_nm_read(disp->handle, tcp_recv, disp); - } - break; - - default: - UNREACHABLE(); - } -} - isc_result_t dns_dispatch_getnext(dns_dispentry_t *resp) { - dns_dispatch_t *disp = NULL; - int32_t timeout; - REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); - disp = resp->disp; + dns_dispatch_t *disp = resp->disp; + isc_result_t result = ISC_R_SUCCESS; + int32_t timeout = -1; - REQUIRE(VALID_DISPATCH(disp)); - - if (disp->socktype == isc_socktype_udp) { + LOCK(&disp->lock); + switch (disp->socktype) { + case isc_socktype_udp: { timeout = resp->timeout - dispentry_runtime(resp); if (timeout <= 0) { - return (ISC_R_TIMEDOUT); + result = ISC_R_TIMEDOUT; + break; } - } else { - timeout = -1; + udp_dispatch_getnext(resp, timeout); + break; + } + case isc_socktype_tcp: + tcp_dispatch_getnext(disp, resp, timeout); + break; + default: + UNREACHABLE(); } - LOCK(&disp->lock); - dispatch_getnext(disp, resp, timeout); UNLOCK(&disp->lock); - return (ISC_R_SUCCESS); + + return (result); } -void -dns_dispatch_cancel(dns_dispentry_t **respp) { - dns_dispentry_t *resp = NULL; - dns_dispatch_t *disp = NULL; - - REQUIRE(respp != NULL); - - resp = *respp; - *respp = NULL; - +static void +udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); + REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr)); - disp = resp->disp; - resp->canceled = true; - - /* Connected UDP. */ - if (resp->handle != NULL) { - isc_nm_cancelread(resp->handle); - goto done; - } + dns_dispatch_t *disp = resp->disp; + dns_dispatchmgr_t *mgr = disp->mgr; + dns_qid_t *qid = mgr->qid; + dispatch_cb_t connected = NULL; + dispatch_cb_t response = NULL; LOCK(&disp->lock); - /* TCP pending connection. */ - if (ISC_LINK_LINKED(resp, plink)) { - dns_dispentry_t *copy = resp; + dispentry_log(resp, LVL(90), + "canceling response: %s, %s/%s (%s/%s), " + "requests %u", + isc_result_totext(result), state2str(resp->state), + resp->reading ? "reading" : "not reading", + state2str(disp->state), + disp->reading ? "reading" : "not reading", + disp->requests); - ISC_LIST_UNLINK(disp->pending, resp, plink); - if (resp->connected != NULL) { - resp->connected(ISC_R_CANCELED, NULL, resp->arg); - } - - /* - * We need to detach twice if we were pending - * connection - once to take the place of the - * detach in tcp_connected() or udp_connected() - * that we won't reach, and again later in - * dns_dispatch_done(). - */ - dispentry_detach(©); - UNLOCK(&disp->lock); - goto done; - } - - /* - * Connected TCP, or unconnected UDP. - * - * If TCP, we don't want to cancel the dispatch - * unless this is the last resp waiting. - */ if (ISC_LINK_LINKED(resp, alink)) { ISC_LIST_UNLINK(disp->active, resp, alink); - if (ISC_LIST_EMPTY(disp->active) && disp->handle != NULL) { - isc_nm_cancelread(disp->handle); - } else if (resp->response != NULL) { - resp->response(ISC_R_CANCELED, NULL, resp->arg); - } } - UNLOCK(&disp->lock); -done: - dns_dispatch_done(&resp); -} - -void -dns_dispatch_done(dns_dispentry_t **respp) { - dns_dispatchmgr_t *mgr = NULL; - dns_dispatch_t *disp = NULL; - dns_dispentry_t *resp = NULL; - dns_qid_t *qid = NULL; - - REQUIRE(respp != NULL); - - resp = *respp; - - REQUIRE(VALID_RESPONSE(resp)); - - disp = resp->disp; - - REQUIRE(VALID_DISPATCH(disp)); - - mgr = disp->mgr; - - REQUIRE(VALID_DISPATCHMGR(mgr)); - - qid = mgr->qid; - - LOCK(&disp->lock); - INSIST(disp->requests > 0); - disp->requests--; - - dec_stats(disp->mgr, (disp->socktype == isc_socktype_udp) - ? dns_resstatscounter_disprequdp - : dns_resstatscounter_dispreqtcp); - - deactivate_dispentry(disp, resp); - - LOCK(&qid->lock); - ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link); - UNLOCK(&qid->lock); - UNLOCK(&disp->lock); - - dispentry_detach(respp); -} - -/* - * disp must be locked. - */ -static void -startrecv(isc_nmhandle_t *handle, dns_dispatch_t *disp, dns_dispentry_t *resp) { - switch (disp->socktype) { - case isc_socktype_udp: - REQUIRE(resp != NULL && resp->handle == NULL); - - TIME_NOW(&resp->start); - isc_nmhandle_attach(handle, &resp->handle); - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - isc_nm_read(resp->handle, udp_recv, resp); + switch (resp->state) { + case DNS_DISPATCHSTATE_NONE: break; - case isc_socktype_tcp: - REQUIRE(disp != NULL); - - LOCK(&disp->lock); - REQUIRE(disp->handle == NULL); - atomic_compare_exchange_enforced( - &disp->tcpstate, - &(uint_fast32_t){ DNS_DISPATCHSTATE_CONNECTING }, - DNS_DISPATCHSTATE_CONNECTED); - - isc_nmhandle_attach(handle, &disp->handle); - dns_dispatch_attach(disp, &(dns_dispatch_t *){ NULL }); - isc_nm_read(disp->handle, tcp_recv, disp); - UNLOCK(&disp->lock); - + case DNS_DISPATCHSTATE_CONNECTING: + dns_dispentry_ref(resp); /* DISPENTRY008 */ + ISC_LIST_UNLINK(disp->pending, resp, plink); + connected = resp->connected; break; + case DNS_DISPATCHSTATE_CONNECTED: + if (resp->reading) { + dns_dispentry_ref(resp); /* DISPENTRY003 */ + response = resp->response; + + dispentry_log(resp, LVL(90), "canceling read on %p", + resp->handle); + isc_nm_cancelread(resp->handle); + } + break; + + case DNS_DISPATCHSTATE_CANCELED: + goto unlock; + default: UNREACHABLE(); } + + dec_stats(disp->mgr, dns_resstatscounter_disprequdp); + + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link); + UNLOCK(&qid->lock); + resp->state = DNS_DISPATCHSTATE_CANCELED; + +unlock: + UNLOCK(&disp->lock); + + if (connected) { + dispentry_log(resp, LVL(90), "connect callback: %s", + isc_result_totext(result)); + connected(result, NULL, resp->arg); + dns_dispentry_detach(&resp); /* DISPENTRY008 */ + } + if (response) { + dispentry_log(resp, LVL(90), "read callback: %s", + isc_result_totext(result)); + response(result, NULL, resp->arg); + dns_dispentry_detach(&resp); /* DISPENTRY003 */ + } +} + +static void +tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { + REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); + REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr)); + + dns_dispatch_t *disp = resp->disp; + dns_dispatchmgr_t *mgr = disp->mgr; + dns_qid_t *qid = mgr->qid; + dispatch_cb_t connected = NULL; + dns_displist_t resps = ISC_LIST_INITIALIZER; + + LOCK(&disp->lock); + dispentry_log(resp, LVL(90), + "canceling response: %s, %s/%s (%s/%s), " + "requests %u", + isc_result_totext(result), state2str(resp->state), + resp->reading ? "reading" : "not reading", + state2str(disp->state), + disp->reading ? "reading" : "not reading", + disp->requests); + + switch (resp->state) { + case DNS_DISPATCHSTATE_NONE: + break; + + case DNS_DISPATCHSTATE_CONNECTING: + ISC_LIST_UNLINK(disp->pending, resp, plink); + connected = resp->connected; + break; + + case DNS_DISPATCHSTATE_CONNECTED: + if (resp->reading) { + tcp_recv_add(&resps, resp, ISC_R_CANCELED); + } + + INSIST(!ISC_LINK_LINKED(resp, alink)); + + if (ISC_LIST_EMPTY(disp->active)) { + INSIST(disp->handle != NULL); + +#if DISPATCH_TCP_KEEPALIVE + /* + * This is an experimental code that keeps the TCP + * connection open for 1 second before it is finally + * closed. By keeping the TCP connection open, it can + * be reused by dns_request that uses + * dns_dispatch_gettcp() to join existing TCP + * connections. + * + * It is disabled for now, because it changes the + * behaviour, but I am keeping the code here for future + * reference when we improve the dns_dispatch to reuse + * the TCP connections also in the resolver. + * + * The TCP connection reuse should be seamless and not + * require any extra handling on the client side though. + */ + isc_nmhandle_cleartimeout(disp->handle); + isc_nmhandle_settimeout(disp->handle, 1000); + + if (!disp->reading) { + dispentry_log(resp, LVL(90), + "final 1 second timeout on %p", + disp->handle); + tcp_startrecv(NULL, disp, NULL); + } +#else + if (disp->reading) { + dispentry_log(resp, LVL(90), + "canceling read on %p", + disp->handle); + isc_nm_cancelread(disp->handle); + } +#endif + } + break; + + case DNS_DISPATCHSTATE_CANCELED: + goto unlock; + + default: + UNREACHABLE(); + } + + dec_stats(disp->mgr, dns_resstatscounter_dispreqtcp); + + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link); + UNLOCK(&qid->lock); + resp->state = DNS_DISPATCHSTATE_CANCELED; + +unlock: + UNLOCK(&disp->lock); + + /* + * NOTE: Calling the connected and response callbacks directly from here + * should be done asynchronously, as the dns_dispatch_done() is usually + * called directly from the response callback, so there's a slight + * chance that the call stack will get higher here, but it's mitigated + * by the ".reading" flag, so we don't ever go into a loop. + */ + + if (connected) { + dispentry_log(resp, LVL(90), "connect callback: %s", + isc_result_totext(result)); + connected(result, NULL, resp->arg); + dns_dispentry_detach(&resp); /* DISPENTRY005 */ + } + + tcp_recv_processall(&resps, NULL); +} + +static void +dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { + REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); + + dns_dispatch_t *disp = resp->disp; + + switch (disp->socktype) { + case isc_socktype_udp: + udp_dispentry_cancel(resp, result); + break; + case isc_socktype_tcp: + tcp_dispentry_cancel(resp, result); + break; + default: + UNREACHABLE(); + } +} + +void +dns_dispatch_done(dns_dispentry_t **respp) { + REQUIRE(VALID_RESPONSE(*respp)); + + dns_dispentry_t *resp = *respp; + *respp = NULL; + + dispentry_cancel(resp, ISC_R_CANCELED); + dns_dispentry_detach(&resp); /* DISPENTRY000 */ +} + +static void +udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp) { + REQUIRE(VALID_RESPONSE(resp)); + + TIME_NOW(&resp->start); + dispentry_log(resp, LVL(90), "attaching handle %p to %p", handle, + &resp->handle); + isc_nmhandle_attach(handle, &resp->handle); + dns_dispentry_ref(resp); /* DISPENTRY003 */ + dispentry_log(resp, LVL(90), "reading"); + isc_nm_read(resp->handle, udp_recv, resp); + resp->reading = true; +} + +static void +tcp_startrecv(isc_nmhandle_t *handle, dns_dispatch_t *disp, + dns_dispentry_t *resp) { + REQUIRE(VALID_DISPATCH(disp)); + REQUIRE(disp->socktype == isc_socktype_tcp); + + if (handle != NULL) { + isc_nmhandle_attach(handle, &disp->handle); + } + dns_dispatch_ref(disp); /* DISPATCH002 */ + if (resp != NULL) { + dispentry_log(resp, LVL(90), "reading from %p", disp->handle); + } else { + dispatch_log(disp, LVL(90), + "TCP reading without response from %p", + disp->handle); + } + isc_nm_read(disp->handle, tcp_recv, disp); + disp->reading = true; } static void tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { dns_dispatch_t *disp = (dns_dispatch_t *)arg; - dns_dispentry_t *resp = NULL, *next = NULL; - dns_displist_t resps; + dns_dispentry_t *resp = NULL; + dns_dispentry_t *next = NULL; + dns_displist_t resps = ISC_LIST_INITIALIZER; - dispatch_log(disp, LVL(90), "TCP connected (%p): %s", disp, - isc_result_totext(eresult)); + if (isc_log_wouldlog(dns_lctx, 90)) { + char localbuf[ISC_SOCKADDR_FORMATSIZE]; + char peerbuf[ISC_SOCKADDR_FORMATSIZE]; + if (handle != NULL) { + isc_sockaddr_t local = isc_nmhandle_localaddr(handle); + isc_sockaddr_t peer = isc_nmhandle_peeraddr(handle); - ISC_LIST_INIT(resps); + isc_sockaddr_format(&local, localbuf, + ISC_SOCKADDR_FORMATSIZE); + isc_sockaddr_format(&peer, peerbuf, + ISC_SOCKADDR_FORMATSIZE); + } else { + isc_sockaddr_format(&disp->local, localbuf, + ISC_SOCKADDR_FORMATSIZE); + isc_sockaddr_format(&disp->peer, peerbuf, + ISC_SOCKADDR_FORMATSIZE); + } + + dispatch_log(disp, LVL(90), "connected from %s to %s: %s", + localbuf, peerbuf, isc_result_totext(eresult)); + } + + LOCK(&disp->lock); + INSIST(disp->state = DNS_DISPATCHSTATE_CONNECTING); if (eresult == ISC_R_SUCCESS) { - startrecv(handle, disp, NULL); + disp->state = DNS_DISPATCHSTATE_CONNECTED; + tcp_startrecv(handle, disp, resp); + } else { + disp->state = DNS_DISPATCHSTATE_NONE; } /* * If there are pending responses, call the connect * callbacks for all of them. */ - LOCK(&disp->lock); for (resp = ISC_LIST_HEAD(disp->pending); resp != NULL; resp = next) { next = ISC_LIST_NEXT(resp, plink); ISC_LIST_UNLINK(disp->pending, resp, plink); - ISC_LIST_APPEND(resps, resp, plink); + ISC_LIST_APPEND(resps, resp, rlink); + + if (eresult == ISC_R_SUCCESS) { + resp->state = DNS_DISPATCHSTATE_CONNECTED; + ISC_LIST_APPEND(disp->active, resp, alink); + resp->reading = true; + dispentry_log(resp, LVL(90), "start reading"); + } else { + resp->state = DNS_DISPATCHSTATE_NONE; + } } UNLOCK(&disp->lock); for (resp = ISC_LIST_HEAD(resps); resp != NULL; resp = next) { - next = ISC_LIST_NEXT(resp, plink); - ISC_LIST_UNLINK(resps, resp, plink); + next = ISC_LIST_NEXT(resp, rlink); + ISC_LIST_UNLINK(resps, resp, rlink); - if (resp->connected != NULL) { - resp->connected(eresult, NULL, resp->arg); - } - dispentry_detach(&resp); + dispentry_log(resp, LVL(90), "connect callback: %s", + isc_result_totext(eresult)); + resp->connected(eresult, NULL, resp->arg); + dns_dispentry_detach(&resp); /* DISPENTRY005 */ } - dns_dispatch_detach(&disp); + dns_dispatch_detach(&disp); /* DISPATCH003 */ } +static void +udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp); + static void udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { dns_dispentry_t *resp = (dns_dispentry_t *)arg; dns_dispatch_t *disp = resp->disp; - dispatch_log(disp, LVL(90), "UDP connected (%p): %s", resp, - isc_result_totext(eresult)); + dispentry_log(resp, LVL(90), "connected: %s", + isc_result_totext(eresult)); - if (eresult == ISC_R_SUCCESS && resp->canceled) { - eresult = ISC_R_CANCELED; - } else if (eresult == ISC_R_SUCCESS) { - startrecv(handle, disp, resp); - } else if (eresult == ISC_R_ADDRINUSE) { + LOCK(&disp->lock); + + switch (resp->state) { + case DNS_DISPATCHSTATE_CANCELED: + UNLOCK(&disp->lock); + goto detach; + case DNS_DISPATCHSTATE_CONNECTING: + ISC_LIST_UNLINK(disp->pending, resp, plink); + break; + default: + UNREACHABLE(); + } + + switch (eresult) { + case ISC_R_SUCCESS: + resp->state = DNS_DISPATCHSTATE_CONNECTED; + udp_startrecv(handle, resp); + break; + case ISC_R_ADDRINUSE: { in_port_t localport = 0; isc_result_t result; /* probably a port collision; try a different one */ - disp->nsockets--; result = setup_socket(disp, resp, &resp->peer, &localport); if (result == ISC_R_SUCCESS) { - dns_dispatch_connect(resp); + UNLOCK(&disp->lock); + udp_dispatch_connect(disp, resp); goto detach; } + resp->state = DNS_DISPATCHSTATE_NONE; + break; + } + default: + resp->state = DNS_DISPATCHSTATE_NONE; + break; } - if (resp->connected != NULL) { - resp->connected(eresult, NULL, resp->arg); - } + UNLOCK(&disp->lock); + + dispentry_log(resp, LVL(90), "connect callback: %s", + isc_result_totext(eresult)); + resp->connected(eresult, NULL, resp->arg); detach: - dispentry_detach(&resp); + dns_dispentry_detach(&resp); /* DISPENTRY004 */ } -isc_result_t -dns_dispatch_connect(dns_dispentry_t *resp) { - dns_dispatch_t *disp = NULL; - uint_fast32_t tcpstate = DNS_DISPATCHSTATE_NONE; - dns_transport_type_t transport_type = DNS_TRANSPORT_NONE; +static void +udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { + LOCK(&disp->lock); + resp->state = DNS_DISPATCHSTATE_CONNECTING; + dns_dispentry_ref(resp); /* DISPENTRY004 */ + ISC_LIST_APPEND(disp->pending, resp, plink); + UNLOCK(&disp->lock); + isc_nm_udpconnect(disp->mgr->nm, &resp->local, &resp->peer, + udp_connected, resp, resp->timeout); +} + +static isc_result_t +tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { + dns_transport_type_t transport_type = DNS_TRANSPORT_TCP; isc_tlsctx_t *tlsctx = NULL; isc_tlsctx_client_session_cache_t *sess_cache = NULL; - REQUIRE(VALID_RESPONSE(resp)); - - disp = resp->disp; - - if (disp->socktype == isc_socktype_tcp) { - transport_type = DNS_TRANSPORT_TCP; - } else if (disp->socktype == isc_socktype_udp) { - transport_type = DNS_TRANSPORT_UDP; - } else { - return (ISC_R_NOTIMPLEMENTED); - } - if (resp->transport != NULL) { transport_type = dns_transport_get_type(resp->transport); } @@ -1848,108 +2011,194 @@ dns_dispatch_connect(dns_dispentry_t *resp) { INSIST(tlsctx != NULL); } - /* This will be detached once we've connected. */ - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); + /* Check whether the dispatch is already connecting or connected. */ + LOCK(&disp->lock); + switch (disp->state) { + case DNS_DISPATCHSTATE_NONE: + /* First connection, continue with connecting */ + disp->state = DNS_DISPATCHSTATE_CONNECTING; + resp->state = DNS_DISPATCHSTATE_CONNECTING; + dns_dispentry_ref(resp); /* DISPENTRY005 */ + ISC_LIST_APPEND(disp->pending, resp, plink); + UNLOCK(&disp->lock); - switch (transport_type) { - case DNS_TRANSPORT_TCP: - case DNS_TRANSPORT_TLS: - /* - * Check whether the dispatch is already connecting - * or connected. - */ - atomic_compare_exchange_strong(&disp->tcpstate, - (uint_fast32_t *)&tcpstate, - DNS_DISPATCHSTATE_CONNECTING); - switch (tcpstate) { - case DNS_DISPATCHSTATE_NONE: - /* First connection, continue with connecting */ - LOCK(&disp->lock); - ISC_LIST_APPEND(disp->pending, resp, plink); - UNLOCK(&disp->lock); - dns_dispatch_attach(disp, &(dns_dispatch_t *){ NULL }); - if (transport_type == DNS_TRANSPORT_TLS) { - isc_nm_tlsdnsconnect( - disp->mgr->nm, &disp->local, - &disp->peer, tcp_connected, disp, - resp->timeout, tlsctx, sess_cache); - } else { - isc_nm_tcpdnsconnect(disp->mgr->nm, - &disp->local, &disp->peer, - tcp_connected, disp, - resp->timeout); - } - break; + char localbuf[ISC_SOCKADDR_FORMATSIZE]; + char peerbuf[ISC_SOCKADDR_FORMATSIZE]; - case DNS_DISPATCHSTATE_CONNECTING: - /* Connection pending; add resp to the list */ - LOCK(&disp->lock); - ISC_LIST_APPEND(disp->pending, resp, plink); - UNLOCK(&disp->lock); - break; + isc_sockaddr_format(&disp->local, localbuf, + ISC_SOCKADDR_FORMATSIZE); + isc_sockaddr_format(&disp->peer, peerbuf, + ISC_SOCKADDR_FORMATSIZE); - case DNS_DISPATCHSTATE_CONNECTED: - /* We are already connected; call the connected cb */ - if (resp->connected != NULL) { - resp->connected(ISC_R_SUCCESS, NULL, resp->arg); - } - dispentry_detach(&resp); - break; + dns_dispatch_ref(disp); /* DISPATCH003 */ + dispentry_log(resp, LVL(90), + "connecting from %s to %s, timeout %u", localbuf, + peerbuf, resp->timeout); - default: - UNREACHABLE(); + if (transport_type == DNS_TRANSPORT_TLS) { + isc_nm_tlsdnsconnect(disp->mgr->nm, &disp->local, + &disp->peer, tcp_connected, disp, + resp->timeout, tlsctx, sess_cache); + } else { + isc_nm_tcpdnsconnect(disp->mgr->nm, &disp->local, + &disp->peer, tcp_connected, disp, + resp->timeout); } - break; - case DNS_TRANSPORT_UDP: - isc_nm_udpconnect(disp->mgr->nm, &resp->local, &resp->peer, - udp_connected, resp, resp->timeout); + case DNS_DISPATCHSTATE_CONNECTING: + /* Connection pending; add resp to the list */ + resp->state = DNS_DISPATCHSTATE_CONNECTING; + dns_dispentry_ref(resp); /* DISPENTRY005 */ + ISC_LIST_APPEND(disp->pending, resp, plink); + UNLOCK(&disp->lock); + break; + + case DNS_DISPATCHSTATE_CONNECTED: + resp->state = DNS_DISPATCHSTATE_CONNECTED; + + /* Add the resp to the reading list */ + ISC_LIST_APPEND(disp->active, resp, alink); + dispentry_log(resp, LVL(90), "already connected; attaching"); + resp->reading = true; + + if (!disp->reading) { + /* Restart the reading */ + tcp_startrecv(NULL, disp, resp); + } + + UNLOCK(&disp->lock); + /* We are already connected; call the connected cb */ + dispentry_log(resp, LVL(90), "connect callback: %s", + isc_result_totext(ISC_R_SUCCESS)); + resp->connected(ISC_R_SUCCESS, NULL, resp->arg); break; default: - return (ISC_R_NOTIMPLEMENTED); + UNREACHABLE(); } return (ISC_R_SUCCESS); } +isc_result_t +dns_dispatch_connect(dns_dispentry_t *resp) { + REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); + + dns_dispatch_t *disp = resp->disp; + + switch (disp->socktype) { + case isc_socktype_tcp: + return (tcp_dispatch_connect(disp, resp)); + + case isc_socktype_udp: + udp_dispatch_connect(disp, resp); + return (ISC_R_SUCCESS); + + default: + UNREACHABLE(); + } +} + static void send_done(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { dns_dispentry_t *resp = (dns_dispentry_t *)cbarg; REQUIRE(VALID_RESPONSE(resp)); + dns_dispatch_t *disp = resp->disp; + + REQUIRE(VALID_DISPATCH(disp)); + + dispentry_log(resp, LVL(90), "sent: %s", isc_result_totext(result)); + resp->sent(result, NULL, resp->arg); if (result != ISC_R_SUCCESS) { - isc_nm_cancelread(handle); + dispentry_cancel(resp, result); } - dispentry_detach(&resp); + dns_dispentry_detach(&resp); /* DISPENTRY007 */ + isc_nmhandle_detach(&handle); +} + +static void +tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, + int32_t timeout) { + REQUIRE(timeout <= INT16_MAX); + + if (disp->reading) { + return; + } + + if (timeout > 0) { + isc_nmhandle_settimeout(disp->handle, timeout); + } + + dispentry_log(resp, LVL(90), "continue reading"); + + dns_dispatch_ref(disp); /* DISPATCH002 */ + isc_nm_read(disp->handle, tcp_recv, disp); + disp->reading = true; + + ISC_LIST_APPEND(disp->active, resp, alink); + resp->reading = true; +} + +static void +udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout) { + REQUIRE(timeout <= INT16_MAX); + + if (resp->reading) { + return; + } + + if (timeout > 0) { + isc_nmhandle_settimeout(resp->handle, timeout); + } + + dispentry_log(resp, LVL(90), "continue reading"); + + dns_dispentry_ref(resp); /* DISPENTRY003 */ + isc_nm_read(resp->handle, udp_recv, resp); + resp->reading = true; } void dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout) { - dns_dispatch_t *disp = NULL; - REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); - disp = resp->disp; + dns_dispatch_t *disp = resp->disp; - REQUIRE(VALID_DISPATCH(disp)); + LOCK(&disp->lock); + switch (disp->socktype) { + case isc_socktype_udp: { + udp_dispatch_getnext(resp, timeout); + break; + } + case isc_socktype_tcp: + INSIST(disp->timedout > 0); + disp->timedout--; + tcp_dispatch_getnext(disp, resp, timeout); + break; + default: + UNREACHABLE(); + } - dispatch_getnext(disp, resp, timeout); + UNLOCK(&disp->lock); } void dns_dispatch_send(dns_dispentry_t *resp, isc_region_t *r, isc_dscp_t dscp) { - isc_nmhandle_t *handle = NULL; - REQUIRE(VALID_RESPONSE(resp)); - + REQUIRE(VALID_DISPATCH(resp->disp)); UNUSED(dscp); + dns_dispatch_t *disp = resp->disp; + isc_nmhandle_t *sendhandle = NULL; + #if 0 /* XXX: no DSCP support */ if (dscp == -1) { @@ -1964,14 +2213,19 @@ dns_dispatch_send(dns_dispentry_t *resp, isc_region_t *r, isc_dscp_t dscp) { } #endif - if (resp->disp->socktype == isc_socktype_tcp) { - handle = resp->disp->handle; - } else { - handle = resp->handle; + dispentry_log(resp, LVL(90), "sending"); + switch (disp->socktype) { + case isc_socktype_udp: + isc_nmhandle_attach(resp->handle, &sendhandle); + break; + case isc_socktype_tcp: + isc_nmhandle_attach(disp->handle, &sendhandle); + break; + default: + UNREACHABLE(); } - - dispentry_attach(resp, &(dns_dispentry_t *){ NULL }); - isc_nm_send(handle, r, send_done, resp); + dns_dispentry_ref(resp); /* DISPENTRY007 */ + isc_nm_send(sendhandle, r, send_done, resp); } isc_result_t @@ -1989,19 +2243,21 @@ dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) { isc_result_t dns_dispentry_getlocaladdress(dns_dispentry_t *resp, isc_sockaddr_t *addrp) { REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(VALID_DISPATCH(resp->disp)); REQUIRE(addrp != NULL); - if (resp->disp->socktype == isc_socktype_tcp) { - *addrp = resp->disp->local; - return (ISC_R_SUCCESS); - } + dns_dispatch_t *disp = resp->disp; - if (resp->handle != NULL) { + switch (disp->socktype) { + case isc_socktype_tcp: + *addrp = disp->local; + return (ISC_R_SUCCESS); + case isc_socktype_udp: *addrp = isc_nmhandle_localaddr(resp->handle); return (ISC_R_SUCCESS); + default: + UNREACHABLE(); } - - return (ISC_R_NOTIMPLEMENTED); } dns_dispatch_t * @@ -2048,7 +2304,7 @@ dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, isc_mem_attach(mctx, &dset->mctx); dset->dispatches[0] = NULL; - dns_dispatch_attach(source, &dset->dispatches[0]); + dns_dispatch_attach(source, &dset->dispatches[0]); /* DISPATCH004 */ LOCK(&mgr->lock); for (i = 1; i < n; i++) { @@ -2069,7 +2325,7 @@ fail: UNLOCK(&mgr->lock); for (j = 0; j < i; j++) { - dns_dispatch_detach(&(dset->dispatches[j])); + dns_dispatch_detach(&(dset->dispatches[j])); /* DISPATCH004 */ } isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n); if (dset->mctx == mctx) { @@ -2091,7 +2347,7 @@ dns_dispatchset_destroy(dns_dispatchset_t **dsetp) { dset = *dsetp; *dsetp = NULL; for (i = 0; i < dset->ndisp; i++) { - dns_dispatch_detach(&(dset->dispatches[i])); + dns_dispatch_detach(&(dset->dispatches[i])); /* DISPATCH004 */ } isc_mem_put(dset->mctx, dset->dispatches, sizeof(dns_dispatch_t *) * dset->ndisp); diff --git a/lib/dns/include/dns/dispatch.h b/lib/dns/include/dns/dispatch.h index fff052cdd0..c740c62969 100644 --- a/lib/dns/include/dns/dispatch.h +++ b/lib/dns/include/dns/dispatch.h @@ -53,10 +53,13 @@ #include #include #include +#include #include #include +#undef DNS_DISPATCH_TRACE + ISC_LANG_BEGINDECLS /*% @@ -94,25 +97,22 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, dns_dispatchmgr_t **mgrp); *\li anything else -- failure */ -void -dns_dispatchmgr_attach(dns_dispatchmgr_t *mgr, dns_dispatchmgr_t **mgrp); -/*%< - * Attach to a dispatch manger. - * - * Requires: - *\li is valid. - * - *\li mgrp != NULL && *mgrp == NULL - */ +#if DNS_DISPATCH_TRACE +#define dns_dispatchmgr_ref(ptr) \ + dns_dispatchmgr__ref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispatchmgr_unref(ptr) \ + dns_dispatchmgr__unref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispatchmgr_attach(ptr, ptrp) \ + dns_dispatchmgr__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define dns_dispatchmgr_detach(ptrp) \ + dns_dispatchmgr__detach(ptrp, __func__, __FILE__, __LINE__) +ISC_REFCOUNT_TRACE_DECL(dns_dispatchmgr); +#else +ISC_REFCOUNT_DECL(dns_dispatchmgr); +#endif -void -dns_dispatchmgr_detach(dns_dispatchmgr_t **mgrp); /*%< - * Detach from the dispatch manager, and destroy it if no references - * remain. - * - * Requires: - *\li mgrp != NULL && *mgrp is a valid dispatchmgr. + * Attach/Detach to a dispatch manager. */ void @@ -201,10 +201,21 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, *\li Anything else -- failure. */ -void -dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp); +#if DNS_DISPATCH_TRACE +#define dns_dispatch_ref(ptr) \ + dns_dispatch__ref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispatch_unref(ptr) \ + dns_dispatch__unref(ptr, __func__, __FILE__, __LINE__) +#define dns_dispatch_attach(ptr, ptrp) \ + dns_dispatch__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define dns_dispatch_detach(ptrp) \ + dns_dispatch__detach(ptrp, __func__, __FILE__, __LINE__) +ISC_REFCOUNT_TRACE_DECL(dns_dispatch); +#else +ISC_REFCOUNT_DECL(dns_dispatch); +#endif /*%< - * Attach to a dispatch handle. + * Attach/Detach to a dispatch handle. * * Requires: *\li disp is valid. @@ -212,15 +223,6 @@ dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp); *\li dispp != NULL && *dispp == NULL */ -void -dns_dispatch_detach(dns_dispatch_t **dispp); -/*%< - * Detaches from the dispatch. - * - * Requires: - *\li dispp != NULL and *dispp be a valid dispatch. - */ - isc_result_t dns_dispatch_connect(dns_dispentry_t *resp); /*%< @@ -253,11 +255,9 @@ dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout); isc_result_t dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, - const isc_sockaddr_t *localaddr, bool *connected, - dns_dispatch_t **dispp); + const isc_sockaddr_t *localaddr, dns_dispatch_t **dispp); /* - * Attempt to connect to a existing TCP connection (connection completed - * if connected == NULL). + * Attempt to connect to a existing TCP connection. */ typedef void (*dispatch_cb_t)(isc_result_t eresult, isc_region_t *region, @@ -308,22 +308,10 @@ dns_dispatch_add(dns_dispatch_t *disp, unsigned int options, void dns_dispatch_done(dns_dispentry_t **respp); -/*%< - * Disconnects a dispatch response entry from its dispatch and shuts it - * down. This is called when the dispatch is complete; use - * dns_dispatch_cancel() if it is still pending. - * - * Requires: - *\li "resp" != NULL and "*resp" contain a value previously allocated - * by dns_dispatch_add(); - */ +/*< + * Disconnect a dispatch response entry from its dispatch, cancel all + * pending connects and reads in a dispatch entry and shut it down. -void -dns_dispatch_cancel(dns_dispentry_t **respp); -/*%< - * Cancel all pending connects and reads in a dispatch entry, - * then call dns_dispatch_done(). This is used when the caller - * cancels a dispatch response before it has completed. * * Requires: *\li "resp" != NULL and "*resp" contain a value previously allocated diff --git a/lib/dns/request.c b/lib/dns/request.c index 1116565cc4..24b3def175 100644 --- a/lib/dns/request.c +++ b/lib/dns/request.c @@ -339,20 +339,18 @@ isblackholed(dns_dispatchmgr_t *dispatchmgr, const isc_sockaddr_t *destaddr) { static isc_result_t tcp_dispatch(bool newtcp, dns_requestmgr_t *requestmgr, const isc_sockaddr_t *srcaddr, const isc_sockaddr_t *destaddr, - isc_dscp_t dscp, bool *connected, dns_dispatch_t **dispatchp) { + isc_dscp_t dscp, dns_dispatch_t **dispatchp) { isc_result_t result; if (!newtcp) { result = dns_dispatch_gettcp(requestmgr->dispatchmgr, destaddr, - srcaddr, connected, dispatchp); + srcaddr, dispatchp); if (result == ISC_R_SUCCESS) { char peer[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_format(destaddr, peer, sizeof(peer)); req_log(ISC_LOG_DEBUG(1), - "attached to %s TCP " - "connection to %s", - *connected ? "existing" : "pending", peer); + "attached to TCP connection to %s", peer); return (result); } } @@ -394,12 +392,12 @@ udp_dispatch(dns_requestmgr_t *requestmgr, const isc_sockaddr_t *srcaddr, static isc_result_t get_dispatch(bool tcp, bool newtcp, dns_requestmgr_t *requestmgr, const isc_sockaddr_t *srcaddr, const isc_sockaddr_t *destaddr, - isc_dscp_t dscp, bool *connected, dns_dispatch_t **dispatchp) { + isc_dscp_t dscp, dns_dispatch_t **dispatchp) { isc_result_t result; if (tcp) { result = tcp_dispatch(newtcp, requestmgr, srcaddr, destaddr, - dscp, connected, dispatchp); + dscp, dispatchp); } else { result = udp_dispatch(requestmgr, srcaddr, destaddr, dispatchp); } @@ -423,7 +421,6 @@ dns_request_createraw(dns_requestmgr_t *requestmgr, isc_buffer_t *msgbuf, bool tcp = false; bool newtcp = false; isc_region_t r; - bool connected = false; unsigned int dispopt = 0; REQUIRE(VALID_REQUESTMGR(requestmgr)); @@ -497,7 +494,7 @@ dns_request_createraw(dns_requestmgr_t *requestmgr, isc_buffer_t *msgbuf, again: result = get_dispatch(tcp, newtcp, requestmgr, srcaddr, destaddr, dscp, - &connected, &request->dispatch); + &request->dispatch); if (result != ISC_R_SUCCESS) { goto detach; } @@ -514,7 +511,6 @@ again: if (result != ISC_R_SUCCESS) { if ((options & DNS_REQUESTOPT_FIXEDID) != 0 && !newtcp) { newtcp = true; - connected = false; dns_dispatch_detach(&request->dispatch); goto again; } @@ -534,21 +530,15 @@ again: UNLOCK(&requestmgr->lock); request->destaddr = *destaddr; - if (tcp && connected) { - req_send(request); - /* no need to call req_connected(), detach here */ - req_detach(&(dns_request_t *){ request }); - } else { - request->flags |= DNS_REQUEST_F_CONNECTING; - if (tcp) { - request->flags |= DNS_REQUEST_F_TCP; - } + request->flags |= DNS_REQUEST_F_CONNECTING; + if (tcp) { + request->flags |= DNS_REQUEST_F_TCP; + } - result = dns_dispatch_connect(request->dispentry); - if (result != ISC_R_SUCCESS) { - goto unlink; - } + result = dns_dispatch_connect(request->dispentry); + if (result != ISC_R_SUCCESS) { + goto unlink; } req_log(ISC_LOG_DEBUG(3), "dns_request_createraw: request %p", request); @@ -660,7 +650,7 @@ dns_request_create(dns_requestmgr_t *requestmgr, dns_message_t *message, again: result = get_dispatch(tcp, false, requestmgr, srcaddr, destaddr, dscp, - &connected, &request->dispatch); + &request->dispatch); if (result != ISC_R_SUCCESS) { goto detach; } @@ -841,7 +831,7 @@ request_cancel(dns_request_t *request) { request->flags &= ~DNS_REQUEST_F_CONNECTING; if (request->dispentry != NULL) { - dns_dispatch_cancel(&request->dispentry); + dns_dispatch_done(&request->dispentry); } dns_dispatch_detach(&request->dispatch); @@ -996,13 +986,13 @@ static void req_response(isc_result_t result, isc_region_t *region, void *arg) { dns_request_t *request = (dns_request_t *)arg; - req_log(ISC_LOG_DEBUG(3), "req_response: request %p: %s", request, - isc_result_totext(result)); - if (result == ISC_R_CANCELED) { return; } + req_log(ISC_LOG_DEBUG(3), "req_response: request %p: %s", request, + isc_result_totext(result)); + if (result == ISC_R_TIMEDOUT) { LOCK(&request->requestmgr->locks[request->hash]); if (request->udpcount != 0) { diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 3db50f2cee..295c1d424a 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -1494,7 +1494,7 @@ fctx_cancelquery(resquery_t **queryp, isc_time_t *finish, bool no_response, * exist, cancel them. */ if (query->dispentry != NULL) { - dns_dispatch_cancel(&query->dispentry); + dns_dispatch_done(&query->dispentry); } LOCK(&fctx->lock);