From b7a72b1667fa3e73de217b00372ee6437bcd7916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Tue, 19 Nov 2019 11:56:00 +0100 Subject: [PATCH 1/9] netmgr: TCP improvements - add timeout support for TCP and TCPDNS connections to protect against slowloris style attacks. currently, all timeouts are hard-coded. - rework and simplify the TCPDNS state machine. --- lib/isc/netmgr/netmgr-int.h | 15 +- lib/isc/netmgr/netmgr.c | 143 ++++++++++----- lib/isc/netmgr/tcp.c | 112 +++++++++--- lib/isc/netmgr/tcpdns.c | 342 +++++++++++++++++------------------- 4 files changed, 357 insertions(+), 255 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 3961a6ff3b..037ffa145f 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -116,6 +116,7 @@ typedef enum isc__netievent_type { netievent_tcplisten, netievent_tcpstoplisten, netievent_tcpclose, + netievent_closecb, } isc__netievent_type; typedef struct isc__netievent_stop { @@ -186,6 +187,7 @@ typedef isc__netievent__socket_t isc__netievent_tcpclose_t; typedef isc__netievent__socket_t isc__netievent_startread_t; typedef isc__netievent__socket_t isc__netievent_pauseread_t; typedef isc__netievent__socket_t isc__netievent_resumeread_t; +typedef isc__netievent__socket_t isc__netievent_closecb_t; typedef struct isc__netievent__socket_req { isc__netievent_type type; @@ -268,6 +270,9 @@ struct isc_nmsocket { isc_nmsocket_t *parent; isc_quota_t *quota; bool overquota; + uv_timer_t timer; + bool timer_initialized; + uint64_t read_timeout; /*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */ isc_nmsocket_t *outer; @@ -366,7 +371,7 @@ struct isc_nmsocket { * might want to change it to something lockless in the * future. */ - size_t ah; + atomic_int_fast32_t ah; size_t ah_size; size_t *ah_frees; isc_nmhandle_t **ah_handles; @@ -398,6 +403,8 @@ isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type); /*%< * Allocate an ievent and set the type. */ +void +isc__nm_put_ievent(isc_nm_t *mgr, void *ievent); void isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event); @@ -471,6 +478,12 @@ isc__nmsocket_prep_destroy(isc_nmsocket_t *sock); * if there are no remaining references or active handles. */ +void +isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ievent0); +/*%< + * Issue a 'handle closed' callback on the socket. + */ + isc_result_t isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, void *cbarg); diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index ba7094badd..e77b7c96f1 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -450,12 +450,15 @@ async_cb(uv_async_t *handle) { case netievent_tcpclose: isc__nm_async_tcpclose(worker, ievent); break; + case netievent_closecb: + isc__nm_async_closecb(worker, ievent); + break; default: INSIST(0); ISC_UNREACHABLE(); } - isc_mem_put(worker->mgr->mctx, ievent, - sizeof(isc__netievent_storage_t)); + + isc__nm_put_ievent(worker->mgr, ievent); } } @@ -471,6 +474,11 @@ isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type) { return (event); } +void +isc__nm_put_ievent(isc_nm_t *mgr, void *ievent) { + isc_mem_put(mgr->mctx, ievent, sizeof(isc__netievent_storage_t)); +} + void isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) { isc_queue_enqueue(worker->ievents, (uintptr_t)event); @@ -552,6 +560,11 @@ nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) { isc_quota_detach(&sock->quota); } + if (sock->timer_initialized) { + uv_close((uv_handle_t *)&sock->timer, NULL); + sock->timer_initialized = false; + } + isc_astack_destroy(sock->inactivehandles); while ((uvreq = isc_astack_pop(sock->inactivereqs)) != NULL) { @@ -570,7 +583,6 @@ nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) { } else { isc_nm_detach(&sock->mgr); } - } static void @@ -596,11 +608,11 @@ nmsocket_maybe_destroy(isc_nmsocket_t *sock) { * accept destruction. */ LOCK(&sock->lock); - active_handles += sock->ah; + active_handles += atomic_load(&sock->ah); if (sock->children != NULL) { for (int i = 0; i < sock->nchildren; i++) { LOCK(&sock->children[i].lock); - active_handles += sock->children[i].ah; + active_handles += atomic_load(&sock->children[i].ah); UNLOCK(&sock->children[i].lock); } } @@ -780,6 +792,7 @@ isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, isc_sockaddr_t *local) { isc_nmhandle_t *handle = NULL; + size_t handlenum; int pos; REQUIRE(VALID_NMSOCK(sock)); @@ -812,7 +825,7 @@ isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, LOCK(&sock->lock); /* We need to add this handle to the list of active handles */ - if (sock->ah == sock->ah_size) { + if ((size_t) atomic_load(&sock->ah) == sock->ah_size) { sock->ah_frees = isc_mem_reallocate(sock->mgr->mctx, sock->ah_frees, sock->ah_size * 2 * @@ -831,7 +844,9 @@ isc__nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer, sock->ah_size *= 2; } - pos = sock->ah_frees[sock->ah++]; + handlenum = atomic_fetch_add(&sock->ah, 1); + pos = sock->ah_frees[handlenum]; + INSIST(sock->ah_handles[pos] == NULL); sock->ah_handles[pos] = handle; handle->ah_pos = pos; @@ -875,62 +890,85 @@ nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle) { *handle = (isc_nmhandle_t) { .magic = 0 }; + isc_mem_put(sock->mgr->mctx, handle, sizeof(isc_nmhandle_t) + extra); } void isc_nmhandle_unref(isc_nmhandle_t *handle) { + isc_nmsocket_t *sock = NULL; + size_t handlenum; + bool reuse = false; int refs; REQUIRE(VALID_NMHANDLE(handle)); refs = isc_refcount_decrement(&handle->references); INSIST(refs > 0); - if (refs == 1) { - isc_nmsocket_t *sock = handle->sock; - bool reuse = false; + if (refs > 1) { + return; + } - handle->sock = NULL; - if (handle->doreset != NULL) { - handle->doreset(handle->opaque); - } + sock = handle->sock; + handle->sock = NULL; - /* - * We do it all under lock to avoid races with socket - * destruction. - */ - LOCK(&sock->lock); - INSIST(sock->ah_handles[handle->ah_pos] == handle); - INSIST(sock->ah_size > handle->ah_pos); - INSIST(sock->ah > 0); - sock->ah_handles[handle->ah_pos] = NULL; - sock->ah_frees[--sock->ah] = handle->ah_pos; - handle->ah_pos = 0; + if (handle->doreset != NULL) { + handle->doreset(handle->opaque); + } - if (atomic_load(&sock->active)) { - reuse = isc_astack_trypush(sock->inactivehandles, - handle); - } - UNLOCK(&sock->lock); + /* + * We do all of this under lock to avoid races with socket + * destruction. + */ + LOCK(&sock->lock); - /* - * Handle is closed. If the socket has a callback - * configured for that (e.g., to perform cleanup after - * request processing), call it now. - */ - if (sock->closehandle_cb != NULL) { + INSIST(sock->ah_handles[handle->ah_pos] == handle); + INSIST(sock->ah_size > handle->ah_pos); + INSIST(atomic_load(&sock->ah) > 0); + + sock->ah_handles[handle->ah_pos] = NULL; + handlenum = atomic_fetch_sub(&sock->ah, 1) - 1; + sock->ah_frees[handlenum] = handle->ah_pos; + handle->ah_pos = 0; + + if (atomic_load(&sock->active)) { + reuse = isc_astack_trypush(sock->inactivehandles, + handle); + } + + UNLOCK(&sock->lock); + + if (!reuse) { + nmhandle_free(sock, handle); + } + + /* + * The handle is closed. If the socket has a callback configured + * for that (e.g., to perform cleanup after request processing), + * call it now. + */ + if (sock->closehandle_cb != NULL) { + if (sock->tid == isc_nm_tid()) { sock->closehandle_cb(sock); - } - if (!reuse) { - nmhandle_free(sock, handle); - } + /* + * If we do this asynchronously then + * the async event will clean it up. + */ + if (sock->ah == 0 && + !atomic_load(&sock->active) && + !atomic_load(&sock->destroying)) + { + nmsocket_maybe_destroy(sock); + } + } else { - if (sock->ah == 0 && - !atomic_load(&sock->active) && - !atomic_load(&sock->destroying)) - { - nmsocket_maybe_destroy(sock); + isc__netievent_closecb_t * event = + isc__nm_get_ievent(sock->mgr, + netievent_closecb); + isc_nmsocket_attach(sock, &event->sock); + isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) event); } } } @@ -1055,6 +1093,21 @@ isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region, } } +void +isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ievent0) { + isc__netievent_closecb_t *ievent = + (isc__netievent_closecb_t *) ievent0; + + REQUIRE(VALID_NMSOCK(ievent->sock)); + REQUIRE(ievent->sock->tid == isc_nm_tid()); + REQUIRE(ievent->sock->closehandle_cb != NULL); + + UNUSED(worker); + + ievent->sock->closehandle_cb(ievent->sock); + isc_nmsocket_detach(&ievent->sock); +} + bool isc__nm_acquire_interlocked(isc_nm_t *mgr) { LOCK(&mgr->lock); diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 4b6c9ca9a4..493d82a5aa 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -242,25 +242,52 @@ isc__nm_async_tcpstoplisten(isc__networker_t *worker, uv_close(&sock->uv_handle.handle, stoplistening_cb); } +static void +readtimeout_cb(uv_timer_t *handle) { + isc_nmsocket_t *sock = (isc_nmsocket_t *) handle->data; + + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); + + /* + * Socket is actively processing something, so restart the timer + * and return. + */ + if (atomic_load(&sock->processing)) { + uv_timer_start(handle, readtimeout_cb, sock->read_timeout, 0); + return; + } + + /* + * Timeout; stop reading and process whatever we have. + */ + uv_read_stop(&sock->uv_handle.stream); + if (sock->quota) { + isc_quota_detach(&sock->quota); + } + sock->rcb.recv(sock->tcphandle, NULL, sock->rcbarg); +} + isc_result_t isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { isc_nmsocket_t *sock = NULL; + isc__netievent_startread_t *ievent = NULL; REQUIRE(VALID_NMHANDLE(handle)); REQUIRE(VALID_NMSOCK(handle->sock)); sock = handle->sock; sock->rcb.recv = cb; - sock->rcbarg = cbarg; /* That's obviously broken... */ + sock->rcbarg = cbarg; + + ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpstartread); + ievent->sock = sock; + if (sock->tid == isc_nm_tid()) { - int r = uv_read_start(&sock->uv_handle.stream, - isc__nm_alloc_cb, read_cb); - INSIST(r == 0); + isc__nm_async_startread(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) ievent); + isc__nm_put_ievent(sock->mgr, ievent); } else { - isc__netievent_startread_t *ievent = - isc__nm_get_ievent(sock->mgr, - netievent_tcpstartread); - ievent->sock = sock; isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *) ievent); } @@ -275,12 +302,23 @@ isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ievent0) { isc_nmsocket_t *sock = ievent->sock; REQUIRE(worker->id == isc_nm_tid()); + if (sock->read_timeout != 0) { + if (!sock->timer_initialized) { + uv_timer_init(&worker->loop, &sock->timer); + sock->timer.data = sock; + sock->timer_initialized = true; + } + uv_timer_start(&sock->timer, readtimeout_cb, + sock->read_timeout, 0); + } uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb, read_cb); } isc_result_t isc_nm_pauseread(isc_nmsocket_t *sock) { + isc__netievent_pauseread_t *ievent = NULL; + REQUIRE(VALID_NMSOCK(sock)); if (atomic_load(&sock->readpaused)) { @@ -288,15 +326,14 @@ isc_nm_pauseread(isc_nmsocket_t *sock) { } atomic_store(&sock->readpaused, true); + ievent = isc__nm_get_ievent(sock->mgr, netievent_tcppauseread); + ievent->sock = sock; if (sock->tid == isc_nm_tid()) { - int r = uv_read_stop(&sock->uv_handle.stream); - INSIST(r == 0); + isc__nm_async_pauseread(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) ievent); + isc__nm_put_ievent(sock->mgr, ievent); } else { - isc__netievent_pauseread_t *ievent = - isc__nm_get_ievent(sock->mgr, - netievent_tcppauseread); - ievent->sock = sock; isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *) ievent); } @@ -309,15 +346,20 @@ isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ievent0) { isc__netievent_pauseread_t *ievent = (isc__netievent_pauseread_t *) ievent0; isc_nmsocket_t *sock = ievent->sock; - REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(VALID_NMSOCK(sock)); REQUIRE(worker->id == isc_nm_tid()); + if (sock->timer_initialized) { + uv_timer_stop(&sock->timer); + } uv_read_stop(&sock->uv_handle.stream); } isc_result_t isc_nm_resumeread(isc_nmsocket_t *sock) { + isc__netievent_startread_t *ievent = NULL; + REQUIRE(VALID_NMSOCK(sock)); REQUIRE(sock->rcb.recv != NULL); @@ -327,16 +369,14 @@ isc_nm_resumeread(isc_nmsocket_t *sock) { atomic_store(&sock->readpaused, false); + ievent = isc__nm_get_ievent(sock->mgr, netievent_tcpstartread); + ievent->sock = sock; + if (sock->tid == isc_nm_tid()) { - int r = uv_read_start(&sock->uv_handle.stream, - isc__nm_alloc_cb, read_cb); - INSIST(r == 0); + isc__nm_async_startread(&sock->mgr->workers[sock->tid], + (isc__netievent_t *) ievent); + isc__nm_put_ievent(sock->mgr, ievent); } else { - /* It's the same as startread */ - isc__netievent_startread_t *ievent = - isc__nm_get_ievent(sock->mgr, - netievent_tcpstartread); - ievent->sock = sock; isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *) ievent); } @@ -359,6 +399,11 @@ read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { INSIST(sock->rcb.recv != NULL); sock->rcb.recv(sock->tcphandle, ®ion, sock->rcbarg); + if (sock->timer_initialized && sock->read_timeout != 0) { + /* The timer will be updated */ + uv_timer_start(&sock->timer, readtimeout_cb, + sock->read_timeout, 0); + } isc__nm_free_uvbuf(sock, buf); return; } @@ -440,6 +485,7 @@ accept_connection(isc_nmsocket_t *ssock) { handle = isc__nmhandle_get(csock, NULL, &local); INSIST(ssock->rcb.accept != NULL); + csock->read_timeout = 1000; ssock->rcb.accept(handle, ISC_R_SUCCESS, ssock->rcbarg); isc_nmsocket_detach(&csock); @@ -568,6 +614,16 @@ tcp_close_cb(uv_handle_t *uvhandle) { isc__nmsocket_prep_destroy(sock); } +static void +timer_close_cb(uv_handle_t *uvhandle) { + isc_nmsocket_t *sock = uvhandle->data; + + REQUIRE(VALID_NMSOCK(sock)); + + isc_nmsocket_detach(&sock->server); + uv_close(&sock->uv_handle.handle, tcp_close_cb); +} + static void tcp_close_direct(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); @@ -587,9 +643,13 @@ tcp_close_direct(isc_nmsocket_t *sock) { } } } - - isc_nmsocket_detach(&sock->server); - uv_close(&sock->uv_handle.handle, tcp_close_cb); + if (sock->timer_initialized) { + uv_close((uv_handle_t *)&sock->timer, timer_close_cb); + sock->timer_initialized = false; + } else { + isc_nmsocket_detach(&sock->server); + uv_close(&sock->uv_handle.handle, tcp_close_cb); + } } void diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index 8e86a39474..a06d5f7b0f 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -47,8 +47,16 @@ dnslen(unsigned char* base) { return ((base[0] << 8) + (base[1])); } +/* + * Regular TCP buffer, should suffice in most cases. + */ #define NM_REG_BUF 4096 -#define NM_BIG_BUF 65536 +/* + * Two full DNS packets with lengths. + * netmgr receives 64k at most so there's no risk + * of overrun. + */ +#define NM_BIG_BUF (65535+2)*2 static inline void alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) { REQUIRE(len <= NM_BIG_BUF); @@ -66,6 +74,23 @@ alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) { } } +static void +timer_close_cb(uv_handle_t *handle) { + isc_nmsocket_t *sock = (isc_nmsocket_t *) handle->data; + INSIST(VALID_NMSOCK(sock)); + sock->timer_initialized = false; + atomic_store(&sock->closed, true); + isc_nmsocket_detach(&sock); +} + +static void +dnstcp_readtimeout(uv_timer_t *timer) { + isc_nmsocket_t *sock = (isc_nmsocket_t *) timer->data; + REQUIRE(VALID_NMSOCK(sock)); + isc_nmsocket_detach(&sock->outer); + uv_close((uv_handle_t*) &sock->timer, timer_close_cb); +} + /* * Accept callback for TCP-DNS connection */ @@ -94,77 +119,71 @@ dnslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { isc_nmsocket_attach(handle->sock, &dnssock->outer); dnssock->peer = handle->sock->peer; dnssock->iface = handle->sock->iface; + dnssock->read_timeout = 5000; + dnssock->tid = isc_nm_tid(); dnssock->closehandle_cb = resume_processing; + uv_timer_init(&dnssock->mgr->workers[isc_nm_tid()].loop, + &dnssock->timer); + dnssock->timer.data = dnssock; + dnssock->timer_initialized = true; + uv_timer_start(&dnssock->timer, dnstcp_readtimeout, + dnssock->read_timeout, 0); + isc_nm_read(handle, dnslisten_readcb, dnssock); } -static bool -connection_limit(isc_nmsocket_t *sock) { - int ah; +/* + * Process a single packet from the incoming buffer. + * + * Return ISC_R_SUCCESS and attach 'handlep' to a handle if something + * was processed; return ISC_R_NOMORE if there isn't a full message + * to be processed. + * + * The caller will need to unreference the handle. + */ +static isc_result_t +processbuffer(isc_nmsocket_t *dnssock, isc_nmhandle_t **handlep) { + size_t len; - REQUIRE(sock->type == isc_nm_tcpdnssocket && sock->outer != NULL); - - if (atomic_load(&sock->sequential)) { - /* - * We're already non-pipelining, so there's - * no need to check per-connection limits. - */ - return (false); - } - - LOCK(&sock->lock); - ah = sock->ah; - UNLOCK(&sock->lock); - - if (ah >= TCPDNS_CLIENTS_PER_CONN) { - atomic_store(&sock->overlimit, true); - isc_nm_pauseread(sock->outer); - return (true); - } - - return (false); -} - -/* Process all complete packets out of incoming buffer */ -static void -processbuffer(isc_nmsocket_t *dnssock) { REQUIRE(VALID_NMSOCK(dnssock)); + REQUIRE(handlep != NULL && *handlep == NULL); - /* While we have a complete packet in the buffer */ - while (dnssock->buf_len > 2 && - dnslen(dnssock->buf) <= dnssock->buf_len - 2 && - !connection_limit(dnssock)) - { + /* + * If we don't even have the length yet, we can't do + * anything. + */ + if (dnssock->buf_len < 2) { + return (ISC_R_NOMORE); + } + + /* + * Process the first packet from the buffer, leaving + * the rest (if any) for later. + */ + len = dnslen(dnssock->buf); + if (len <= dnssock->buf_len - 2) { isc_nmhandle_t *dnshandle = NULL; isc_region_t r2 = { .base = dnssock->buf + 2, - .length = dnslen(dnssock->buf) + .length = len }; - size_t len; dnshandle = isc__nmhandle_get(dnssock, NULL, NULL); - atomic_store(&dnssock->processing, true); dnssock->rcb.recv(dnshandle, &r2, dnssock->rcbarg); - /* - * If the recv callback wants to hold on to the - * handle, it needs to attach to it. - */ - isc_nmhandle_unref(dnshandle); - - len = dnslen(dnssock->buf) + 2; + len += 2; dnssock->buf_len -= len; if (len > 0) { memmove(dnssock->buf, dnssock->buf + len, dnssock->buf_len); } - /* Check here to make sure we do the processing at least once */ - if (atomic_load(&dnssock->processing)) { - return; - } + *handlep = dnshandle; + return (ISC_R_SUCCESS); } + + return (ISC_R_NOMORE); } /* @@ -174,8 +193,8 @@ processbuffer(isc_nmsocket_t *dnssock) { static void dnslisten_readcb(isc_nmhandle_t *handle, isc_region_t *region, void *arg) { isc_nmsocket_t *dnssock = (isc_nmsocket_t *) arg; - isc_sockaddr_t local; unsigned char *base = NULL; + bool done = false; size_t len; REQUIRE(VALID_NMSOCK(dnssock)); @@ -183,133 +202,63 @@ dnslisten_readcb(isc_nmhandle_t *handle, isc_region_t *region, void *arg) { if (region == NULL) { /* Connection closed */ - atomic_store(&dnssock->closed, true); - isc_nmsocket_detach(&dnssock->outer); - isc_nmsocket_detach(&dnssock); + isc__nm_tcpdns_close(dnssock); return; } - local = isc_nmhandle_localaddr(handle); - base = region->base; len = region->length; - /* - * We have something in the buffer, we need to glue it. - */ - if (dnssock->buf_len > 0) { - if (dnssock->buf_len == 1) { - /* Make sure we have the length */ - dnssock->buf[1] = base[0]; - dnssock->buf_len = 2; - base++; - len--; - } - - processbuffer(dnssock); + if (dnssock->buf_len + len > dnssock->buf_size) { + alloc_dnsbuf(dnssock, dnssock->buf_len + len); } + memmove(dnssock->buf + dnssock->buf_len, base, len); + dnssock->buf_len += len; - if (dnssock->buf_len > 0) { - size_t plen; + do { + isc_result_t result; + isc_nmhandle_t *dnshandle = NULL; - if (dnssock->buf_len == 1) { - /* Make sure we have the length */ - dnssock->buf[1] = base[0]; - dnssock->buf_len = 2; - base++; - len--; - } - - /* At this point we definitely have 2 bytes there. */ - plen = ISC_MIN(len, (dnslen(dnssock->buf) + 2 - - dnssock->buf_len)); - - if (dnssock->buf_len + plen > NM_BIG_BUF) { + result = processbuffer(dnssock, &dnshandle); + if (result != ISC_R_SUCCESS) { /* - * XXX: continuing to read will overrun the - * socket buffer. We may need to force the - * connection to close so the client will have - * to open a new one. + * There wasn't anything in the buffer to process. */ return; } - if (dnssock->buf_len + plen > dnssock->buf_size) { - alloc_dnsbuf(dnssock, dnssock->buf_len + plen); - } - - memmove(dnssock->buf + dnssock->buf_len, base, plen); - dnssock->buf_len += plen; - base += plen; - len -= plen; - - /* Do we have a complete packet in the buffer? */ - if (dnslen(dnssock->buf) >= dnssock->buf_len - 2 && - !connection_limit(dnssock)) - { - isc_nmhandle_t *dnshandle = NULL; - isc_region_t r2 = { - .base = dnssock->buf + 2, - .length = dnslen(dnssock->buf) - }; - - dnshandle = isc__nmhandle_get(dnssock, NULL, &local); - atomic_store(&dnssock->processing, true); - dnssock->rcb.recv(dnshandle, &r2, dnssock->rcbarg); - dnssock->buf_len = 0; - - /* - * If the recv callback wants to hold on to the - * handle, it needs to attach to it. - */ - isc_nmhandle_unref(dnshandle); - } - } - - /* - * At this point we've processed whatever was previously in the - * socket buffer. If there are more messages to be found in what - * we've read, and if we're either pipelining or not processing - * anything else currently, then we can process those messages now. - */ - while (len >= 2 && dnslen(base) <= len - 2 && - (!atomic_load(&dnssock->sequential) || - !atomic_load(&dnssock->processing)) && - !connection_limit(dnssock)) - { - isc_nmhandle_t *dnshandle = NULL; - isc_region_t r2 = { - .base = base + 2, - .length = dnslen(base) - }; - - len -= dnslen(base) + 2; - base += dnslen(base) + 2; - - dnshandle = isc__nmhandle_get(dnssock, NULL, &local); - atomic_store(&dnssock->processing, true); - dnssock->rcb.recv(dnshandle, &r2, dnssock->rcbarg); - /* - * If the recv callback wants to hold on to the - * handle, it needs to attach to it. + * We have a packet: stop timeout timers */ - isc_nmhandle_unref(dnshandle); - } + atomic_store(&dnssock->outer->processing, true); + uv_timer_stop(&dnssock->timer); - /* - * We have less than a full message remaining; it can be - * stored in the socket buffer for next time. - */ - if (len > 0) { - if (len > dnssock->buf_size) { - alloc_dnsbuf(dnssock, len); + if (dnssock->sequential) { + /* + * We're in sequential mode and we processed + * one packet, so we're done until the next read + * completes. + */ + isc_nm_pauseread(dnssock->outer); + done = true; + } else { + /* + * We're pipelining, so we now resume processing + * packets until the clients-per-connection limit + * is reached (as determined by the number of + * active handles on the socket). When the limit + * is reached, pause reading. + */ + if (atomic_load(&dnssock->ah) >= + TCPDNS_CLIENTS_PER_CONN) + { + isc_nm_pauseread(dnssock->outer); + done = true; + } } - INSIST(len <= dnssock->buf_size); - memmove(dnssock->buf, base, len); - dnssock->buf_len = len; - } + isc_nmhandle_unref(dnshandle); + } while (!done); } /* @@ -394,23 +343,64 @@ typedef struct tcpsend { static void resume_processing(void *arg) { isc_nmsocket_t *sock = (isc_nmsocket_t *) arg; + isc_result_t result; REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->tid == isc_nm_tid()); if (sock->type != isc_nm_tcpdnssocket || sock->outer == NULL) { return; } - /* - * If we're in sequential mode or over the - * clients-per-connection limit, the sock can - * resume reading now. - */ - if (atomic_load(&sock->overlimit) || atomic_load(&sock->sequential)) { - atomic_store(&sock->overlimit, false); - atomic_store(&sock->processing, false); - isc_nm_resumeread(sock->outer); + if (atomic_load(&sock->ah) == 0) { + /* Nothing is active; sockets can timeout now */ + atomic_store(&sock->outer->processing, false); + uv_timer_start(&sock->timer, dnstcp_readtimeout, + sock->read_timeout, 0); } + + /* + * For sequential sockets: Process what's in the buffer, or + * if there aren't any messages buffered, resume reading. + */ + if (sock->sequential) { + isc_nmhandle_t *handle = NULL; + + result = processbuffer(sock, &handle); + if (result == ISC_R_SUCCESS) { + atomic_store(&sock->outer->processing, true); + uv_timer_stop(&sock->timer); + isc_nmhandle_unref(handle); + } else if (sock->outer != NULL) { + isc_nm_resumeread(sock->outer); + } + + return; + } + + /* + * For pipelined sockets: If we're under the clients-per-connection + * limit, resume processing until we reach the limit again. + */ + do { + isc_nmhandle_t *dnshandle = NULL; + + result = processbuffer(sock, &dnshandle); + if (result != ISC_R_SUCCESS) { + /* + * Nothing in the buffer; resume reading. + */ + if (sock->outer != NULL) { + isc_nm_resumeread(sock->outer); + } + + break; + } + + uv_timer_stop(&sock->timer); + atomic_store(&sock->outer->processing, true); + isc_nmhandle_unref(dnshandle); + } while (atomic_load(&sock->ah) < TCPDNS_CLIENTS_PER_CONN); } static void @@ -422,19 +412,6 @@ tcpdnssend_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { ts->cb(ts->orighandle, result, ts->cbarg); isc_mem_put(ts->mctx, ts->region.base, ts->region.length); - /* - * The response was sent; if we're in sequential or overlimit - * mode, resume processing now. - */ - if (atomic_load(&ts->orighandle->sock->sequential) || - atomic_load(&ts->orighandle->sock->overlimit)) - { - atomic_store(&ts->orighandle->sock->processing, false); - atomic_store(&ts->orighandle->sock->overlimit, false); - processbuffer(ts->orighandle->sock); - isc_nm_resumeread(handle->sock); - } - isc_nmhandle_unref(ts->orighandle); isc_mem_putanddetach(&ts->mctx, ts, sizeof(*ts)); } @@ -483,12 +460,11 @@ isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, return (isc__nm_tcp_send(t->handle, &t->region, tcpdnssend_cb, t)); } + void isc__nm_tcpdns_close(isc_nmsocket_t *sock) { if (sock->outer != NULL) { isc_nmsocket_detach(&sock->outer); } - - atomic_store(&sock->closed, true); - isc__nmsocket_prep_destroy(sock); + uv_close((uv_handle_t*) &sock->timer, timer_close_cb); } From 199bd6b62392d3c2111c92c1c3c9d35a5e83bd57 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Wed, 20 Nov 2019 22:33:35 +0100 Subject: [PATCH 2/9] netmgr: make TCP timeouts configurable - restore support for tcp-initial-timeout, tcp-idle-timeout, tcp-keepalive-timeout and tcp-advertised-timeout configuration options, which were ineffective previously. --- bin/named/server.c | 10 ++--- bin/tests/system/stop.pl | 2 +- lib/isc/include/isc/netmgr.h | 87 ++++++++++++++++++++++++++++-------- lib/isc/netmgr/netmgr-int.h | 18 ++++++++ lib/isc/netmgr/netmgr.c | 77 +++++++++++++++++++++++++------ lib/isc/netmgr/tcp.c | 11 +++-- lib/isc/netmgr/tcpdns.c | 31 ++++++++++--- lib/isc/win32/libisc.def.in | 4 ++ lib/ns/client.c | 8 ++-- lib/ns/include/ns/server.h | 20 --------- lib/ns/interfacemgr.c | 1 - lib/ns/server.c | 33 -------------- lib/ns/win32/libns.def | 2 - 13 files changed, 200 insertions(+), 104 deletions(-) diff --git a/bin/named/server.c b/bin/named/server.c index e2115df9ae..55663d515b 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -8470,8 +8470,8 @@ load_configuration(const char *filename, named_server_t *server, advertised = MAX_TCP_TIMEOUT; } - ns_server_settimeouts(named_g_server->sctx, - initial, idle, keepalive, advertised); + isc_nm_tcp_settimeouts(named_g_nm, initial, idle, + keepalive, advertised); /* * Configure sets of UDP query source ports. @@ -15405,8 +15405,8 @@ named_server_tcptimeouts(isc_lex_t *lex, isc_buffer_t **text) { if (ptr == NULL) return (ISC_R_UNEXPECTEDEND); - ns_server_gettimeouts(named_g_server->sctx, - &initial, &idle, &keepalive, &advertised); + isc_nm_tcp_gettimeouts(named_g_nm, &initial, &idle, + &keepalive, &advertised); /* Look for optional arguments. */ ptr = next_token(lex, NULL); @@ -15445,7 +15445,7 @@ named_server_tcptimeouts(isc_lex_t *lex, isc_buffer_t **text) { result = isc_task_beginexclusive(named_g_server->task); RUNTIME_CHECK(result == ISC_R_SUCCESS); - ns_server_settimeouts(named_g_server->sctx, initial, idle, + isc_nm_tcp_settimeouts(named_g_nm, initial, idle, keepalive, advertised); isc_task_endexclusive(named_g_server->task); diff --git a/bin/tests/system/stop.pl b/bin/tests/system/stop.pl index 1a78ade59f..a667d446fb 100644 --- a/bin/tests/system/stop.pl +++ b/bin/tests/system/stop.pl @@ -109,7 +109,7 @@ foreach my $name(@ans) { stop_signal($name, "TERM", 1); } -@ans = wait_for_servers(60, @ans); +@ans = wait_for_servers(1200, @ans); # Pass 3: SIGABRT foreach my $name (@ns) { diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index 1696f3cb8c..97bb1d6d76 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -113,8 +113,20 @@ isc_nmhandle_setdata(isc_nmhandle_t *handle, void *arg, isc_sockaddr_t isc_nmhandle_peeraddr(isc_nmhandle_t *handle); +/*%< + * Return the peer address for the given handle. + */ isc_sockaddr_t isc_nmhandle_localaddr(isc_nmhandle_t *handle); +/*%< + * Return the local address for the given handle. + */ + +isc_nm_t * +isc_nmhandle_netmgr(isc_nmhandle_t *handle); +/*%< + * Return a pointer to the netmgr object for the given handle. + */ typedef void (*isc_nm_recv_cb_t)(isc_nmhandle_t *handle, isc_region_t *region, void *cbarg); @@ -212,7 +224,7 @@ isc_result_t isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_cb_t cb, void *cbarg, size_t extrahandlesize, isc_quota_t *quota, - isc_nmsocket_t **rv); + isc_nmsocket_t **sockp); /*%< * Start listening for raw messages over the TCP interface 'iface', using * net manager 'mgr'. @@ -230,8 +242,8 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, * quota. This allows us to enforce TCP client quota limits. * * NOTE: This is currently only called inside isc_nm_listentcpdns(), which - * creates a 'wrapper' socket that sends and receives DNS messages - - * prepended with a two-byte length field - and handles buffering. + * creates a 'wrapper' socket that sends and receives DNS messages + * prepended with a two-byte length field, and handles buffering. */ void @@ -250,8 +262,10 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, * net manager 'mgr'. * * On success, 'sockp' will be updated to contain a new listening TCPDNS - * socket. This is a wrapper around a TCP socket, and handles DNS length - * processing. + * socket. This is a wrapper around a raw TCP socket, which sends and + * receives DNS messages via that socket. It handles message buffering + * and pipelining, and automatically prepends messages with a two-byte + * length field. * * When a complete DNS message is received on the socket, 'cb' will be * called with 'cbarg' as its argument. @@ -259,6 +273,8 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, * When handles are allocated for the socket, 'extrasize' additional bytes * will be allocated along with the handle for an associated object * (typically ns_client). + * + * 'quota' is passed to isc_nm_listentcp() when opening the raw TCP socket. */ void @@ -270,25 +286,60 @@ isc_nm_tcpdns_stoplistening(isc_nmsocket_t *sock); void isc_nm_tcpdns_sequential(isc_nmhandle_t *handle); /*%< - * Disable pipelining on this connection. Each DNS packet - * will be only processed after the previous completes. + * Disable pipelining on this connection. Each DNS packet will be only + * processed after the previous completes. * - * The socket must be unpaused after the query is processed. - * This is done the response is sent, or if we're dropping the - * query, it will be done when a handle is fully dereferenced - * by calling the socket's closehandle_cb callback. + * The socket must be unpaused after the query is processed. This is done + * the response is sent, or if we're dropping the query, it will be done + * when a handle is fully dereferenced by calling the socket's + * closehandle_cb callback. * - * Note: This can only be run while a message is being processed; - * if it is run before any messages are read, no messages will - * be read. + * Note: This can only be run while a message is being processed; if it is + * run before any messages are read, no messages will be read. * - * Also note: once this has been set, it cannot be reversed for a - * given connection. + * Also note: once this has been set, it cannot be reversed for a given + * connection. + */ + +void +isc_nm_tcpdns_keepalive(isc_nmhandle_t *handle); +/*%< + * Enable keepalive on this connection. + * + * When keepalive is active, we switch to using the keepalive timeout + * to determine when to close a connection, rather than the idle timeout. + */ + +void +isc_nm_tcp_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, + uint32_t keepalive, uint32_t advertised); +/*%< + * Sets the initial, idle, and keepalive timeout values to use for + * TCP connections, and the timeout value to advertise in responses using + * the EDNS TCP Keepalive option (which should ordinarily be the same + * as 'keepalive'), in tenths of seconds. + * + * Requires: + * \li 'mgr' is a valid netmgr. + */ + +void +isc_nm_tcp_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, + uint32_t *keepalive, uint32_t *advertised); +/*%< + * Gets the initial, idle, keepalive, or advertised timeout values, + * in tenths of seconds. + * + * Any integer pointer parameter not set to NULL will be updated to + * contain the corresponding timeout value. + * + * Requires: + * \li 'mgr' is a valid netmgr. */ void isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp); /*%< - * Simulate a broken firewall that blocks UDP messages larger - * than a given size. + * Simulate a broken firewall that blocks UDP messages larger than a given + * size. */ diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 037ffa145f..4bea485800 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -243,6 +243,18 @@ struct isc_nm { * event or wait for the other one to finish if we want to pause. */ atomic_bool interlocked; + + /* + * Timeout values for TCP connections, coresponding to + * tcp-intiial-timeout, tcp-idle-timeout, tcp-keepalive-timeout, + * and tcp-advertised-timeout. Note that these are stored in + * milliseconds so they can be used directly with the libuv timer, + * but they are configured in tenths of seconds. + */ + uint32_t init; + uint32_t idle; + uint32_t keepalive; + uint32_t advertised; }; typedef enum isc_nmsocket_type { @@ -339,6 +351,12 @@ struct isc_nmsocket { */ atomic_bool readpaused; + /*% + * A TCP or TCPDNS socket has been set to use the keepalive + * timeout instead of the default idle timeout. + */ + atomic_bool keepalive; + /*% * 'spare' handles for that can be reused to avoid allocations, * for UDP. diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index e77b7c96f1..9a41171cae 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -94,6 +94,15 @@ isc_nm_start(isc_mem_t *mctx, uint32_t workers) { atomic_init(&mgr->paused, false); atomic_init(&mgr->interlocked, false); + /* + * Default TCP timeout values. + * May be updated by isc_nm_listentcp(). + */ + mgr->init = 30000; + mgr->idle = 30000; + mgr->keepalive = 30000; + mgr->advertised = 30000; + mgr->workers = isc_mem_get(mctx, workers * sizeof(isc__networker_t)); for (size_t i = 0; i < workers; i++) { int r; @@ -303,6 +312,41 @@ isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp) { atomic_store(&mgr->maxudp, maxudp); } +void +isc_nm_tcp_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle, + uint32_t keepalive, uint32_t advertised) +{ + REQUIRE(VALID_NM(mgr)); + + mgr->init = init * 100; + mgr->idle = idle * 100; + mgr->keepalive = keepalive * 100; + mgr->advertised = advertised * 100; +} + +void +isc_nm_tcp_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, + uint32_t *keepalive, uint32_t *advertised) +{ + REQUIRE(VALID_NM(mgr)); + + if (initial != NULL) { + *initial = mgr->init / 100; + } + + if (idle != NULL) { + *idle = mgr->idle / 100; + } + + if (keepalive != NULL) { + *keepalive = mgr->keepalive / 100; + } + + if (advertised != NULL) { + *advertised = mgr->advertised / 100; + } +} + /* * nm_thread is a single worker thread, that runs uv_run event loop * until asked to stop. @@ -712,7 +756,6 @@ isc__nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, .inactivehandles = isc_astack_new(mgr->mctx, 60), .inactivereqs = isc_astack_new(mgr->mctx, 60) }; - isc_nm_attach(mgr, &sock->mgr); sock->uv_handle.handle.data = sock; @@ -950,27 +993,27 @@ isc_nmhandle_unref(isc_nmhandle_t *handle) { if (sock->closehandle_cb != NULL) { if (sock->tid == isc_nm_tid()) { sock->closehandle_cb(sock); - - /* - * If we do this asynchronously then - * the async event will clean it up. - */ - if (sock->ah == 0 && - !atomic_load(&sock->active) && - !atomic_load(&sock->destroying)) - { - nmsocket_maybe_destroy(sock); - } } else { - isc__netievent_closecb_t * event = isc__nm_get_ievent(sock->mgr, netievent_closecb); isc_nmsocket_attach(sock, &event->sock); isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid], (isc__netievent_t *) event); + /* + * If we do this asynchronously then the async event + * will clean the socket, so just exit. + */ + return; } } + + if (atomic_load(&sock->ah) == 0 && + !atomic_load(&sock->active) && + !atomic_load(&sock->destroying)) + { + nmsocket_maybe_destroy(sock); + } } void * @@ -1012,6 +1055,14 @@ isc_nmhandle_localaddr(isc_nmhandle_t *handle) { return (handle->local); } +isc_nm_t * +isc_nmhandle_netmgr(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + + return (handle->sock->mgr); +} + isc__nm_uvreq_t * isc__nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock) { isc__nm_uvreq_t *req = NULL; diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 493d82a5aa..c400866db7 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -130,7 +130,7 @@ isc_result_t isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_cb_t cb, void *cbarg, size_t extrahandlesize, isc_quota_t *quota, - isc_nmsocket_t **rv) + isc_nmsocket_t **sockp) { isc__netievent_tcplisten_t *ievent = NULL; isc_nmsocket_t *nsock = NULL; @@ -163,7 +163,7 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, ievent->sock = nsock; isc__nm_enqueue_ievent(&mgr->workers[nsock->tid], (isc__netievent_t *) ievent); - *rv = nsock; + *sockp = nsock; return (ISC_R_SUCCESS); } @@ -399,6 +399,11 @@ read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { INSIST(sock->rcb.recv != NULL); sock->rcb.recv(sock->tcphandle, ®ion, sock->rcbarg); + + sock->read_timeout = (atomic_load(&sock->keepalive) + ? sock->mgr->keepalive + : sock->mgr->idle); + if (sock->timer_initialized && sock->read_timeout != 0) { /* The timer will be updated */ uv_timer_start(&sock->timer, readtimeout_cb, @@ -485,7 +490,7 @@ accept_connection(isc_nmsocket_t *ssock) { handle = isc__nmhandle_get(csock, NULL, &local); INSIST(ssock->rcb.accept != NULL); - csock->read_timeout = 1000; + csock->read_timeout = ssock->mgr->init; ssock->rcb.accept(handle, ISC_R_SUCCESS, ssock->rcbarg); isc_nmsocket_detach(&csock); diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index a06d5f7b0f..c3f87a3d4d 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -86,7 +86,9 @@ timer_close_cb(uv_handle_t *handle) { static void dnstcp_readtimeout(uv_timer_t *timer) { isc_nmsocket_t *sock = (isc_nmsocket_t *) timer->data; + REQUIRE(VALID_NMSOCK(sock)); + isc_nmsocket_detach(&sock->outer); uv_close((uv_handle_t*) &sock->timer, timer_close_cb); } @@ -119,7 +121,7 @@ dnslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { isc_nmsocket_attach(handle->sock, &dnssock->outer); dnssock->peer = handle->sock->peer; dnssock->iface = handle->sock->iface; - dnssock->read_timeout = 5000; + dnssock->read_timeout = handle->sock->mgr->init; dnssock->tid = isc_nm_tid(); dnssock->closehandle_cb = resume_processing; @@ -215,6 +217,10 @@ dnslisten_readcb(isc_nmhandle_t *handle, isc_region_t *region, void *arg) { memmove(dnssock->buf + dnssock->buf_len, base, len); dnssock->buf_len += len; + dnssock->read_timeout = (atomic_load(&dnssock->keepalive) + ? dnssock->mgr->keepalive + : dnssock->mgr->idle); + do { isc_result_t result; isc_nmhandle_t *dnshandle = NULL; @@ -233,7 +239,7 @@ dnslisten_readcb(isc_nmhandle_t *handle, isc_region_t *region, void *arg) { atomic_store(&dnssock->outer->processing, true); uv_timer_stop(&dnssock->timer); - if (dnssock->sequential) { + if (atomic_load(&dnssock->sequential)) { /* * We're in sequential mode and we processed * one packet, so we're done until the next read @@ -270,7 +276,7 @@ isc_result_t isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, void *cbarg, size_t extrahandlesize, isc_quota_t *quota, - isc_nmsocket_t **rv) + isc_nmsocket_t **sockp) { /* A 'wrapper' socket object with outer set to true TCP socket */ isc_nmsocket_t *dnslistensock = @@ -291,7 +297,8 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, quota, &dnslistensock->outer); atomic_store(&dnslistensock->listening, true); - *rv = dnslistensock; + *sockp = dnslistensock; + return (result); } @@ -331,6 +338,20 @@ isc_nm_tcpdns_sequential(isc_nmhandle_t *handle) { atomic_store(&handle->sock->sequential, true); } +void +isc_nm_tcpdns_keepalive(isc_nmhandle_t *handle) { + REQUIRE(VALID_NMHANDLE(handle)); + + if (handle->sock->type != isc_nm_tcpdnssocket || + handle->sock->outer == NULL) + { + return; + } + + atomic_store(&handle->sock->keepalive, true); + atomic_store(&handle->sock->outer->keepalive, true); +} + typedef struct tcpsend { isc_mem_t *mctx; isc_nmhandle_t *handle; @@ -363,7 +384,7 @@ resume_processing(void *arg) { * For sequential sockets: Process what's in the buffer, or * if there aren't any messages buffered, resume reading. */ - if (sock->sequential) { + if (atomic_load(&sock->sequential)) { isc_nmhandle_t *handle = NULL; result = processbuffer(sock, &handle); diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in index 9b3a45d45e..35faea7d55 100644 --- a/lib/isc/win32/libisc.def.in +++ b/lib/isc/win32/libisc.def.in @@ -438,6 +438,7 @@ isc_netscope_pton isc_nmhandle_getdata isc_nmhandle_getextra isc_nmhandle_is_stream +isc_nmhandle_netmgr isc_nmhandle_localaddr isc_nmhandle_peeraddr isc_nmhandle_ref @@ -450,7 +451,10 @@ isc_nm_listenudp isc_nm_maxudp isc_nm_send isc_nm_start +isc_nm_tcp_gettimeouts +isc_nm_tcp_settimeouts isc_nmsocket_detach +isc_nm_tcpdns_keepalive isc_nm_tcpdns_sequential isc_nm_tcpdns_stoplistening isc_nm_tid diff --git a/lib/ns/client.c b/lib/ns/client.c index 189172f7e3..8cd8226549 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -1023,12 +1023,14 @@ ns_client_addopt(ns_client_t *client, dns_message_t *message, } if (TCP_CLIENT(client) && USEKEEPALIVE(client)) { isc_buffer_t buf; + uint32_t adv; INSIST(count < DNS_EDNSOPTIONS); + isc_nm_tcp_gettimeouts(isc_nmhandle_netmgr(client->handle), + NULL, NULL, NULL, &adv); isc_buffer_init(&buf, advtimo, sizeof(advtimo)); - isc_buffer_putuint16(&buf, - (uint16_t) client->sctx->advertisedtimo); + isc_buffer_putuint16(&buf, (uint16_t) adv); ednsopts[count].code = DNS_OPT_TCP_KEEPALIVE; ednsopts[count].length = 2; ednsopts[count].value = advtimo; @@ -2191,7 +2193,7 @@ get_clientmctx(ns_clientmgr_t *manager, isc_mem_t **mctxp) { #if CLIENT_NMCTXS > 0 LOCK(&manager->lock); - if (isc_nm_tid()>=0) { + if (isc_nm_tid() >= 0) { nextmctx = isc_nm_tid(); } else { nextmctx = manager->nextmctx++; diff --git a/lib/ns/include/ns/server.h b/lib/ns/include/ns/server.h index fa27bdd446..a8b232340e 100644 --- a/lib/ns/include/ns/server.h +++ b/lib/ns/include/ns/server.h @@ -92,11 +92,6 @@ struct ns_server { uint32_t options; unsigned int delay; - unsigned int initialtimo; - unsigned int idletimo; - unsigned int keepalivetimo; - unsigned int advertisedtimo; - dns_acl_t *blackholeacl; dns_acl_t *keepresporder; uint16_t udpsize; @@ -174,21 +169,6 @@ ns_server_setserverid(ns_server_t *sctx, const char *serverid); *\li 'sctx' is valid. */ -void -ns_server_settimeouts(ns_server_t *sctx, unsigned int initial, - unsigned int idle, unsigned int keepalive, - unsigned int advertised); -void -ns_server_gettimeouts(ns_server_t *sctx, unsigned int *initial, - unsigned int *idle, unsigned int *keepalive, - unsigned int *advertised); -/*%< - * Set/get tcp-timeout values. - * - * Requires: - *\li 'sctx' is valid. - */ - void ns_server_setoption(ns_server_t *sctx, unsigned int option, bool value); diff --git a/lib/ns/interfacemgr.c b/lib/ns/interfacemgr.c index 93a59dd9eb..8096d5ffef 100644 --- a/lib/ns/interfacemgr.c +++ b/lib/ns/interfacemgr.c @@ -457,7 +457,6 @@ static isc_result_t ns_interface_listentcp(ns_interface_t *ifp) { isc_result_t result; - /* Reserve space for an ns_client_t with the netmgr handle */ result = isc_nm_listentcpdns(ifp->mgr->nm, (isc_nmiface_t *) &ifp->addr, ns__client_request, ifp, diff --git a/lib/ns/server.c b/lib/ns/server.c index 4259cea796..7cf773d99d 100644 --- a/lib/ns/server.c +++ b/lib/ns/server.c @@ -87,11 +87,6 @@ ns_server_create(isc_mem_t *mctx, ns_matchview_t matchingview, CHECKFATAL(isc_stats_create(mctx, &sctx->tcpoutstats6, dns_sizecounter_out_max)); - sctx->initialtimo = 300; - sctx->idletimo = 300; - sctx->keepalivetimo = 300; - sctx->advertisedtimo = 300; - sctx->udpsize = 4096; sctx->transfer_tcp_message_size = 20480; @@ -216,34 +211,6 @@ ns_server_setserverid(ns_server_t *sctx, const char *serverid) { return (ISC_R_SUCCESS); } -void -ns_server_settimeouts(ns_server_t *sctx, unsigned int initial, - unsigned int idle, unsigned int keepalive, - unsigned int advertised) -{ - REQUIRE(SCTX_VALID(sctx)); - - sctx->initialtimo = initial; - sctx->idletimo = idle; - sctx->keepalivetimo = keepalive; - sctx->advertisedtimo = advertised; -} - -void -ns_server_gettimeouts(ns_server_t *sctx, unsigned int *initial, - unsigned int *idle, unsigned int *keepalive, - unsigned int *advertised) -{ - REQUIRE(SCTX_VALID(sctx)); - REQUIRE(initial != NULL && idle != NULL && - keepalive != NULL && advertised != NULL); - - *initial = sctx->initialtimo; - *idle = sctx->idletimo; - *keepalive = sctx->keepalivetimo; - *advertised = sctx->advertisedtimo; -} - void ns_server_setoption(ns_server_t *sctx, unsigned int option, bool value) diff --git a/lib/ns/win32/libns.def b/lib/ns/win32/libns.def index 2105a3e493..d06dbc92a3 100644 --- a/lib/ns/win32/libns.def +++ b/lib/ns/win32/libns.def @@ -87,10 +87,8 @@ ns_server_attach ns_server_create ns_server_detach ns_server_getoption -ns_server_gettimeouts ns_server_setoption ns_server_setserverid -ns_server_settimeouts ns_sortlist_addrorder1 ns_sortlist_addrorder2 ns_sortlist_byaddrsetup From 0260d31d2615be5167a4e636f593f7c73503f092 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Thu, 21 Nov 2019 17:08:06 -0800 Subject: [PATCH 3/9] netmgr: performance improvement - use memory pools for ievent and uvreq objects. --- lib/isc/netmgr/netmgr-int.h | 8 ++++- lib/isc/netmgr/netmgr.c | 61 ++++++++++++++++++++++--------------- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 4bea485800..1979a01454 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -41,7 +41,6 @@ typedef struct isc__networker { uv_async_t async; /* async channel to send * data to this networker */ isc_mutex_t lock; - isc_mempool_t *mpool_bufs; isc_condition_t cond; bool paused; bool finished; @@ -231,6 +230,13 @@ struct isc_nm { isc_mutex_t lock; isc_condition_t wkstatecond; isc__networker_t *workers; + + isc_mempool_t *reqpool; + isc_mutex_t reqlock; + + isc_mempool_t *evpool; + isc_mutex_t evlock; + atomic_uint_fast32_t workers_running; atomic_uint_fast32_t workers_paused; atomic_uint_fast32_t maxudp; diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 9a41171cae..273200a6e3 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -96,13 +96,30 @@ isc_nm_start(isc_mem_t *mctx, uint32_t workers) { /* * Default TCP timeout values. - * May be updated by isc_nm_listentcp(). + * May be updated by isc_nm_tcptimeouts(). */ mgr->init = 30000; mgr->idle = 30000; mgr->keepalive = 30000; mgr->advertised = 30000; + isc_mutex_init(&mgr->reqlock); + isc_mempool_create(mgr->mctx, sizeof(isc__nm_uvreq_t), &mgr->reqpool); + isc_mempool_setname(mgr->reqpool, "nm_reqpool"); + isc_mempool_setmaxalloc(mgr->reqpool, 32768); + isc_mempool_setfreemax(mgr->reqpool, 32768); + isc_mempool_associatelock(mgr->reqpool, &mgr->reqlock); + isc_mempool_setfillcount(mgr->reqpool, 32); + + isc_mutex_init(&mgr->evlock); + isc_mempool_create(mgr->mctx, sizeof(isc__netievent_storage_t), + &mgr->evpool); + isc_mempool_setname(mgr->evpool, "nm_evpool"); + isc_mempool_setmaxalloc(mgr->evpool, 32768); + isc_mempool_setfreemax(mgr->evpool, 32768); + isc_mempool_associatelock(mgr->evpool, &mgr->evlock); + isc_mempool_setfillcount(mgr->evpool, 32); + mgr->workers = isc_mem_get(mctx, workers * sizeof(isc__networker_t)); for (size_t i = 0; i < workers; i++) { int r; @@ -123,7 +140,6 @@ isc_nm_start(isc_mem_t *mctx, uint32_t workers) { isc_mutex_init(&worker->lock); isc_condition_init(&worker->cond); - isc_mempool_create(mgr->mctx, 65536, &worker->mpool_bufs); worker->ievents = isc_queue_new(mgr->mctx, 128); /* @@ -177,17 +193,22 @@ nm_destroy(isc_nm_t **mgr0) { while ((ievent = (isc__netievent_t *) isc_queue_dequeue(mgr->workers[i].ievents)) != NULL) { - isc_mem_put(mgr->mctx, ievent, - sizeof(isc__netievent_storage_t)); + isc_mempool_put(mgr->evpool, ievent); } int r = uv_loop_close(&mgr->workers[i].loop); INSIST(r == 0); isc_queue_destroy(mgr->workers[i].ievents); - isc_mempool_destroy(&mgr->workers[i].mpool_bufs); } isc_condition_destroy(&mgr->wkstatecond); isc_mutex_destroy(&mgr->lock); + + isc_mempool_destroy(&mgr->evpool); + isc_mutex_destroy(&mgr->evlock); + + isc_mempool_destroy(&mgr->reqpool); + isc_mutex_destroy(&mgr->reqlock); + isc_mem_put(mgr->mctx, mgr->workers, mgr->nworkers * sizeof(isc__networker_t)); isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(*mgr)); @@ -413,7 +434,7 @@ nm_thread(void *worker0) { * XXX: uv_run() in UV_RUN_DEFAULT mode returns * zero if there are still active uv_handles. * This shouldn't happen, but if it does, we just - * to keep checking until they're done. We nap for a + * keep checking until they're done. We nap for a * tenth of a second on each loop so as not to burn * CPU. (We do a conditional wait instead, but it * seems like overkill for this case.) @@ -440,29 +461,23 @@ nm_thread(void *worker0) { } /* - * async_cb is an universal callback for 'async' events sent to event loop. - * It's the only way to safely pass data to libuv event loop. We use a single - * async event and a lockless queue of 'isc__netievent_t' structures passed - * from other threads. + * async_cb is a universal callback for 'async' events sent to event loop. + * It's the only way to safely pass data to the libuv event loop. We use a + * single async event and a lockless queue of 'isc__netievent_t' structures + * passed from other threads. */ static void async_cb(uv_async_t *handle) { isc__networker_t *worker = (isc__networker_t *) handle->loop->data; isc__netievent_t *ievent; - /* - * We only try dequeue to not waste time, libuv guarantees - * that if someone calls uv_async_send -after- async_cb was called - * then async_cb will be called again, we won't loose any signals. - */ while ((ievent = (isc__netievent_t *) isc_queue_dequeue(worker->ievents)) != NULL) { switch (ievent->type) { case netievent_stop: uv_stop(handle->loop); - isc_mem_put(worker->mgr->mctx, ievent, - sizeof(isc__netievent_storage_t)); + isc_mempool_put(worker->mgr->evpool, ievent); return; case netievent_udplisten: isc__nm_async_udplisten(worker, ievent); @@ -508,10 +523,8 @@ async_cb(uv_async_t *handle) { void * isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type) { - isc__netievent_storage_t *event = - isc_mem_get(mgr->mctx, sizeof(isc__netievent_storage_t)); + isc__netievent_storage_t *event = isc_mempool_get(mgr->evpool); - /* XXX: Use a memory pool? */ *event = (isc__netievent_storage_t) { .ni.type = type }; @@ -520,7 +533,7 @@ isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type) { void isc__nm_put_ievent(isc_nm_t *mgr, void *ievent) { - isc_mem_put(mgr->mctx, ievent, sizeof(isc__netievent_storage_t)); + isc_mempool_put(mgr->evpool, ievent); } void @@ -612,7 +625,7 @@ nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) { isc_astack_destroy(sock->inactivehandles); while ((uvreq = isc_astack_pop(sock->inactivereqs)) != NULL) { - isc_mem_put(sock->mgr->mctx, uvreq, sizeof(*uvreq)); + isc_mempool_put(sock->mgr->reqpool, uvreq); } isc_astack_destroy(sock->inactivereqs); @@ -1076,7 +1089,7 @@ isc__nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock) { } if (req == NULL) { - req = isc_mem_get(mgr->mctx, sizeof(isc__nm_uvreq_t)); + req = isc_mempool_get(mgr->reqpool); } *req = (isc__nm_uvreq_t) { @@ -1114,7 +1127,7 @@ isc__nm_uvreq_put(isc__nm_uvreq_t **req0, isc_nmsocket_t *sock) { if (!atomic_load(&sock->active) || !isc_astack_trypush(sock->inactivereqs, req)) { - isc_mem_put(sock->mgr->mctx, req, sizeof(isc__nm_uvreq_t)); + isc_mempool_put(sock->mgr->reqpool, req); } if (handle != NULL) { From c4ad0466d64dceae38e000d9fca7f4acd23f1370 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Thu, 21 Nov 2019 18:38:04 -0800 Subject: [PATCH 4/9] netmgr: log TCP connection errors --- lib/isc/include/isc/log.h | 3 ++- lib/isc/log.c | 1 + lib/isc/netmgr/tcp.c | 9 +++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/isc/include/isc/log.h b/lib/isc/include/isc/log.h index d2d2886161..2251a42837 100644 --- a/lib/isc/include/isc/log.h +++ b/lib/isc/include/isc/log.h @@ -177,7 +177,8 @@ LIBISC_EXTERNAL_DATA extern isc_logmodule_t isc_modules[]; #define ISC_LOGMODULE_INTERFACE (&isc_modules[2]) #define ISC_LOGMODULE_TIMER (&isc_modules[3]) #define ISC_LOGMODULE_FILE (&isc_modules[4]) -#define ISC_LOGMODULE_OTHER (&isc_modules[5]) +#define ISC_LOGMODULE_NETMGR (&isc_modules[5]) +#define ISC_LOGMODULE_OTHER (&isc_modules[6]) ISC_LANG_BEGINDECLS diff --git a/lib/isc/log.c b/lib/isc/log.c index 0af599f8c2..3d60ca2fad 100644 --- a/lib/isc/log.c +++ b/lib/isc/log.c @@ -192,6 +192,7 @@ LIBISC_EXTERNAL_DATA isc_logmodule_t isc_modules[] = { { "interface", 0 }, { "timer", 0 }, { "file", 0 }, + { "netmgr", 0 }, { "other", 0 }, { NULL, 0 } }; diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index c400866db7..56e8600f57 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -500,15 +501,19 @@ accept_connection(isc_nmsocket_t *ssock) { static void tcp_connection_cb(uv_stream_t *server, int status) { isc_nmsocket_t *ssock = server->data; - isc_result_t result = accept_connection(ssock); + isc_result_t result; UNUSED(status); + result = accept_connection(ssock); if (result != ISC_R_SUCCESS) { if (result == ISC_R_QUOTA || result == ISC_R_SOFTQUOTA) { ssock->overquota = true; } - /* TODO: Log the error. */ + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR, + "TCP connection failed: %s", + isc_result_totext(result)); } } From 37354ee225314862a493bc64ac56618327233166 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Fri, 22 Nov 2019 13:19:45 +0100 Subject: [PATCH 5/9] netmgr: fix TCP backlog and client quota count - add support for TCP backlog, using the value provided by config. - don't attach to TCP client quota for listening sockets, only connected sockets. --- bin/tests/system/tcp/tests.sh | 2 +- lib/isc/include/isc/netmgr.h | 6 ++++-- lib/isc/netmgr/netmgr-int.h | 15 +++++++++++++++ lib/isc/netmgr/netmgr.c | 2 ++ lib/isc/netmgr/tcp.c | 23 ++++++++++------------- lib/isc/netmgr/tcpdns.c | 5 +++-- lib/ns/interfacemgr.c | 1 + 7 files changed, 36 insertions(+), 18 deletions(-) diff --git a/bin/tests/system/tcp/tests.sh b/bin/tests/system/tcp/tests.sh index a3ce32d28b..2190ec5f40 100644 --- a/bin/tests/system/tcp/tests.sh +++ b/bin/tests/system/tcp/tests.sh @@ -115,7 +115,7 @@ n=$((n + 1)) echo_i "TCP high-water: check initial statistics ($n)" ret=0 refresh_tcp_stats -assert_int_equal "${TCP_CUR}" 1 "current TCP clients count" || ret=1 +assert_int_equal "${TCP_CUR}" 0 "current TCP clients count" || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status + ret)) diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index 97bb1d6d76..82d32f0dc6 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -223,7 +223,8 @@ isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region, isc_result_t isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_cb_t cb, void *cbarg, - size_t extrahandlesize, isc_quota_t *quota, + size_t extrahandlesize, int backlog, + isc_quota_t *quota, isc_nmsocket_t **sockp); /*%< * Start listening for raw messages over the TCP interface 'iface', using @@ -255,7 +256,8 @@ isc_nm_tcp_stoplistening(isc_nmsocket_t *sock); isc_result_t isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, void *arg, - size_t extrahandlesize, isc_quota_t *quota, + size_t extrahandlesize, int backlog, + isc_quota_t *quota, isc_nmsocket_t **sockp); /*%< * Start listening for DNS messages over the TCP interface 'iface', using diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 1979a01454..0844cdcbb7 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -286,8 +286,20 @@ struct isc_nmsocket { isc_nmsocket_type type; isc_nm_t *mgr; isc_nmsocket_t *parent; + + /* + * quota is the TCP client, attached when a TCP connection + * is established. pquota is a non-attached pointer to the + * TCP client quota, stored in listening sockets but only + * attached in connected sockets. + */ isc_quota_t *quota; + isc_quota_t *pquota; bool overquota; + + /* + * TCP read timeout timer. + */ uv_timer_t timer; bool timer_initialized; uint64_t read_timeout; @@ -307,6 +319,9 @@ struct isc_nmsocket { /*% extra data allocated at the end of each isc_nmhandle_t */ size_t extrahandlesize; + /*% TCP backlog */ + int backlog; + /*% libuv data */ uv_os_sock_t fd; union uv_any_handle uv_handle; diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 273200a6e3..d7699e6d97 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -617,6 +617,8 @@ nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree) { isc_quota_detach(&sock->quota); } + sock->pquota = NULL; + if (sock->timer_initialized) { uv_close((uv_handle_t *)&sock->timer, NULL); sock->timer_initialized = false; diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 56e8600f57..5283740f5e 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -130,7 +130,8 @@ tcp_connect_cb(uv_connect_t *uvreq, int status) { isc_result_t isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_cb_t cb, void *cbarg, - size_t extrahandlesize, isc_quota_t *quota, + size_t extrahandlesize, int backlog, + isc_quota_t *quota, isc_nmsocket_t **sockp) { isc__netievent_tcplisten_t *ievent = NULL; @@ -144,15 +145,13 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface, nsock->rcb.accept = cb; nsock->rcbarg = cbarg; nsock->extrahandlesize = extrahandlesize; + nsock->backlog = backlog; if (quota != NULL) { /* - * We need to force it to make sure we get it attached. - * An example failure mode would be server under attack - * reconfiguring interfaces - that might cause weak attach - * to fail and leave this listening socket without limits. - * We can ignore the result. + * We don't attach to quota, just assign - to avoid + * increasing quota unnecesarily. */ - isc_quota_force(quota, &nsock->quota); + nsock->pquota = quota; } nsock->tid = isc_random_uniform(mgr->nworkers); @@ -185,7 +184,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ievent0) { } uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa, 0); - r = uv_listen((uv_stream_t *) &sock->uv_handle.tcp, 10, + r = uv_listen((uv_stream_t *) &sock->uv_handle.tcp, sock->backlog, tcp_connection_cb); if (r != 0) { return; @@ -219,9 +218,7 @@ stoplistening_cb(uv_handle_t *handle) { SIGNAL(&sock->cond); UNLOCK(&sock->lock); - if (sock->quota != NULL) { - isc_quota_detach(&sock->quota); - } + sock->pquota = NULL; isc_nmsocket_detach(&sock); } @@ -446,8 +443,8 @@ accept_connection(isc_nmsocket_t *ssock) { return (ISC_R_CANCELED); } - if (ssock->quota != NULL) { - result = isc_quota_attach(ssock->quota, "a); + if (ssock->pquota != NULL) { + result = isc_quota_attach(ssock->pquota, "a); if (result != ISC_R_SUCCESS) { return (result); } diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index c3f87a3d4d..d75fdda943 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -275,7 +275,8 @@ dnslisten_readcb(isc_nmhandle_t *handle, isc_region_t *region, void *arg) { isc_result_t isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, void *cbarg, - size_t extrahandlesize, isc_quota_t *quota, + size_t extrahandlesize, int backlog, + isc_quota_t *quota, isc_nmsocket_t **sockp) { /* A 'wrapper' socket object with outer set to true TCP socket */ @@ -293,7 +294,7 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_nmiface_t *iface, /* We set dnslistensock->outer to a true listening socket */ result = isc_nm_listentcp(mgr, iface, dnslisten_acceptcb, - dnslistensock, extrahandlesize, + dnslistensock, extrahandlesize, backlog, quota, &dnslistensock->outer); atomic_store(&dnslistensock->listening, true); diff --git a/lib/ns/interfacemgr.c b/lib/ns/interfacemgr.c index 8096d5ffef..2dd03a0eee 100644 --- a/lib/ns/interfacemgr.c +++ b/lib/ns/interfacemgr.c @@ -461,6 +461,7 @@ ns_interface_listentcp(ns_interface_t *ifp) { (isc_nmiface_t *) &ifp->addr, ns__client_request, ifp, sizeof(ns_client_t), + ifp->mgr->backlog, &ifp->mgr->sctx->tcpquota, &ifp->tcplistensocket); if (result != ISC_R_SUCCESS) { From d6c5052f7e48971c537cdfc985fcbc8ba7a95de6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Fri, 22 Nov 2019 14:13:19 +0100 Subject: [PATCH 6/9] netmgr: actively close all sockets when shutting down server without this change, named could sometimes lag for a while on shutdown while it waited for open TCP connections to time out. --- lib/isc/netmgr/netmgr-int.h | 25 +++++++++++++++++++------ lib/isc/netmgr/netmgr.c | 30 ++++++++++++++++++++++++++++++ lib/isc/netmgr/tcp.c | 7 +++++++ 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 0844cdcbb7..f2aa88af1a 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -116,12 +116,9 @@ typedef enum isc__netievent_type { netievent_tcpstoplisten, netievent_tcpclose, netievent_closecb, + netievent_shutdown, } isc__netievent_type; -typedef struct isc__netievent_stop { - isc__netievent_type type; -} isc__netievent_stop_t; - /* * We have to split it because we can read and write on a socket * simultaneously. @@ -209,10 +206,13 @@ typedef struct isc__netievent { isc__netievent_type type; } isc__netievent_t; +typedef isc__netievent_t isc__netievent_shutdown_t; +typedef isc__netievent_t isc__netievent_stop_t; + typedef union { isc__netievent_t ni; - isc__netievent_stop_t nis; - isc__netievent_udplisten_t niul; + isc__netievent__socket_t nis; + isc__netievent__socket_req_t nisr; isc__netievent_udpsend_t nius; } isc__netievent_storage_t; @@ -523,6 +523,13 @@ isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ievent0); * Issue a 'handle closed' callback on the socket. */ +void +isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ievent0); +/*%< + * Walk through all uv handles, get the underlying sockets and issue + * close on them. + */ + isc_result_t isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, void *cbarg); @@ -555,6 +562,12 @@ isc__nm_tcp_close(isc_nmsocket_t *sock); * Close a TCP socket. */ +void +isc__nm_tcp_shutdown(isc_nmsocket_t *sock); +/*%< + * Called on shutdown to close and clean up a listening TCP socket. + */ + void isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ievent0); void diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index d7699e6d97..2b02a54734 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -309,6 +309,12 @@ isc_nm_destroy(isc_nm_t **mgr0) { mgr = *mgr0; *mgr0 = NULL; + for (size_t i = 0; i < mgr->nworkers; i++) { + isc__netievent_t *event = NULL; + event = isc__nm_get_ievent(mgr, netievent_shutdown); + isc__nm_enqueue_ievent(&mgr->workers[i], event); + } + /* * Wait for the manager to be dereferenced elsehwere. */ @@ -512,6 +518,9 @@ async_cb(uv_async_t *handle) { case netievent_closecb: isc__nm_async_closecb(worker, ievent); break; + case netievent_shutdown: + isc__nm_async_shutdown(worker, ievent); + break; default: INSIST(0); ISC_UNREACHABLE(); @@ -1174,6 +1183,27 @@ isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ievent0) { isc_nmsocket_detach(&ievent->sock); } +static void +shutdown_walk_cb(uv_handle_t *handle, void *arg) { + isc_nmsocket_t *sock = NULL; + + UNUSED(arg); + + switch(handle->type) { + case UV_TCP: + isc__nm_tcp_shutdown((isc_nmsocket_t *) handle->data); + break; + default: + break; + } +} + +void +isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ievent0) { + UNUSED(ievent0); + uv_walk(&worker->loop, shutdown_walk_cb, NULL); +} + bool isc__nm_acquire_interlocked(isc_nm_t *mgr) { LOCK(&mgr->lock); diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 5283740f5e..bfe4af956d 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -688,3 +688,10 @@ isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ievent0) { tcp_close_direct(ievent->sock); } + +void +isc__nm_tcp_shutdown(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + + sock->rcb.recv(sock->tcphandle, NULL, sock->rcbarg); +} From 00333a5c971499f63837fb1319f0d318efbbdfbb Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Fri, 22 Nov 2019 15:57:42 -0800 Subject: [PATCH 7/9] netmgr: add shutdown function - new function isc_nm_shutdown() shuts down all active TCP connections, but does not destroy the netmgr. --- bin/named/main.c | 14 +++++++++++- lib/isc/include/isc/netmgr.h | 11 +++++++++- lib/isc/netmgr/netmgr-int.h | 12 ++++++----- lib/isc/netmgr/netmgr.c | 42 ++++++++++++++++++++---------------- lib/isc/netmgr/tcp.c | 8 +++++-- lib/isc/win32/libisc.def.in | 1 + 6 files changed, 61 insertions(+), 27 deletions(-) diff --git a/bin/named/main.c b/bin/named/main.c index d707916a7c..1716de3532 100644 --- a/bin/named/main.c +++ b/bin/named/main.c @@ -939,11 +939,23 @@ create_managers(void) { static void destroy_managers(void) { /* - * isc_taskmgr_destroy() will block until all tasks have exited, + * isc_nm_closedown() closes all active connections, freeing + * attached clients and other resources and preventing new + * connections from being established, but it not does not + * stop all processing or destroy the netmgr yet. + */ + isc_nm_closedown(named_g_nm); + + /* + * isc_taskmgr_destroy() will block until all tasks have exited. */ isc_taskmgr_destroy(&named_g_taskmgr); isc_timermgr_destroy(&named_g_timermgr); isc_socketmgr_destroy(&named_g_socketmgr); + + /* + * At this point is safe to destroy the netmgr. + */ isc_nm_destroy(&named_g_nm); } diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index 82d32f0dc6..157961609f 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -46,7 +46,16 @@ isc_nm_destroy(isc_nm_t **mgr0); * for all other references to be gone. */ -/* Return thread id of current thread, or ISC_NETMGR_TID_UNKNOWN */ +void +isc_nm_closedown(isc_nm_t *mgr); +/*%< + * Close down all active connections, freeing associated resources; + * prevent new connections from being established. This can optionally + * be called prior to shutting down the netmgr, to stop all processing + * before shutting down the task manager. + */ + +/* Return thread ID of current thread, or ISC_NETMGR_TID_UNKNOWN */ int isc_nm_tid(void); diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index f2aa88af1a..36ebd3aaa1 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -182,7 +182,6 @@ typedef isc__netievent__socket_t isc__netievent_tcpstoplisten_t; typedef isc__netievent__socket_t isc__netievent_tcpclose_t; typedef isc__netievent__socket_t isc__netievent_startread_t; typedef isc__netievent__socket_t isc__netievent_pauseread_t; -typedef isc__netievent__socket_t isc__netievent_resumeread_t; typedef isc__netievent__socket_t isc__netievent_closecb_t; typedef struct isc__netievent__socket_req { @@ -242,6 +241,12 @@ struct isc_nm { atomic_uint_fast32_t maxudp; atomic_bool paused; + /* + * Acive connections are being closed and new connections are + * no longer allowed. + */ + atomic_bool closing; + /* * A worker is actively waiting for other workers, for example to * stop listening; that means no other thread can do the same thing @@ -582,15 +587,12 @@ isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ievent0); void isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ievent0); void -isc__nm_async_resumeread(isc__networker_t *worker, isc__netievent_t *ievent0); -void isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ievent0); /*%< * Callback handlers for asynchronous TCP events (connect, listen, - * stoplisten, send, read, pauseread, resumeread, close). + * stoplisten, send, read, pause, close). */ - isc_result_t isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb, void *cbarg); diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 2b02a54734..32569093a4 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -297,26 +297,34 @@ isc_nm_detach(isc_nm_t **mgr0) { } } - void -isc_nm_destroy(isc_nm_t **mgr0) { - isc_nm_t *mgr = NULL; - int references; - - REQUIRE(mgr0 != NULL); - REQUIRE(VALID_NM(*mgr0)); - - mgr = *mgr0; - *mgr0 = NULL; +isc_nm_closedown(isc_nm_t *mgr) { + REQUIRE(VALID_NM(mgr)); + atomic_store(&mgr->closing, true); for (size_t i = 0; i < mgr->nworkers; i++) { isc__netievent_t *event = NULL; event = isc__nm_get_ievent(mgr, netievent_shutdown); isc__nm_enqueue_ievent(&mgr->workers[i], event); } +} + +void +isc_nm_destroy(isc_nm_t **mgr0) { + isc_nm_t *mgr = NULL; + + REQUIRE(mgr0 != NULL); + REQUIRE(VALID_NM(*mgr0)); + + mgr = *mgr0; /* - * Wait for the manager to be dereferenced elsehwere. + * Close active connections. + */ + isc_nm_closedown(mgr); + + /* + * Wait for the manager to be dereferenced elsewhere. */ while (isc_refcount_current(&mgr->references) > 1) { #ifdef WIN32 @@ -325,11 +333,11 @@ isc_nm_destroy(isc_nm_t **mgr0) { usleep(1000000); #endif } - references = isc_refcount_decrement(&mgr->references); - INSIST(references > 0); - if (references == 1) { - nm_destroy(&mgr); - } + + /* + * Detach final reference. + */ + isc_nm_detach(mgr0); } void @@ -1185,8 +1193,6 @@ isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ievent0) { static void shutdown_walk_cb(uv_handle_t *handle, void *arg) { - isc_nmsocket_t *sock = NULL; - UNUSED(arg); switch(handle->type) { diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index bfe4af956d..75dcd3a7e2 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -438,7 +438,9 @@ accept_connection(isc_nmsocket_t *ssock) { REQUIRE(VALID_NMSOCK(ssock)); REQUIRE(ssock->tid == isc_nm_tid()); - if (!atomic_load_relaxed(&ssock->active)) { + if (!atomic_load_relaxed(&ssock->active) || + atomic_load_relaxed(&ssock->mgr->closing)) + { /* We're closing, bail */ return (ISC_R_CANCELED); } @@ -693,5 +695,7 @@ void isc__nm_tcp_shutdown(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); - sock->rcb.recv(sock->tcphandle, NULL, sock->rcbarg); + if (sock->type == isc_nm_tcpsocket && sock->tcphandle != NULL) { + sock->rcb.recv(sock->tcphandle, NULL, sock->rcbarg); + } } diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in index 35faea7d55..e65855f187 100644 --- a/lib/isc/win32/libisc.def.in +++ b/lib/isc/win32/libisc.def.in @@ -444,6 +444,7 @@ isc_nmhandle_peeraddr isc_nmhandle_ref isc_nmhandle_setdata isc_nmhandle_unref +isc_nm_closedown isc_nm_destroy isc_nm_detach isc_nm_listentcpdns From d484b66ae1355a20b1daf461989e26dac54b3124 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Fri, 22 Nov 2019 12:27:23 -0800 Subject: [PATCH 8/9] improve system tests - increase prefetch test timing tolerance. - remove five-second pause and explicit connection closing in tcp test as they are no longer necessary. --- bin/tests/system/resolver/ns5/named.conf.in | 1 + bin/tests/system/resolver/tests.sh | 6 +++--- bin/tests/system/tcp/tests.sh | 4 ---- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/bin/tests/system/resolver/ns5/named.conf.in b/bin/tests/system/resolver/ns5/named.conf.in index b1d431e703..a98290903b 100644 --- a/bin/tests/system/resolver/ns5/named.conf.in +++ b/bin/tests/system/resolver/ns5/named.conf.in @@ -22,6 +22,7 @@ options { recursion yes; dnssec-validation yes; querylog yes; + prefetch 3 9; }; server 10.53.0.7 { diff --git a/bin/tests/system/resolver/tests.sh b/bin/tests/system/resolver/tests.sh index b9e5180f25..11a359be4d 100755 --- a/bin/tests/system/resolver/tests.sh +++ b/bin/tests/system/resolver/tests.sh @@ -452,7 +452,7 @@ n=`expr $n + 1` echo_i "check prefetch (${n})" ret=0 $DIG $DIGOPTS @10.53.0.5 fetch.tld txt > dig.out.1.${n} || ret=1 -ttl1=`awk '/"A" "short" "ttl"/ { print $2 - 2 }' dig.out.1.${n}` +ttl1=`awk '/"A" "short" "ttl"/ { print $2 - 3 }' dig.out.1.${n}` # sleep so we are in prefetch range sleep ${ttl1:-0} # trigger prefetch @@ -470,7 +470,7 @@ n=`expr $n + 1` echo_i "check prefetch of validated DS's RRSIG TTL is updated (${n})" ret=0 $DIG $DIGOPTS +dnssec @10.53.0.5 ds.example.net ds > dig.out.1.${n} || ret=1 -dsttl1=`awk '$4 == "DS" && $7 == "2" { print $2 - 2 }' dig.out.1.${n}` +dsttl1=`awk '$4 == "DS" && $7 == "2" { print $2 - 3 }' dig.out.1.${n}` # sleep so we are in prefetch range sleep ${dsttl1:-0} # trigger prefetch @@ -517,7 +517,7 @@ n=`expr $n + 1` echo_i "check prefetch qtype * (${n})" ret=0 $DIG $DIGOPTS @10.53.0.5 fetchall.tld any > dig.out.1.${n} || ret=1 -ttl1=`awk '/"A" "short" "ttl"/ { print $2 - 2 }' dig.out.1.${n}` +ttl1=`awk '/"A" "short" "ttl"/ { print $2 - 3 }' dig.out.1.${n}` # sleep so we are in prefetch range sleep ${ttl1:-0} # trigger prefetch diff --git a/bin/tests/system/tcp/tests.sh b/bin/tests/system/tcp/tests.sh index 2190ec5f40..83191c2193 100644 --- a/bin/tests/system/tcp/tests.sh +++ b/bin/tests/system/tcp/tests.sh @@ -166,12 +166,8 @@ check_stats_limit() { assert_int_equal "${TCP_HIGH}" "${TCP_LIMIT}" "TCP high-water value" || return 1 } retry 2 check_stats_limit || ret=1 -close_connections $((TCP_LIMIT + 1)) || : if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status + ret)) -# wait for connections to close -sleep 5 - echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 From 8bdb5f586adc7ae5b9a8a0e818a62f16f24198cc Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Fri, 22 Nov 2019 12:26:42 -0800 Subject: [PATCH 9/9] CHANGES --- CHANGES | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES b/CHANGES index 282cb11d05..e1996edb4f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,8 @@ +5325. [bug] Addressed several issues with TCP connections in + the netmgr: restored support for TCP connection + timeouts, restored TCP backlog support, actively + close all open sockets during shutdown. [GL #1312] + 5324. [bug] Change the category of some log messages from general to the more appopriate catergory of xfer-in. [GL #1394]