From 9e7617cc84f465769be1a3f426f30cd516220902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Thu, 3 Jan 2019 14:17:43 +0100 Subject: [PATCH 1/7] fix enforcement of tcp-clients (v1) tcp-clients settings could be exceeded in some cases by creating more and more active TCP clients that are over the set quota limit, which in the end could lead to a DoS attack by e.g. exhaustion of file descriptors. If TCP client we're closing went over the quota (so it's not attached to a quota) mark it as mortal - so that it will be destroyed and not set up to listen for new connections - unless it's the last client for a specific interface. (cherry picked from commit eafcff07c25bdbe038ae1e4b6660602a080b9395) --- lib/ns/client.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/ns/client.c b/lib/ns/client.c index 4a43fb6d32..72a9ebd0af 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -441,8 +441,19 @@ exit_check(ns_client_t *client) { isc_socket_detach(&client->tcpsocket); } - if (client->tcpquota != NULL) + if (client->tcpquota != NULL) { isc_quota_detach(&client->tcpquota); + } else { + /* + * We went over quota with this client, we don't + * want to restart listening unless this is the + * last client on this interface, which is + * checked later. + */ + if (TCP_CLIENT(client)) { + client->mortal = true; + } + } if (client->timerset) { (void)isc_timer_reset(client->timer, From d7e84cee0bd7957a0707b86d47c29de4b798d350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Fri, 4 Jan 2019 12:50:51 +0100 Subject: [PATCH 2/7] tcp-clients could still be exceeded (v2) the TCP client quota could still be ineffective under some circumstances. this change: - improves quota accounting to ensure that TCP clients are properly limited, while still guaranteeing that at least one client is always available to serve TCP connections on each interface. 
- uses more descriptive names and removes one (ntcptarget) that was no longer needed - adds comments (cherry picked from commit 9e74969f85329fe26df2fad390468715215e2edd) --- lib/ns/client.c | 304 ++++++++++++++++++++++++------- lib/ns/include/ns/client.h | 14 +- lib/ns/include/ns/interfacemgr.h | 11 +- lib/ns/interfacemgr.c | 8 +- 4 files changed, 262 insertions(+), 75 deletions(-) diff --git a/lib/ns/client.c b/lib/ns/client.c index 72a9ebd0af..4042030b49 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -243,7 +243,7 @@ static void ns_client_dumpmessage(ns_client_t *client, const char *reason); static isc_result_t get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, dns_dispatch_t *disp, bool tcp); static isc_result_t get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, - isc_socket_t *sock); + isc_socket_t *sock, ns_client_t *oldclient); static void compute_cookie(ns_client_t *client, uint32_t when, uint32_t nonce, const unsigned char *secret, isc_buffer_t *buf); @@ -425,8 +425,11 @@ exit_check(ns_client_t *client) { */ INSIST(client->recursionquota == NULL); INSIST(client->newstate <= NS_CLIENTSTATE_READY); - if (client->nreads > 0) + + if (client->nreads > 0) { dns_tcpmsg_cancelread(&client->tcpmsg); + } + if (client->nreads != 0) { /* Still waiting for read cancel completion. 
*/ return (true); @@ -436,25 +439,58 @@ exit_check(ns_client_t *client) { dns_tcpmsg_invalidate(&client->tcpmsg); client->tcpmsg_valid = false; } + if (client->tcpsocket != NULL) { CTRACE("closetcp"); isc_socket_detach(&client->tcpsocket); + + if (client->tcpactive) { + LOCK(&client->interface->lock); + INSIST(client->interface->ntcpactive > 0); + client->interface->ntcpactive--; + UNLOCK(&client->interface->lock); + client->tcpactive = false; + } } if (client->tcpquota != NULL) { - isc_quota_detach(&client->tcpquota); - } else { /* - * We went over quota with this client, we don't - * want to restart listening unless this is the - * last client on this interface, which is - * checked later. + * If we are not in a pipeline group, or + * we are the last client in the group, detach from + * tcpquota; otherwise, transfer the quota to + * another client in the same group. */ - if (TCP_CLIENT(client)) { - client->mortal = true; + if (!ISC_LINK_LINKED(client, glink) || + (client->glink.next == NULL && + client->glink.prev == NULL)) + { + isc_quota_detach(&client->tcpquota); + } else if (client->glink.next != NULL) { + INSIST(client->glink.next->tcpquota == NULL); + client->glink.next->tcpquota = client->tcpquota; + client->tcpquota = NULL; + } else { + INSIST(client->glink.prev->tcpquota == NULL); + client->glink.prev->tcpquota = client->tcpquota; + client->tcpquota = NULL; } } + /* + * Unlink from pipeline group. + */ + if (ISC_LINK_LINKED(client, glink)) { + if (client->glink.next != NULL) { + client->glink.next->glink.prev = + client->glink.prev; + } + if (client->glink.prev != NULL) { + client->glink.prev->glink.next = + client->glink.next; + } + ISC_LINK_INIT(client, glink); + } + if (client->timerset) { (void)isc_timer_reset(client->timer, isc_timertype_inactive, @@ -475,17 +511,18 @@ exit_check(ns_client_t *client) { * that already. Check whether this client needs to remain * active and force it to go inactive if not. 
* - * UDP clients go inactive at this point, but TCP clients - * may remain active if we have fewer active TCP client - * objects than desired due to an earlier quota exhaustion. + * UDP clients go inactive at this point, but a TCP client + * will needs to remain active if no other clients are + * listening for TCP requests on this interface, to + * prevent this interface from going nonresponsive. */ if (client->mortal && TCP_CLIENT(client) && ((client->sctx->options & NS_SERVER_CLIENTTEST) == 0)) { LOCK(&client->interface->lock); - if (client->interface->ntcpcurrent < - client->interface->ntcptarget) + if (client->interface->ntcpaccepting == 0) { client->mortal = false; + } UNLOCK(&client->interface->lock); } @@ -494,15 +531,17 @@ exit_check(ns_client_t *client) { * queue for recycling. */ if (client->mortal) { - if (client->newstate > NS_CLIENTSTATE_INACTIVE) + if (client->newstate > NS_CLIENTSTATE_INACTIVE) { client->newstate = NS_CLIENTSTATE_INACTIVE; + } } if (NS_CLIENTSTATE_READY == client->newstate) { if (TCP_CLIENT(client)) { client_accept(client); - } else + } else { client_udprecv(client); + } client->newstate = NS_CLIENTSTATE_MAX; return (true); } @@ -514,41 +553,57 @@ exit_check(ns_client_t *client) { /* * We are trying to enter the inactive state. */ - if (client->naccepts > 0) + if (client->naccepts > 0) { isc_socket_cancel(client->tcplistener, client->task, ISC_SOCKCANCEL_ACCEPT); + } /* Still waiting for accept cancel completion. */ - if (! (client->naccepts == 0)) + if (! (client->naccepts == 0)) { return (true); + } /* Accept cancel is complete. */ - if (client->nrecvs > 0) + if (client->nrecvs > 0) { isc_socket_cancel(client->udpsocket, client->task, ISC_SOCKCANCEL_RECV); + } /* Still waiting for recv cancel completion. */ - if (! (client->nrecvs == 0)) + if (! 
(client->nrecvs == 0)) { return (true); + } /* Still waiting for control event to be delivered */ - if (client->nctls > 0) + if (client->nctls > 0) { return (true); - - /* Deactivate the client. */ - if (client->interface) - ns_interface_detach(&client->interface); + } INSIST(client->naccepts == 0); INSIST(client->recursionquota == NULL); - if (client->tcplistener != NULL) + if (client->tcplistener != NULL) { isc_socket_detach(&client->tcplistener); - if (client->udpsocket != NULL) + if (client->tcpactive) { + LOCK(&client->interface->lock); + INSIST(client->interface->ntcpactive > 0); + client->interface->ntcpactive--; + UNLOCK(&client->interface->lock); + client->tcpactive = false; + } + } + if (client->udpsocket != NULL) { isc_socket_detach(&client->udpsocket); + } - if (client->dispatch != NULL) + /* Deactivate the client. */ + if (client->interface != NULL) { + ns_interface_detach(&client->interface); + } + + if (client->dispatch != NULL) { dns_dispatch_detach(&client->dispatch); + } client->attributes = 0; client->mortal = false; @@ -579,8 +634,9 @@ exit_check(ns_client_t *client) { ISC_QUEUE_PUSH(manager->inactive, client, ilink); } - if (client->needshutdown) + if (client->needshutdown) { isc_task_shutdown(client->task); + } return (true); } } @@ -706,7 +762,6 @@ client_start(isc_task_t *task, isc_event_t *event) { } } - /*% * The client's task has received a shutdown event. */ @@ -2482,17 +2537,12 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { /* * Pipeline TCP query processing. 
*/ - if (client->message->opcode != dns_opcode_query) + if (client->message->opcode != dns_opcode_query) { client->pipelined = false; + } if (TCP_CLIENT(client) && client->pipelined) { - result = isc_quota_reserve(&client->sctx->tcpquota); - if (result == ISC_R_SUCCESS) - result = ns_client_replace(client); + result = ns_client_replace(client); if (result != ISC_R_SUCCESS) { - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_WARNING, - "no more TCP clients(read): %s", - isc_result_totext(result)); client->pipelined = false; } } @@ -3053,6 +3103,7 @@ client_create(ns_clientmgr_t *manager, ns_client_t **clientp) { dns_ecs_init(&client->ecs); client->needshutdown = ((client->sctx->options & NS_SERVER_CLIENTTEST) != 0); + client->tcpactive = false; ISC_EVENT_INIT(&client->ctlevent, sizeof(client->ctlevent), 0, NULL, NS_EVENT_CLIENTCONTROL, client_start, client, client, @@ -3066,6 +3117,7 @@ client_create(ns_clientmgr_t *manager, ns_client_t **clientp) { client->formerrcache.id = 0; ISC_LINK_INIT(client, link); ISC_LINK_INIT(client, rlink); + ISC_LINK_INIT(client, glink); ISC_QLINK_INIT(client, ilink); client->keytag = NULL; client->keytag_len = 0; @@ -3162,12 +3214,19 @@ client_newconn(isc_task_t *task, isc_event_t *event) { INSIST(client->state == NS_CLIENTSTATE_READY); + /* + * The accept() was successful and we're now establishing a new + * connection. We need to make note of it in the client and + * interface objects so client objects can do the right thing + * when going inactive in exit_check() (see comments in + * client_accept() for details). 
+ */ INSIST(client->naccepts == 1); client->naccepts--; LOCK(&client->interface->lock); - INSIST(client->interface->ntcpcurrent > 0); - client->interface->ntcpcurrent--; + INSIST(client->interface->ntcpaccepting > 0); + client->interface->ntcpaccepting--; UNLOCK(&client->interface->lock); /* @@ -3201,6 +3260,9 @@ client_newconn(isc_task_t *task, isc_event_t *event) { NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), "accept failed: %s", isc_result_totext(nevent->result)); + if (client->tcpquota != NULL) { + isc_quota_detach(&client->tcpquota); + } } if (exit_check(client)) @@ -3237,18 +3299,11 @@ client_newconn(isc_task_t *task, isc_event_t *event) { * deny service to legitimate TCP clients. */ client->pipelined = false; - result = isc_quota_attach(&client->sctx->tcpquota, - &client->tcpquota); - if (result == ISC_R_SUCCESS) - result = ns_client_replace(client); - if (result != ISC_R_SUCCESS) { - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_WARNING, - "no more TCP clients(accept): %s", - isc_result_totext(result)); - } else if (client->sctx->keepresporder == NULL || - !dns_acl_allowed(&netaddr, NULL, - client->sctx->keepresporder, env)) + result = ns_client_replace(client); + if (result == ISC_R_SUCCESS && + (client->sctx->keepresporder == NULL || + !dns_acl_allowed(&netaddr, NULL, + client->sctx->keepresporder, env))) { client->pipelined = true; } @@ -3266,12 +3321,80 @@ client_accept(ns_client_t *client) { CTRACE("accept"); + /* + * The tcpquota object can only be simultaneously referenced a + * pre-defined number of times; this is configured by 'tcp-clients' + * in named.conf. If we can't attach to it here, that means the TCP + * client quota has been exceeded. 
+ */ + result = isc_quota_attach(&client->sctx->tcpquota, + &client->tcpquota); + if (result != ISC_R_SUCCESS) { + bool exit; + + ns_client_log(client, NS_LOGCATEGORY_CLIENT, + NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(1), + "no more TCP clients: %s", + isc_result_totext(result)); + + /* + * We have exceeded the system-wide TCP client + * quota. But, we can't just block this accept + * in all cases, because if we did, a heavy TCP + * load on other interfaces might cause this + * interface to be starved, with no clients able + * to accept new connections. + * + * So, we check here to see if any other client + * is already servicing TCP queries on this + * interface (whether accepting, reading, or + * processing). + * + * If so, then it's okay *not* to call + * accept - we can let this client to go inactive + * and the other one handle the next connection + * when it's ready. + * + * But if not, then we need to be a little bit + * flexible about the quota. We allow *one* extra + * TCP client through, to ensure we're listening on + * every interface. + * + * (Note: In practice this means that the *real* + * TCP client quota is tcp-clients plus the number + * of interfaces.) + */ + LOCK(&client->interface->lock); + exit = (client->interface->ntcpactive > 0); + UNLOCK(&client->interface->lock); + + if (exit) { + client->newstate = NS_CLIENTSTATE_INACTIVE; + (void)exit_check(client); + return; + } + } + + /* + * By incrementing the interface's ntcpactive counter we signal + * that there is at least one client servicing TCP queries for the + * interface. + * + * We also make note of the fact in the client itself with the + * tcpactive flag. This ensures proper accounting by preventing + * us from accidentally incrementing or decrementing ntcpactive + * more than once per client object. 
+ */ + if (!client->tcpactive) { + LOCK(&client->interface->lock); + client->interface->ntcpactive++; + UNLOCK(&client->interface->lock); + client->tcpactive = true; + } + result = isc_socket_accept(client->tcplistener, client->task, client_newconn, client); if (result != ISC_R_SUCCESS) { - UNEXPECTED_ERROR(__FILE__, __LINE__, - "isc_socket_accept() failed: %s", - isc_result_totext(result)); /* * XXXRTH What should we do? We're trying to accept but * it didn't work. If we just give up, then TCP @@ -3279,12 +3402,39 @@ client_accept(ns_client_t *client) { * * For now, we just go idle. */ + UNEXPECTED_ERROR(__FILE__, __LINE__, + "isc_socket_accept() failed: %s", + isc_result_totext(result)); + if (client->tcpquota != NULL) { + isc_quota_detach(&client->tcpquota); + } return; } + + /* + * The client's 'naccepts' counter indicates that this client has + * called accept() and is waiting for a new connection. It should + * never exceed 1. + */ INSIST(client->naccepts == 0); client->naccepts++; + + /* + * The interface's 'ntcpaccepting' counter is incremented when + * any client calls accept(), and decremented in client_newconn() + * once the connection is established. + * + * When the client object is shutting down after handling a TCP + * request (see exit_check()), it looks to see whether this value is + * non-zero. If so, that means another client has already called + * accept() and is waiting to establish the next connection, which + * means the first client is free to go inactive. Otherwise, + * the first client must come back and call accept() again; this + * guarantees there will always be at least one client listening + * for new TCP connections on each interface. 
+ */ LOCK(&client->interface->lock); - client->interface->ntcpcurrent++; + client->interface->ntcpaccepting++; UNLOCK(&client->interface->lock); } @@ -3358,13 +3508,14 @@ ns_client_replace(ns_client_t *client) { tcp = TCP_CLIENT(client); if (tcp && client->pipelined) { result = get_worker(client->manager, client->interface, - client->tcpsocket); + client->tcpsocket, client); } else { result = get_client(client->manager, client->interface, client->dispatch, tcp); } - if (result != ISC_R_SUCCESS) + if (result != ISC_R_SUCCESS) { return (result); + } /* * The responsibility for listening for new requests is hereby @@ -3561,6 +3712,7 @@ get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, client->attributes |= NS_CLIENTATTR_TCP; isc_socket_attach(ifp->tcpsocket, &client->tcplistener); + } else { isc_socket_t *sock; @@ -3578,13 +3730,16 @@ get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, } static isc_result_t -get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock) { +get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, + ns_client_t *oldclient) +{ isc_result_t result = ISC_R_SUCCESS; isc_event_t *ev; ns_client_t *client; MTRACE("get worker"); REQUIRE(manager != NULL); + REQUIRE(oldclient != NULL); if (manager->exiting) return (ISC_R_SHUTTINGDOWN); @@ -3618,7 +3773,28 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock) { client->newstate = client->state = NS_CLIENTSTATE_WORKING; INSIST(client->recursionquota == NULL); client->sctx = manager->sctx; - client->tcpquota = &client->sctx->tcpquota; + + /* + * Transfer TCP quota to the new client. + */ + INSIST(client->tcpquota == NULL); + INSIST(oldclient->tcpquota != NULL); + client->tcpquota = oldclient->tcpquota; + oldclient->tcpquota = NULL; + + /* + * Link to a pipeline group, creating it if needed. 
+ */ + if (!ISC_LINK_LINKED(oldclient, glink)) { + oldclient->glink.next = NULL; + oldclient->glink.prev = NULL; + } + client->glink.next = oldclient->glink.next; + client->glink.prev = oldclient; + if (oldclient->glink.next != NULL) { + oldclient->glink.next->glink.prev = client; + } + oldclient->glink.next = client; client->dscp = ifp->dscp; @@ -3633,6 +3809,12 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock) { (void)isc_socket_getpeername(client->tcpsocket, &client->peeraddr); client->peeraddr_valid = true; + LOCK(&client->interface->lock); + client->interface->ntcpactive++; + UNLOCK(&client->interface->lock); + + client->tcpactive = true; + INSIST(client->tcpmsg_valid == false); dns_tcpmsg_init(client->mctx, client->tcpsocket, &client->tcpmsg); client->tcpmsg_valid = true; diff --git a/lib/ns/include/ns/client.h b/lib/ns/include/ns/client.h index cd6c9c8a47..92d5349f7b 100644 --- a/lib/ns/include/ns/client.h +++ b/lib/ns/include/ns/client.h @@ -95,7 +95,8 @@ struct ns_client { int nupdates; int nctls; int references; - bool needshutdown; /* + bool tcpactive; + bool needshutdown; /* * Used by clienttest to get * the client to go from * inactive to free state @@ -130,10 +131,10 @@ struct ns_client { isc_time_t requesttime; isc_stdtime_t now; isc_time_t tnow; - dns_name_t signername; /*%< [T]SIG key name */ - dns_name_t *signer; /*%< NULL if not valid sig */ - bool mortal; /*%< Die after handling request */ - bool pipelined; /*%< TCP queries not in sequence */ + dns_name_t signername; /*%< [T]SIG key name */ + dns_name_t *signer; /*%< NULL if not valid sig */ + bool mortal; /*%< Die after handling request */ + bool pipelined; /*%< TCP queries not in sequence */ isc_quota_t *tcpquota; isc_quota_t *recursionquota; ns_interface_t *interface; @@ -143,7 +144,7 @@ struct ns_client { isc_netaddr_t destaddr; isc_sockaddr_t destsockaddr; - dns_ecs_t ecs; /*%< EDNS client subnet sent by client */ + dns_ecs_t ecs; /*%< EDNS client subnet sent 
by client */ struct in6_pktinfo pktinfo; isc_dscp_t dscp; @@ -165,6 +166,7 @@ struct ns_client { ISC_LINK(ns_client_t) link; ISC_LINK(ns_client_t) rlink; + ISC_LINK(ns_client_t) glink; ISC_QLINK(ns_client_t) ilink; unsigned char cookie[8]; uint32_t expire; diff --git a/lib/ns/include/ns/interfacemgr.h b/lib/ns/include/ns/interfacemgr.h index 54846fdc27..f579b7ec28 100644 --- a/lib/ns/include/ns/interfacemgr.h +++ b/lib/ns/include/ns/interfacemgr.h @@ -76,9 +76,14 @@ struct ns_interface { /*%< UDP dispatchers. */ isc_socket_t * tcpsocket; /*%< TCP socket. */ isc_dscp_t dscp; /*%< "listen-on" DSCP value */ - int ntcptarget; /*%< Desired number of concurrent - TCP accepts */ - int ntcpcurrent; /*%< Current ditto, locked */ + int ntcpaccepting; /*%< Number of clients + ready to accept new + TCP connections on this + interface */ + int ntcpactive; /*%< Number of clients + servicing TCP queries + (whether accepting or + connected) */ int nudpdispatch; /*%< Number of UDP dispatches */ ns_clientmgr_t * clientmgr; /*%< Client manager. */ ISC_LINK(ns_interface_t) link; diff --git a/lib/ns/interfacemgr.c b/lib/ns/interfacemgr.c index 865abe7d31..1b59b67190 100644 --- a/lib/ns/interfacemgr.c +++ b/lib/ns/interfacemgr.c @@ -425,8 +425,8 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, * connections will be handled in parallel even though there is * only one client initially. 
*/ - ifp->ntcptarget = 1; - ifp->ntcpcurrent = 0; + ifp->ntcpaccepting = 0; + ifp->ntcpactive = 0; ifp->nudpdispatch = 0; ifp->dscp = -1; @@ -561,9 +561,7 @@ ns_interface_accepttcp(ns_interface_t *ifp) { */ (void)isc_socket_filter(ifp->tcpsocket, "dataready"); - result = ns_clientmgr_createclients(ifp->clientmgr, - ifp->ntcptarget, ifp, - true); + result = ns_clientmgr_createclients(ifp->clientmgr, 1, ifp, true); if (result != ISC_R_SUCCESS) { UNEXPECTED_ERROR(__FILE__, __LINE__, "TCP ns_clientmgr_createclients(): %s", From 2211120222b5f008a96145474b7f6749d4307028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Thu, 17 Jan 2019 15:53:38 +0100 Subject: [PATCH 3/7] use reference counter for pipeline groups (v3) Track pipeline groups using a shared reference counter instead of a linked list. (cherry picked from commit 31f392db20207a1b05d6286c3c56f76c8d69e574) --- lib/ns/client.c | 171 +++++++++++++++++++++++-------------- lib/ns/include/ns/client.h | 2 +- 2 files changed, 110 insertions(+), 63 deletions(-) diff --git a/lib/ns/client.c b/lib/ns/client.c index 4042030b49..e6e9376262 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -318,6 +318,75 @@ read_settimeout(ns_client_t *client, bool newconn) { } } +/*% + * Allocate a reference counter that will track the number of client structures + * using the TCP connection that 'client' called accept() for. This counter + * will be shared between all client structures associated with this TCP + * connection. + */ +static void +pipeline_init(ns_client_t *client) { + isc_refcount_t *refs; + + REQUIRE(client->pipeline_refs == NULL); + + /* + * A global memory context is used for the allocation as different + * client structures may have different memory contexts assigned and a + * reference counter allocated here might need to be freed by a + * different client. 
The performance impact caused by memory context + * contention here is expected to be negligible, given that this code + * is only executed for TCP connections. + */ + refs = isc_mem_allocate(client->sctx->mctx, sizeof(*refs)); + isc_refcount_init(refs, 1); + client->pipeline_refs = refs; +} + +/*% + * Increase the count of client structures using the TCP connection that + * 'source' is associated with and put a pointer to that count in 'target', + * thus associating it with the same TCP connection. + */ +static void +pipeline_attach(ns_client_t *source, ns_client_t *target) { + int old_refs; + + REQUIRE(source->pipeline_refs != NULL); + REQUIRE(target->pipeline_refs == NULL); + + old_refs = isc_refcount_increment(source->pipeline_refs); + INSIST(old_refs > 0); + target->pipeline_refs = source->pipeline_refs; +} + +/*% + * Decrease the count of client structures using the TCP connection that + * 'client' is associated with. If this is the last client using this TCP + * connection, free the reference counter and return true; otherwise, return + * false. + */ +static bool +pipeline_detach(ns_client_t *client) { + isc_refcount_t *refs; + int old_refs; + + REQUIRE(client->pipeline_refs != NULL); + + refs = client->pipeline_refs; + client->pipeline_refs = NULL; + + old_refs = isc_refcount_decrement(refs); + INSIST(old_refs > 0); + + if (old_refs == 1) { + isc_mem_free(client->sctx->mctx, refs); + return (true); + } + + return (false); +} + /*% * Check for a deactivation or shutdown request and take appropriate * action. Returns true if either is in progress; in this case @@ -440,6 +509,40 @@ exit_check(ns_client_t *client) { client->tcpmsg_valid = false; } + if (client->tcpquota != NULL) { + if (client->pipeline_refs == NULL || + pipeline_detach(client)) + { + /* + * Only detach from the TCP client quota if + * there are no more client structures using + * this TCP connection. + * + * Note that we check 'pipeline_refs' and not + * 'pipelined' because in some cases (e.g. 
+ * after receiving a request with an opcode + * different than QUERY) 'pipelined' is set to + * false after the reference counter gets + * allocated in pipeline_init() and we must + * still drop our reference as failing to do so + * would prevent the reference counter itself + * from being freed. + */ + isc_quota_detach(&client->tcpquota); + } else { + /* + * There are other client structures using this + * TCP connection, so we cannot detach from the + * TCP client quota to prevent excess TCP + * connections from being accepted. However, + * this client structure might later be reused + * for accepting new connections and thus must + * have its 'tcpquota' field set to NULL. + */ + client->tcpquota = NULL; + } + } + if (client->tcpsocket != NULL) { CTRACE("closetcp"); isc_socket_detach(&client->tcpsocket); @@ -453,44 +556,6 @@ exit_check(ns_client_t *client) { } } - if (client->tcpquota != NULL) { - /* - * If we are not in a pipeline group, or - * we are the last client in the group, detach from - * tcpquota; otherwise, transfer the quota to - * another client in the same group. - */ - if (!ISC_LINK_LINKED(client, glink) || - (client->glink.next == NULL && - client->glink.prev == NULL)) - { - isc_quota_detach(&client->tcpquota); - } else if (client->glink.next != NULL) { - INSIST(client->glink.next->tcpquota == NULL); - client->glink.next->tcpquota = client->tcpquota; - client->tcpquota = NULL; - } else { - INSIST(client->glink.prev->tcpquota == NULL); - client->glink.prev->tcpquota = client->tcpquota; - client->tcpquota = NULL; - } - } - - /* - * Unlink from pipeline group. 
- */ - if (ISC_LINK_LINKED(client, glink)) { - if (client->glink.next != NULL) { - client->glink.next->glink.prev = - client->glink.prev; - } - if (client->glink.prev != NULL) { - client->glink.prev->glink.next = - client->glink.next; - } - ISC_LINK_INIT(client, glink); - } - if (client->timerset) { (void)isc_timer_reset(client->timer, isc_timertype_inactive, @@ -3096,6 +3161,7 @@ client_create(ns_clientmgr_t *manager, ns_client_t **clientp) { client->mortal = false; client->sendcb = NULL; client->pipelined = false; + client->pipeline_refs = NULL; client->tcpquota = NULL; client->recursionquota = NULL; client->interface = NULL; @@ -3117,7 +3183,6 @@ client_create(ns_clientmgr_t *manager, ns_client_t **clientp) { client->formerrcache.id = 0; ISC_LINK_INIT(client, link); ISC_LINK_INIT(client, rlink); - ISC_LINK_INIT(client, glink); ISC_QLINK_INIT(client, ilink); client->keytag = NULL; client->keytag_len = 0; @@ -3305,6 +3370,7 @@ client_newconn(isc_task_t *task, isc_event_t *event) { !dns_acl_allowed(&netaddr, NULL, client->sctx->keepresporder, env))) { + pipeline_init(client); client->pipelined = true; } @@ -3773,36 +3839,17 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, client->newstate = client->state = NS_CLIENTSTATE_WORKING; INSIST(client->recursionquota == NULL); client->sctx = manager->sctx; - - /* - * Transfer TCP quota to the new client. - */ - INSIST(client->tcpquota == NULL); - INSIST(oldclient->tcpquota != NULL); - client->tcpquota = oldclient->tcpquota; - oldclient->tcpquota = NULL; - - /* - * Link to a pipeline group, creating it if needed. 
- */ - if (!ISC_LINK_LINKED(oldclient, glink)) { - oldclient->glink.next = NULL; - oldclient->glink.prev = NULL; - } - client->glink.next = oldclient->glink.next; - client->glink.prev = oldclient; - if (oldclient->glink.next != NULL) { - oldclient->glink.next->glink.prev = client; - } - oldclient->glink.next = client; + client->tcpquota = &client->sctx->tcpquota; client->dscp = ifp->dscp; client->attributes |= NS_CLIENTATTR_TCP; - client->pipelined = true; client->mortal = true; client->sendcb = NULL; + pipeline_attach(oldclient, client); + client->pipelined = true; + isc_socket_attach(ifp->tcpsocket, &client->tcplistener); isc_socket_attach(sock, &client->tcpsocket); isc_socket_setname(client->tcpsocket, "worker-tcp", NULL); diff --git a/lib/ns/include/ns/client.h b/lib/ns/include/ns/client.h index 92d5349f7b..e24d7b9dc0 100644 --- a/lib/ns/include/ns/client.h +++ b/lib/ns/include/ns/client.h @@ -135,6 +135,7 @@ struct ns_client { dns_name_t *signer; /*%< NULL if not valid sig */ bool mortal; /*%< Die after handling request */ bool pipelined; /*%< TCP queries not in sequence */ + isc_refcount_t *pipeline_refs; isc_quota_t *tcpquota; isc_quota_t *recursionquota; ns_interface_t *interface; @@ -166,7 +167,6 @@ struct ns_client { ISC_LINK(ns_client_t) link; ISC_LINK(ns_client_t) rlink; - ISC_LINK(ns_client_t) glink; ISC_QLINK(ns_client_t) ilink; unsigned char cookie[8]; uint32_t expire; From 08968412726d680777de6e596c836c6be07819a1 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Wed, 6 Feb 2019 11:26:36 -0800 Subject: [PATCH 4/7] better tcpquota accounting and client mortality checks - ensure that tcpactive is cleaned up correctly when accept() fails. - set 'client->tcpattached' when the client is attached to the tcpquota. carry this value on to new clients sharing the same pipeline group. don't call isc_quota_detach() on the tcpquota unless tcpattached is set. 
this way clients that were allowed to accept TCP connections despite being over quota (and therefore, were never attached to the quota) will not inadvertently detach from it and mess up the accounting. - simplify the code for tcpquota disconnection by using a new function tcpquota_disconnect(). - before deciding whether to reject a new connection due to quota exhaustion, check to see whether there are at least two active clients. previously, this was "at least one", but that could be insufficient if there was one other client in READING state (waiting for messages on an open connection) but none in READY (listening for new connections). - before deciding whether a TCP client object can go inactive, we must ensure there are enough other clients to maintain service afterward -- both accepting new connections and reading/processing new queries. A TCP client can't shut down unless at least one client is accepting new connections and (in the case of pipelined clients) at least one additional client is waiting to read. 
(cherry picked from commit 427a2fb4d17bc04ca3262f58a9dcf5c93fc6d33e) --- lib/ns/client.c | 215 +++++++++++++++++++++++-------------- lib/ns/include/ns/client.h | 1 + 2 files changed, 138 insertions(+), 78 deletions(-) diff --git a/lib/ns/client.c b/lib/ns/client.c index e6e9376262..1917d3f43e 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -241,7 +241,8 @@ static void ns_client_endrequest(ns_client_t *client); static void client_start(isc_task_t *task, isc_event_t *event); static void ns_client_dumpmessage(ns_client_t *client, const char *reason); static isc_result_t get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, - dns_dispatch_t *disp, bool tcp); + dns_dispatch_t *disp, ns_client_t *oldclient, + bool tcp); static isc_result_t get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, ns_client_t *oldclient); static void compute_cookie(ns_client_t *client, uint32_t when, @@ -387,6 +388,33 @@ pipeline_detach(ns_client_t *client) { return (false); } +/* + * Detach a client from the TCP client quota if appropriate, and set + * the quota pointer to NULL. + * + * Sometimes when the TCP client quota is exhausted but there are no other + * clients servicing the interface, a client will be allowed to continue + * running despite not having been attached to the quota. In this event, + * the TCP quota was never attached to the client, so when the client (or + * associated pipeline group) shuts down, the quota must NOT be detached. + * + * Otherwise, if the quota pointer is set, it should be detached. If not + * set at all, we just return without doing anything. + */ +static void +tcpquota_disconnect(ns_client_t *client) { + if (client->tcpquota == NULL) { + return; + } + + if (client->tcpattached) { + isc_quota_detach(&client->tcpquota); + client->tcpattached = false; + } else { + client->tcpquota = NULL; + } +} + /*% * Check for a deactivation or shutdown request and take appropriate * action. 
Returns true if either is in progress; in this case @@ -509,38 +537,31 @@ exit_check(ns_client_t *client) { client->tcpmsg_valid = false; } - if (client->tcpquota != NULL) { - if (client->pipeline_refs == NULL || - pipeline_detach(client)) - { - /* - * Only detach from the TCP client quota if - * there are no more client structures using - * this TCP connection. - * - * Note that we check 'pipeline_refs' and not - * 'pipelined' because in some cases (e.g. - * after receiving a request with an opcode - * different than QUERY) 'pipelined' is set to - * false after the reference counter gets - * allocated in pipeline_init() and we must - * still drop our reference as failing to do so - * would prevent the reference counter itself - * from being freed. - */ - isc_quota_detach(&client->tcpquota); - } else { - /* - * There are other client structures using this - * TCP connection, so we cannot detach from the - * TCP client quota to prevent excess TCP - * connections from being accepted. However, - * this client structure might later be reused - * for accepting new connections and thus must - * have its 'tcpquota' field set to NULL. - */ - client->tcpquota = NULL; - } + /* + * Detach from pipeline group and from TCP client quota, + * if appropriate. + * + * - If no pipeline group is active, attempt to + * detach from the TCP client quota. + * + * - If a pipeline group is active, detach from it; + * if the return code indicates that there no more + * clients left if this pipeline group, we also detach + * from the TCP client quota. + * + * - Otherwise we don't try to detach, we just set the + * TCP quota pointer to NULL if it wasn't NULL already. + * + * tcpquota_disconnect() will set tcpquota to NULL, either + * by detaching it or by assignment, depending on the + * needs of the client. See the comments on that function + * for further information. 
+ */ + if (client->pipeline_refs == NULL || pipeline_detach(client)) { + tcpquota_disconnect(client); + } else { + client->tcpquota = NULL; + client->tcpattached = false; } if (client->tcpsocket != NULL) { @@ -563,8 +584,6 @@ exit_check(ns_client_t *client) { client->timerset = false; } - client->pipelined = false; - client->peeraddr_valid = false; client->state = NS_CLIENTSTATE_READY; @@ -577,20 +596,29 @@ exit_check(ns_client_t *client) { * active and force it to go inactive if not. * * UDP clients go inactive at this point, but a TCP client - * will needs to remain active if no other clients are - * listening for TCP requests on this interface, to - * prevent this interface from going nonresponsive. + * may need to remain active and go into ready state if + * no other clients are available to listen for TCP + * requests on this interface or (in the case of pipelined + * clients) to read for additional messages on the current + * connection. */ if (client->mortal && TCP_CLIENT(client) && ((client->sctx->options & NS_SERVER_CLIENTTEST) == 0)) { LOCK(&client->interface->lock); - if (client->interface->ntcpaccepting == 0) { + if ((client->interface->ntcpaccepting == 0 || + (client->pipelined && + client->interface->ntcpactive < 2)) && + client->newstate != NS_CLIENTSTATE_FREED) + { client->mortal = false; + client->newstate = NS_CLIENTSTATE_READY; } UNLOCK(&client->interface->lock); } + client->pipelined = false; + /* * We don't need the client; send it to the inactive * queue for recycling. @@ -2606,6 +2634,18 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { client->pipelined = false; } if (TCP_CLIENT(client) && client->pipelined) { + /* + * We're pipelining. Replace the client; the + * the replacement can read the TCP socket looking + * for new messages and this client can process the + * current message asynchronously. 
+ * + * There are now at least three clients using this + * TCP socket - one accepting new connections, + * one reading an existing connection to get new + * messages, and one answering the message already + * received. + */ result = ns_client_replace(client); if (result != ISC_R_SUCCESS) { client->pipelined = false; @@ -3163,6 +3203,7 @@ client_create(ns_clientmgr_t *manager, ns_client_t **clientp) { client->pipelined = false; client->pipeline_refs = NULL; client->tcpquota = NULL; + client->tcpattached = false; client->recursionquota = NULL; client->interface = NULL; client->peeraddr_valid = false; @@ -3325,9 +3366,7 @@ client_newconn(isc_task_t *task, isc_event_t *event) { NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), "accept failed: %s", isc_result_totext(nevent->result)); - if (client->tcpquota != NULL) { - isc_quota_detach(&client->tcpquota); - } + tcpquota_disconnect(client); } if (exit_check(client)) @@ -3411,27 +3450,27 @@ client_accept(ns_client_t *client) { * interface to be starved, with no clients able * to accept new connections. * - * So, we check here to see if any other client - * is already servicing TCP queries on this + * So, we check here to see if any other clients + * are already servicing TCP queries on this * interface (whether accepting, reading, or - * processing). + * processing). If there are at least two + * (one reading and one processing a request) + * then it's okay *not* to call accept - we + * can let this client go inactive and another + * one will resume accepting when it's done. * - * If so, then it's okay *not* to call - * accept - we can let this client to go inactive - * and the other one handle the next connection - * when it's ready. + * If there aren't enough active clients on the + * interface, then we can be a little bit + * flexible about the quota. We'll allow *one* + * extra client through to ensure we're listening + * on every interface. * - * But if not, then we need to be a little bit - * flexible about the quota. 
We allow *one* extra - * TCP client through, to ensure we're listening on - * every interface. - * - * (Note: In practice this means that the *real* - * TCP client quota is tcp-clients plus the number - * of interfaces.) + * (Note: In practice this means that the real + * TCP client quota is tcp-clients plus the + * number of listening interfaces plus 2.) */ LOCK(&client->interface->lock); - exit = (client->interface->ntcpactive > 0); + exit = (client->interface->ntcpactive > 1); UNLOCK(&client->interface->lock); if (exit) { @@ -3439,6 +3478,9 @@ client_accept(ns_client_t *client) { (void)exit_check(client); return; } + + } else { + client->tcpattached = true; } /* @@ -3471,9 +3513,16 @@ client_accept(ns_client_t *client) { UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_socket_accept() failed: %s", isc_result_totext(result)); - if (client->tcpquota != NULL) { - isc_quota_detach(&client->tcpquota); + + tcpquota_disconnect(client); + + if (client->tcpactive) { + LOCK(&client->interface->lock); + client->interface->ntcpactive--; + UNLOCK(&client->interface->lock); + client->tcpactive = false; } + return; } @@ -3491,13 +3540,12 @@ client_accept(ns_client_t *client) { * once the connection is established. * * When the client object is shutting down after handling a TCP - * request (see exit_check()), it looks to see whether this value is - * non-zero. If so, that means another client has already called - * accept() and is waiting to establish the next connection, which - * means the first client is free to go inactive. Otherwise, - * the first client must come back and call accept() again; this - * guarantees there will always be at least one client listening - * for new TCP connections on each interface. + * request (see exit_check()), if this value is at least one, that + * means another client has called accept() and is waiting to + * establish the next connection. 
That means the client may be + * be free to become inactive; otherwise it may need to start + * listening for connections itself to prevent the interface + * going dead. */ LOCK(&client->interface->lock); client->interface->ntcpaccepting++; @@ -3577,19 +3625,19 @@ ns_client_replace(ns_client_t *client) { client->tcpsocket, client); } else { result = get_client(client->manager, client->interface, - client->dispatch, tcp); + client->dispatch, client, tcp); + + /* + * The responsibility for listening for new requests is hereby + * transferred to the new client. Therefore, the old client + * should refrain from listening for any more requests. + */ + client->mortal = true; } if (result != ISC_R_SUCCESS) { return (result); } - /* - * The responsibility for listening for new requests is hereby - * transferred to the new client. Therefore, the old client - * should refrain from listening for any more requests. - */ - client->mortal = true; - return (ISC_R_SUCCESS); } @@ -3730,7 +3778,7 @@ ns_clientmgr_destroy(ns_clientmgr_t **managerp) { static isc_result_t get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, - dns_dispatch_t *disp, bool tcp) + dns_dispatch_t *disp, ns_client_t *oldclient, bool tcp) { isc_result_t result = ISC_R_SUCCESS; isc_event_t *ev; @@ -3775,6 +3823,16 @@ get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, client->dscp = ifp->dscp; if (tcp) { + client->tcpattached = false; + if (oldclient != NULL) { + client->tcpattached = oldclient->tcpattached; + } + + LOCK(&client->interface->lock); + client->interface->ntcpactive++; + UNLOCK(&client->interface->lock); + client->tcpactive = true; + client->attributes |= NS_CLIENTATTR_TCP; isc_socket_attach(ifp->tcpsocket, &client->tcplistener); @@ -3840,6 +3898,7 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, INSIST(client->recursionquota == NULL); client->sctx = manager->sctx; client->tcpquota = &client->sctx->tcpquota; + client->tcpattached = oldclient->tcpattached; 
client->dscp = ifp->dscp; @@ -3859,7 +3918,6 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, LOCK(&client->interface->lock); client->interface->ntcpactive++; UNLOCK(&client->interface->lock); - client->tcpactive = true; INSIST(client->tcpmsg_valid == false); @@ -3936,7 +3994,8 @@ ns_clientmgr_createclients(ns_clientmgr_t *manager, unsigned int n, MTRACE("createclients"); for (disp = 0; disp < n; disp++) { - result = get_client(manager, ifp, ifp->udpdispatch[disp], tcp); + result = get_client(manager, ifp, ifp->udpdispatch[disp], + NULL, tcp); if (result != ISC_R_SUCCESS) break; } diff --git a/lib/ns/include/ns/client.h b/lib/ns/include/ns/client.h index e24d7b9dc0..2e30c1a7ad 100644 --- a/lib/ns/include/ns/client.h +++ b/lib/ns/include/ns/client.h @@ -137,6 +137,7 @@ struct ns_client { bool pipelined; /*%< TCP queries not in sequence */ isc_refcount_t *pipeline_refs; isc_quota_t *tcpquota; + bool tcpattached; isc_quota_t *recursionquota; ns_interface_t *interface; From 4a8fc979c49104534cf6be5d81dc54da5b6836c9 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Wed, 6 Feb 2019 11:27:11 -0800 Subject: [PATCH 5/7] refactor tcpquota and pipeline refs; allow special-case overrun in isc_quota - if the TCP quota has been exceeded but there are no clients listening for new connections on the interface, we can now force attachment to the quota using isc_quota_force(), instead of carrying on with the quota not attached. - the TCP client quota is now referenced via a reference-counted 'ns_tcpconn' object, one of which is created whenever a client begins listening for new connections, and attached to by members of that client's pipeline group. when the last reference to the tcpconn object is detached, it is freed and the TCP quota slot is released. 
- reduce code duplication by adding mark_tcp_active() function - convert counters to stdatomic (cherry picked from commit a8dd133d270873b736c1be9bf50ebaa074f5b38f) --- lib/isc/include/isc/quota.h | 7 + lib/isc/quota.c | 30 ++- lib/isc/win32/libisc.def.in | 1 + lib/ns/client.c | 445 +++++++++++++------------------ lib/ns/include/ns/client.h | 12 +- lib/ns/include/ns/interfacemgr.h | 4 +- lib/ns/interfacemgr.c | 5 +- 7 files changed, 235 insertions(+), 269 deletions(-) diff --git a/lib/isc/include/isc/quota.h b/lib/isc/include/isc/quota.h index 8e593ffb0e..16f6181cda 100644 --- a/lib/isc/include/isc/quota.h +++ b/lib/isc/include/isc/quota.h @@ -115,6 +115,13 @@ isc_quota_attach(isc_quota_t *quota, isc_quota_t **p); * quota if successful (ISC_R_SUCCESS or ISC_R_SOFTQUOTA). */ +isc_result_t +isc_quota_force(isc_quota_t *quota, isc_quota_t **p); +/*%< + * Like isc_quota_attach, but will attach '*p' to the quota + * even if the hard quota has been exceeded. + */ + void isc_quota_detach(isc_quota_t **p); /*%< diff --git a/lib/isc/quota.c b/lib/isc/quota.c index 1e25b402d4..cf63e05f52 100644 --- a/lib/isc/quota.c +++ b/lib/isc/quota.c @@ -85,20 +85,36 @@ isc_quota_release(isc_quota_t *quota) { INSIST(atomic_fetch_sub(&quota->used, 1) > 0); } -isc_result_t -isc_quota_attach(isc_quota_t *quota, isc_quota_t **p) -{ +static isc_result_t +doattach(isc_quota_t *quota, isc_quota_t **p, bool force) { isc_result_t result; - INSIST(p != NULL && *p == NULL); + REQUIRE(p != NULL && *p == NULL); + result = isc_quota_reserve(quota); - if (result == ISC_R_SUCCESS || result == ISC_R_SOFTQUOTA) + if (result == ISC_R_SUCCESS || result == ISC_R_SOFTQUOTA) { *p = quota; + } else if (result == ISC_R_QUOTA && force) { + /* attach anyway */ + atomic_fetch_add(&quota->used, 1); + *p = quota; + result = ISC_R_SUCCESS; + } + return (result); } +isc_result_t +isc_quota_attach(isc_quota_t *quota, isc_quota_t **p) { + return (doattach(quota, p, false)); +} + +isc_result_t +isc_quota_force(isc_quota_t *quota,
isc_quota_t **p) { + return (doattach(quota, p, true)); +} + void -isc_quota_detach(isc_quota_t **p) -{ +isc_quota_detach(isc_quota_t **p) { INSIST(p != NULL && *p != NULL); isc_quota_release(*p); *p = NULL; diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in index 4cec81fca1..4b66b3c9c1 100644 --- a/lib/isc/win32/libisc.def.in +++ b/lib/isc/win32/libisc.def.in @@ -449,6 +449,7 @@ isc_portset_removerange isc_quota_attach isc_quota_destroy isc_quota_detach +isc_quota_force isc_quota_getmax isc_quota_getsoft isc_quota_getused diff --git a/lib/ns/client.c b/lib/ns/client.c index 1917d3f43e..8dd3018182 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -241,8 +241,7 @@ static void ns_client_endrequest(ns_client_t *client); static void client_start(isc_task_t *task, isc_event_t *event); static void ns_client_dumpmessage(ns_client_t *client, const char *reason); static isc_result_t get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, - dns_dispatch_t *disp, ns_client_t *oldclient, - bool tcp); + dns_dispatch_t *disp, bool tcp); static isc_result_t get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, ns_client_t *oldclient); static void compute_cookie(ns_client_t *client, uint32_t when, @@ -320,16 +319,32 @@ read_settimeout(ns_client_t *client, bool newconn) { } /*% - * Allocate a reference counter that will track the number of client structures - * using the TCP connection that 'client' called accept() for. This counter - * will be shared between all client structures associated with this TCP - * connection. + * Allocate a reference-counted object that will maintain a single pointer to + * the (also reference-counted) TCP client quota, shared between all the + * clients processing queries on a single TCP connection, so that all + * clients sharing the one socket will together consume only one slot in + * the 'tcp-clients' quota. 
*/ -static void -pipeline_init(ns_client_t *client) { - isc_refcount_t *refs; +static isc_result_t +tcpconn_init(ns_client_t *client, bool force) { + isc_result_t result; + isc_quota_t *quota = NULL; + ns_tcpconn_t *tconn = NULL; - REQUIRE(client->pipeline_refs == NULL); + REQUIRE(client->tcpconn == NULL); + + /* + * Try to attach to the quota first, so we won't pointlessly + * allocate memory for a tcpconn object if we can't get one. + */ + if (force) { + result = isc_quota_force(&client->sctx->tcpquota, &quota); + } else { + result = isc_quota_attach(&client->sctx->tcpquota, &quota); + } + if (result != ISC_R_SUCCESS) { + return (result); + } /* * A global memory context is used for the allocation as different @@ -339,79 +354,82 @@ pipeline_init(ns_client_t *client) { * contention here is expected to be negligible, given that this code * is only executed for TCP connections. */ - refs = isc_mem_allocate(client->sctx->mctx, sizeof(*refs)); - isc_refcount_init(refs, 1); - client->pipeline_refs = refs; + tconn = isc_mem_allocate(client->sctx->mctx, sizeof(*tconn)); + + isc_refcount_init(&tconn->refs, 1); + tconn->tcpquota = quota; + quota = NULL; + tconn->pipelined = false; + + client->tcpconn = tconn; + + return (ISC_R_SUCCESS); } /*% - * Increase the count of client structures using the TCP connection that - * 'source' is associated with and put a pointer to that count in 'target', - * thus associating it with the same TCP connection. + * Increase the count of client structures sharing the TCP connection + * that 'source' is associated with; add a pointer to the same tcpconn + * to 'target', thus associating it with the same TCP connection.
*/ static void -pipeline_attach(ns_client_t *source, ns_client_t *target) { +tcpconn_attach(ns_client_t *source, ns_client_t *target) { int old_refs; - REQUIRE(source->pipeline_refs != NULL); - REQUIRE(target->pipeline_refs == NULL); + REQUIRE(source->tcpconn != NULL); + REQUIRE(target->tcpconn == NULL); + REQUIRE(source->tcpconn->pipelined); - old_refs = isc_refcount_increment(source->pipeline_refs); + old_refs = isc_refcount_increment(&source->tcpconn->refs); INSIST(old_refs > 0); - target->pipeline_refs = source->pipeline_refs; + target->tcpconn = source->tcpconn; } /*% - * Decrease the count of client structures using the TCP connection that + * Decrease the count of client structures sharing the TCP connection that * 'client' is associated with. If this is the last client using this TCP - * connection, free the reference counter and return true; otherwise, return - * false. + * connection, we detach from the TCP quota and free the tcpconn + * object. Either way, client->tcpconn is set to NULL. */ -static bool -pipeline_detach(ns_client_t *client) { - isc_refcount_t *refs; +static void +tcpconn_detach(ns_client_t *client) { + ns_tcpconn_t *tconn = NULL; int old_refs; - REQUIRE(client->pipeline_refs != NULL); + REQUIRE(client->tcpconn != NULL); - refs = client->pipeline_refs; - client->pipeline_refs = NULL; + tconn = client->tcpconn; + client->tcpconn = NULL; - old_refs = isc_refcount_decrement(refs); + old_refs = isc_refcount_decrement(&tconn->refs); INSIST(old_refs > 0); if (old_refs == 1) { - isc_mem_free(client->sctx->mctx, refs); - return (true); + isc_quota_detach(&tconn->tcpquota); + isc_mem_free(client->sctx->mctx, tconn); } - - return (false); } -/* - * Detach a client from the TCP client quota if appropriate, and set - * the quota pointer to NULL. +/*% + * Mark a client as active and increment the interface's 'ntcpactive' + * counter, as a signal that there is at least one client servicing + * TCP queries for the interface. 
If we reach the TCP client quota at + * some point, this will be used to determine whether a quota overrun + * should be permitted. * - * Sometimes when the TCP client quota is exhausted but there are no other - * clients servicing the interface, a client will be allowed to continue - * running despite not having been attached to the quota. In this event, - * the TCP quota was never attached to the client, so when the client (or - * associated pipeline group) shuts down, the quota must NOT be detached. - * - * Otherwise, if the quota pointer is set, it should be detached. If not - * set at all, we just return without doing anything. + * Marking the client active with the 'tcpactive' flag ensures proper + * accounting, by preventing us from incrementing or decrementing + * 'ntcpactive' more than once per client. */ static void -tcpquota_disconnect(ns_client_t *client) { - if (client->tcpquota == NULL) { - return; - } - - if (client->tcpattached) { - isc_quota_detach(&client->tcpquota); - client->tcpattached = false; - } else { - client->tcpquota = NULL; +mark_tcp_active(ns_client_t *client, bool active) { + if (active && !client->tcpactive) { + atomic_fetch_add(&client->interface->ntcpactive, 1); + client->tcpactive = active; + } else if (!active && client->tcpactive) { + uint32_t old = + atomic_fetch_sub(&client->interface->ntcpactive, 1); + INSIST(old > 0); + client->tcpactive = active; } } @@ -504,7 +522,8 @@ exit_check(ns_client_t *client) { INSIST(client->recursionquota == NULL); if (NS_CLIENTSTATE_READING == client->newstate) { - if (!client->pipelined) { + INSIST(client->tcpconn != NULL); + if (!client->tcpconn->pipelined) { client_read(client, false); client->newstate = NS_CLIENTSTATE_MAX; return (true); /* We're done. */ @@ -527,8 +546,8 @@ exit_check(ns_client_t *client) { dns_tcpmsg_cancelread(&client->tcpmsg); } - if (client->nreads != 0) { - /* Still waiting for read cancel completion. */ + /* Still waiting for read cancel completion. 
*/ + if (client->nreads > 0) { return (true); } @@ -538,43 +557,45 @@ exit_check(ns_client_t *client) { } /* - * Detach from pipeline group and from TCP client quota, - * if appropriate. + * Soon the client will be ready to accept a new TCP + * connection or UDP request, but we may have enough + * clients doing that already. Check whether this client + * needs to remain active and allow it go inactive if + * not. * - * - If no pipeline group is active, attempt to - * detach from the TCP client quota. + * UDP clients always go inactive at this point, but a TCP + * client may need to stay active and return to READY + * state if no other clients are available to listen + * for TCP requests on this interface. * - * - If a pipeline group is active, detach from it; - * if the return code indicates that there no more - * clients left if this pipeline group, we also detach - * from the TCP client quota. - * - * - Otherwise we don't try to detach, we just set the - * TCP quota pointer to NULL if it wasn't NULL already. - * - * tcpquota_disconnect() will set tcpquota to NULL, either - * by detaching it or by assignment, depending on the - * needs of the client. See the comments on that function - * for further information. + * Regardless, if we're going to FREED state, that means + * the system is shutting down and we don't need to + * retain clients. */ - if (client->pipeline_refs == NULL || pipeline_detach(client)) { - tcpquota_disconnect(client); - } else { - client->tcpquota = NULL; - client->tcpattached = false; + if (client->mortal && TCP_CLIENT(client) && + client->newstate != NS_CLIENTSTATE_FREED && + (client->sctx->options & NS_SERVER_CLIENTTEST) == 0 && + atomic_load(&client->interface->ntcpaccepting) == 0) + { + /* Nobody else is accepting */ + client->mortal = false; + client->newstate = NS_CLIENTSTATE_READY; + } + + /* + * Detach from TCP connection and TCP client quota, + * if appropriate. 
If this is the last reference to + * the TCP connection in our pipeline group, the + * TCP quota slot will be released. + */ + if (client->tcpconn) { + tcpconn_detach(client); } if (client->tcpsocket != NULL) { CTRACE("closetcp"); isc_socket_detach(&client->tcpsocket); - - if (client->tcpactive) { - LOCK(&client->interface->lock); - INSIST(client->interface->ntcpactive > 0); - client->interface->ntcpactive--; - UNLOCK(&client->interface->lock); - client->tcpactive = false; - } + mark_tcp_active(client, false); } if (client->timerset) { @@ -587,37 +608,6 @@ exit_check(ns_client_t *client) { client->peeraddr_valid = false; client->state = NS_CLIENTSTATE_READY; - INSIST(client->recursionquota == NULL); - - /* - * Now the client is ready to accept a new TCP connection - * or UDP request, but we may have enough clients doing - * that already. Check whether this client needs to remain - * active and force it to go inactive if not. - * - * UDP clients go inactive at this point, but a TCP client - * may need to remain active and go into ready state if - * no other clients are available to listen for TCP - * requests on this interface or (in the case of pipelined - * clients) to read for additional messages on the current - * connection. - */ - if (client->mortal && TCP_CLIENT(client) && - ((client->sctx->options & NS_SERVER_CLIENTTEST) == 0)) - { - LOCK(&client->interface->lock); - if ((client->interface->ntcpaccepting == 0 || - (client->pipelined && - client->interface->ntcpactive < 2)) && - client->newstate != NS_CLIENTSTATE_FREED) - { - client->mortal = false; - client->newstate = NS_CLIENTSTATE_READY; - } - UNLOCK(&client->interface->lock); - } - - client->pipelined = false; /* * We don't need the client; send it to the inactive @@ -652,7 +642,7 @@ exit_check(ns_client_t *client) { } /* Still waiting for accept cancel completion. */ - if (! 
(client->naccepts == 0)) { + if (client->naccepts > 0) { return (true); } @@ -663,7 +653,7 @@ exit_check(ns_client_t *client) { } /* Still waiting for recv cancel completion. */ - if (! (client->nrecvs == 0)) { + if (client->nrecvs > 0) { return (true); } @@ -676,14 +666,7 @@ exit_check(ns_client_t *client) { INSIST(client->recursionquota == NULL); if (client->tcplistener != NULL) { isc_socket_detach(&client->tcplistener); - - if (client->tcpactive) { - LOCK(&client->interface->lock); - INSIST(client->interface->ntcpactive > 0); - client->interface->ntcpactive--; - UNLOCK(&client->interface->lock); - client->tcpactive = false; - } + mark_tcp_active(client, false); } if (client->udpsocket != NULL) { isc_socket_detach(&client->udpsocket); @@ -845,7 +828,7 @@ client_start(isc_task_t *task, isc_event_t *event) { return; if (TCP_CLIENT(client)) { - if (client->pipelined) { + if (client->tcpconn != NULL) { client_read(client, false); } else { client_accept(client); @@ -2442,6 +2425,7 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { client->nrecvs--; } else { INSIST(TCP_CLIENT(client)); + INSIST(client->tcpconn != NULL); REQUIRE(event->ev_type == DNS_EVENT_TCPMSG); REQUIRE(event->ev_sender == &client->tcpmsg); buffer = &client->tcpmsg.buffer; @@ -2630,17 +2614,19 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { /* * Pipeline TCP query processing. */ - if (client->message->opcode != dns_opcode_query) { - client->pipelined = false; + if (TCP_CLIENT(client) && + client->message->opcode != dns_opcode_query) + { + client->tcpconn->pipelined = false; } - if (TCP_CLIENT(client) && client->pipelined) { + if (TCP_CLIENT(client) && client->tcpconn->pipelined) { /* * We're pipelining. Replace the client; the - * the replacement can read the TCP socket looking - * for new messages and this client can process the + * replacement can read the TCP socket looking + * for new messages and this one can process the * current message asynchronously. 
* - * There are now at least three clients using this + * There will now be at least three clients using this * TCP socket - one accepting new connections, * one reading an existing connection to get new * messages, and one answering the message already @@ -2648,7 +2634,7 @@ ns__client_request(isc_task_t *task, isc_event_t *event) { */ result = ns_client_replace(client); if (result != ISC_R_SUCCESS) { - client->pipelined = false; + client->tcpconn->pipelined = false; } } @@ -3200,10 +3186,7 @@ client_create(ns_clientmgr_t *manager, ns_client_t **clientp) { dns_name_init(&client->signername, NULL); client->mortal = false; client->sendcb = NULL; - client->pipelined = false; - client->pipeline_refs = NULL; - client->tcpquota = NULL; - client->tcpattached = false; + client->tcpconn = NULL; client->recursionquota = NULL; client->interface = NULL; client->peeraddr_valid = false; @@ -3307,10 +3290,11 @@ client_read(ns_client_t *client, bool newconn) { static void client_newconn(isc_task_t *task, isc_event_t *event) { + isc_result_t result; ns_client_t *client = event->ev_arg; isc_socket_newconnev_t *nevent = (isc_socket_newconnev_t *)event; dns_aclenv_t *env = ns_interfacemgr_getaclenv(client->interface->mgr); - isc_result_t result; + uint32_t old; REQUIRE(event->ev_type == ISC_SOCKEVENT_NEWCONN); REQUIRE(NS_CLIENT_VALID(client)); @@ -3330,10 +3314,8 @@ client_newconn(isc_task_t *task, isc_event_t *event) { INSIST(client->naccepts == 1); client->naccepts--; - LOCK(&client->interface->lock); - INSIST(client->interface->ntcpaccepting > 0); - client->interface->ntcpaccepting--; - UNLOCK(&client->interface->lock); + old = atomic_fetch_sub(&client->interface->ntcpaccepting, 1); + INSIST(old > 0); /* * We must take ownership of the new socket before the exit @@ -3366,7 +3348,7 @@ client_newconn(isc_task_t *task, isc_event_t *event) { NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), "accept failed: %s", isc_result_totext(nevent->result)); - tcpquota_disconnect(client); + 
tcpconn_detach(client); } if (exit_check(client)) @@ -3402,15 +3384,13 @@ client_newconn(isc_task_t *task, isc_event_t *event) { * telnetting to port 53 (once per CPU) will * deny service to legitimate TCP clients. */ - client->pipelined = false; result = ns_client_replace(client); if (result == ISC_R_SUCCESS && (client->sctx->keepresporder == NULL || !dns_acl_allowed(&netaddr, NULL, client->sctx->keepresporder, env))) { - pipeline_init(client); - client->pipelined = true; + client->tcpconn->pipelined = true; } client_read(client, true); @@ -3427,78 +3407,59 @@ client_accept(ns_client_t *client) { CTRACE("accept"); /* - * The tcpquota object can only be simultaneously referenced a - * pre-defined number of times; this is configured by 'tcp-clients' - * in named.conf. If we can't attach to it here, that means the TCP - * client quota has been exceeded. + * Set up a new TCP connection. This means try to attach to the + * TCP client quota (tcp-clients), but fail if we're over quota. */ - result = isc_quota_attach(&client->sctx->tcpquota, - &client->tcpquota); + result = tcpconn_init(client, false); if (result != ISC_R_SUCCESS) { - bool exit; + bool exit; - ns_client_log(client, NS_LOGCATEGORY_CLIENT, - NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(1), - "no more TCP clients: %s", - isc_result_totext(result)); + ns_client_log(client, NS_LOGCATEGORY_CLIENT, + NS_LOGMODULE_CLIENT, ISC_LOG_WARNING, + "TCP client quota reached: %s", + isc_result_totext(result)); - /* - * We have exceeded the system-wide TCP client - * quota. But, we can't just block this accept - * in all cases, because if we did, a heavy TCP - * load on other interfaces might cause this - * interface to be starved, with no clients able - * to accept new connections. - * - * So, we check here to see if any other clients - * are already servicing TCP queries on this - * interface (whether accepting, reading, or - * processing). 
If there are at least two - * (one reading and one processing a request) - * then it's okay *not* to call accept - we - * can let this client go inactive and another - * one will resume accepting when it's done. - * - * If there aren't enough active clients on the - * interface, then we can be a little bit - * flexible about the quota. We'll allow *one* - * extra client through to ensure we're listening - * on every interface. - * - * (Note: In practice this means that the real - * TCP client quota is tcp-clients plus the - * number of listening interfaces plus 2.) - */ - LOCK(&client->interface->lock); - exit = (client->interface->ntcpactive > 1); - UNLOCK(&client->interface->lock); + /* + * We have exceeded the system-wide TCP client quota. But, + * we can't just block this accept in all cases, because if + * we did, a heavy TCP load on other interfaces might cause + * this interface to be starved, with no clients able to + * accept new connections. + * + * So, we check here to see if any other clients are + * already servicing TCP queries on this interface (whether + * accepting, reading, or processing). If we find at least + * one, then it's okay *not* to call accept - we can let this + * client go inactive and another will take over when it's + * done. + * + * If there aren't enough active clients on the interface, + * then we can be a little bit flexible about the quota. + * We'll allow *one* extra client through to ensure we're + * listening on every interface; we do this by setting the + * 'force' option to tcpconn_init(). + * + * (Note: In practice this means that the real TCP client + * quota is tcp-clients plus the number of listening + * interfaces plus 1.) 
+ */ + exit = (atomic_load(&client->interface->ntcpactive) > 0U); + if (exit) { + client->newstate = NS_CLIENTSTATE_INACTIVE; + (void)exit_check(client); + return; + } - if (exit) { - client->newstate = NS_CLIENTSTATE_INACTIVE; - (void)exit_check(client); - return; - } - - } else { - client->tcpattached = true; + result = tcpconn_init(client, true); + RUNTIME_CHECK(result == ISC_R_SUCCESS); } /* - * By incrementing the interface's ntcpactive counter we signal - * that there is at least one client servicing TCP queries for the - * interface. - * - * We also make note of the fact in the client itself with the - * tcpactive flag. This ensures proper accounting by preventing - * us from accidentally incrementing or decrementing ntcpactive - * more than once per client object. + * If this client was set up using get_client() or get_worker(), + * then TCP is already marked active. However, if it was restarted + * from exit_check(), it might not be, so we take care of it now. */ - if (!client->tcpactive) { - LOCK(&client->interface->lock); - client->interface->ntcpactive++; - UNLOCK(&client->interface->lock); - client->tcpactive = true; - } + mark_tcp_active(client, true); result = isc_socket_accept(client->tcplistener, client->task, client_newconn, client); @@ -3514,15 +3475,8 @@ client_accept(ns_client_t *client) { "isc_socket_accept() failed: %s", isc_result_totext(result)); - tcpquota_disconnect(client); - - if (client->tcpactive) { - LOCK(&client->interface->lock); - client->interface->ntcpactive--; - UNLOCK(&client->interface->lock); - client->tcpactive = false; - } - + tcpconn_detach(client); + mark_tcp_active(client, false); return; } @@ -3547,9 +3501,7 @@ client_accept(ns_client_t *client) { * listening for connections itself to prevent the interface * going dead. 
*/ - LOCK(&client->interface->lock); - client->interface->ntcpaccepting++; - UNLOCK(&client->interface->lock); + atomic_fetch_add(&client->interface->ntcpaccepting, 1); } static void @@ -3620,24 +3572,25 @@ ns_client_replace(ns_client_t *client) { REQUIRE(client->manager != NULL); tcp = TCP_CLIENT(client); - if (tcp && client->pipelined) { + if (tcp && client->tcpconn != NULL && client->tcpconn->pipelined) { result = get_worker(client->manager, client->interface, client->tcpsocket, client); } else { result = get_client(client->manager, client->interface, - client->dispatch, client, tcp); + client->dispatch, tcp); - /* - * The responsibility for listening for new requests is hereby - * transferred to the new client. Therefore, the old client - * should refrain from listening for any more requests. - */ - client->mortal = true; } if (result != ISC_R_SUCCESS) { return (result); } + /* + * The responsibility for listening for new requests is hereby + * transferred to the new client. Therefore, the old client + * should refrain from listening for any more requests. 
+ */ + client->mortal = true; + return (ISC_R_SUCCESS); } @@ -3778,7 +3731,7 @@ ns_clientmgr_destroy(ns_clientmgr_t **managerp) { static isc_result_t get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, - dns_dispatch_t *disp, ns_client_t *oldclient, bool tcp) + dns_dispatch_t *disp, bool tcp) { isc_result_t result = ISC_R_SUCCESS; isc_event_t *ev; @@ -3823,15 +3776,7 @@ get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, client->dscp = ifp->dscp; if (tcp) { - client->tcpattached = false; - if (oldclient != NULL) { - client->tcpattached = oldclient->tcpattached; - } - - LOCK(&client->interface->lock); - client->interface->ntcpactive++; - UNLOCK(&client->interface->lock); - client->tcpactive = true; + mark_tcp_active(client, true); client->attributes |= NS_CLIENTATTR_TCP; isc_socket_attach(ifp->tcpsocket, @@ -3897,8 +3842,6 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, client->newstate = client->state = NS_CLIENTSTATE_WORKING; INSIST(client->recursionquota == NULL); client->sctx = manager->sctx; - client->tcpquota = &client->sctx->tcpquota; - client->tcpattached = oldclient->tcpattached; client->dscp = ifp->dscp; @@ -3906,8 +3849,8 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, client->mortal = true; client->sendcb = NULL; - pipeline_attach(oldclient, client); - client->pipelined = true; + tcpconn_attach(oldclient, client); + mark_tcp_active(client, true); isc_socket_attach(ifp->tcpsocket, &client->tcplistener); isc_socket_attach(sock, &client->tcpsocket); @@ -3915,11 +3858,6 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, (void)isc_socket_getpeername(client->tcpsocket, &client->peeraddr); client->peeraddr_valid = true; - LOCK(&client->interface->lock); - client->interface->ntcpactive++; - UNLOCK(&client->interface->lock); - client->tcpactive = true; - INSIST(client->tcpmsg_valid == false); dns_tcpmsg_init(client->mctx, client->tcpsocket, &client->tcpmsg); 
client->tcpmsg_valid = true; @@ -3994,8 +3932,7 @@ ns_clientmgr_createclients(ns_clientmgr_t *manager, unsigned int n, MTRACE("createclients"); for (disp = 0; disp < n; disp++) { - result = get_client(manager, ifp, ifp->udpdispatch[disp], - NULL, tcp); + result = get_client(manager, ifp, ifp->udpdispatch[disp], tcp); if (result != ISC_R_SUCCESS) break; } diff --git a/lib/ns/include/ns/client.h b/lib/ns/include/ns/client.h index 2e30c1a7ad..776a36cd33 100644 --- a/lib/ns/include/ns/client.h +++ b/lib/ns/include/ns/client.h @@ -80,6 +80,13 @@ *** Types ***/ +/*% reference-counted TCP connection object */ +typedef struct ns_tcpconn { + isc_refcount_t refs; + isc_quota_t *tcpquota; + bool pipelined; +} ns_tcpconn_t; + /*% nameserver client structure */ struct ns_client { unsigned int magic; @@ -134,10 +141,7 @@ struct ns_client { dns_name_t signername; /*%< [T]SIG key name */ dns_name_t *signer; /*%< NULL if not valid sig */ bool mortal; /*%< Die after handling request */ - bool pipelined; /*%< TCP queries not in sequence */ - isc_refcount_t *pipeline_refs; - isc_quota_t *tcpquota; - bool tcpattached; + ns_tcpconn_t *tcpconn; isc_quota_t *recursionquota; ns_interface_t *interface; diff --git a/lib/ns/include/ns/interfacemgr.h b/lib/ns/include/ns/interfacemgr.h index f579b7ec28..7baafa95c0 100644 --- a/lib/ns/include/ns/interfacemgr.h +++ b/lib/ns/include/ns/interfacemgr.h @@ -76,11 +76,11 @@ struct ns_interface { /*%< UDP dispatchers. */ isc_socket_t * tcpsocket; /*%< TCP socket. 
*/ isc_dscp_t dscp; /*%< "listen-on" DSCP value */ - int ntcpaccepting; /*%< Number of clients + atomic_uint_fast32_t ntcpaccepting; /*%< Number of clients ready to accept new TCP connections on this interface */ - int ntcpactive; /*%< Number of clients + atomic_uint_fast32_t ntcpactive; /*%< Number of clients servicing TCP queries (whether accepting or connected) */ diff --git a/lib/ns/interfacemgr.c b/lib/ns/interfacemgr.c index 1b59b67190..f92cea8c62 100644 --- a/lib/ns/interfacemgr.c +++ b/lib/ns/interfacemgr.c @@ -425,8 +425,9 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, * connections will be handled in parallel even though there is * only one client initially. */ - ifp->ntcpaccepting = 0; - ifp->ntcpactive = 0; + atomic_init(&ifp->ntcpaccepting, 0); + atomic_init(&ifp->ntcpactive, 0); + ifp->nudpdispatch = 0; ifp->dscp = -1; From cae79e1bab677ed1c2ce3adc5d54163a78f0d30b Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Fri, 22 Feb 2019 14:53:30 -0800 Subject: [PATCH 6/7] restore allowance for tcp-clients < interfaces in the "refactor tcpquota and pipeline refs" commit, the counting of active interfaces was tightened in such a way that named could fail to listen on an interface if there were more interfaces than tcp-clients. when checking the quota to start accepting on an interface, if the number of active clients was above zero, then it was presumed that some other client was able to handle accepting new connections. this, however, ignored the fact that the current client could be included in that count, so if the quota was already exceeded before all the interfaces were listening, some interfaces would never listen. we now check whether the current client has been marked active; if so, then the number of active clients on the interface must be greater than 1, not 0. 
(cherry picked from commit 02365b87ea0b1ea5ea8b17376f6734c811c95e61) --- doc/arm/Bv9ARM-book.xml | 3 ++- lib/ns/client.c | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/arm/Bv9ARM-book.xml b/doc/arm/Bv9ARM-book.xml index 9213ef588b..b18dee1356 100644 --- a/doc/arm/Bv9ARM-book.xml +++ b/doc/arm/Bv9ARM-book.xml @@ -8253,7 +8253,8 @@ avoid-v6-udp-ports { 40000; range 50000 60000; }; The number of file descriptors reserved for TCP, stdio, etc. This needs to be big enough to cover the number of - interfaces named listens on, tcp-clients as well as + interfaces named listens on plus + tcp-clients, as well as to provide room for outgoing TCP queries and incoming zone transfers. The default is 512. The minimum value is 128 and the diff --git a/lib/ns/client.c b/lib/ns/client.c index 8dd3018182..6edb46d6d7 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -3428,8 +3428,9 @@ client_accept(ns_client_t *client) { * * So, we check here to see if any other clients are * already servicing TCP queries on this interface (whether - * accepting, reading, or processing). If we find at least - * one, then it's okay *not* to call accept - we can let this + * accepting, reading, or processing). If we find that at + * least one client other than this one is active, then + * it's okay *not* to call accept - we can let this * client go inactive and another will take over when it's * done. * @@ -3443,7 +3444,8 @@ client_accept(ns_client_t *client) { * quota is tcp-clients plus the number of listening * interfaces plus 1.) */ - exit = (atomic_load(&client->interface->ntcpactive) > 0U); + exit = (atomic_load(&client->interface->ntcpactive) > + (client->tcpactive ? 
1U : 0U)); if (exit) { client->newstate = NS_CLIENTSTATE_INACTIVE; (void)exit_check(client); From 79fad84bf6981dfd2a13971d966c0ebee057c448 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Thu, 3 Jan 2019 11:04:41 -0800 Subject: [PATCH 7/7] CHANGES, release note (cherry picked from commit 244e44af432121a05e0a308b7ccce96a8ecd28ab) --- CHANGES | 4 ++++ doc/arm/notes.xml | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/CHANGES b/CHANGES index c055791997..986f53f3f5 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,9 @@ 5201. [bug] Fix a possible deadlock in RPZ update code. [GL #973] +5200. [security] tcp-clients settings could be exceeded in some cases, + which could lead to exhaustion of file descriptors. + (CVE-2018-5743) [GL #615] + 5199. [security] In certain configurations, named could crash if nxdomain-redirect was in use and a redirected query resulted in an NXDOMAIN from the cache. diff --git a/doc/arm/notes.xml b/doc/arm/notes.xml index 225a68245e..1d8747ae95 100644 --- a/doc/arm/notes.xml +++ b/doc/arm/notes.xml @@ -96,6 +96,13 @@ cache. This flaw is disclosed in CVE-2019-6467. [GL #880] + + + The TCP client quota set using the tcp-clients + option could be exceeded in some cases. This could lead to + exhaustion of file descriptors. (CVE-2018-5743) [GL #615] + +