diff --git a/CHANGES b/CHANGES
index c055791997..986f53f3f5 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,9 @@
5201. [bug] Fix a possible deadlock in RPZ update code. [GL #973]
+5200. [security] tcp-clients settings could be exceeded in some cases,
+ which could lead to exhaustion of file descriptors.
+ (CVE-2018-5743) [GL #615]
+
5199. [security] In certain configurations, named could crash
if nxdomain-redirect was in use and a redirected
query resulted in an NXDOMAIN from the cache.
diff --git a/doc/arm/Bv9ARM-book.xml b/doc/arm/Bv9ARM-book.xml
index 9213ef588b..b18dee1356 100644
--- a/doc/arm/Bv9ARM-book.xml
+++ b/doc/arm/Bv9ARM-book.xml
@@ -8253,7 +8253,8 @@ avoid-v6-udp-ports { 40000; range 50000 60000; };
The number of file descriptors reserved for TCP, stdio,
etc. This needs to be big enough to cover the number of
- interfaces named listens on, tcp-clients as well as
+ interfaces named listens on plus
+ tcp-clients, as well as
to provide room for outgoing TCP queries and incoming zone
transfers. The default is 512.
The minimum value is 128 and the
diff --git a/doc/arm/notes.xml b/doc/arm/notes.xml
index 225a68245e..1d8747ae95 100644
--- a/doc/arm/notes.xml
+++ b/doc/arm/notes.xml
@@ -96,6 +96,13 @@
cache. This flaw is disclosed in CVE-2019-6467. [GL #880]
+
+
+ The TCP client quota set using the tcp-clients
+ option could be exceeded in some cases. This could lead to
+ exhaustion of file descriptors. (CVE-2018-5743) [GL #615]
+
+
diff --git a/lib/isc/include/isc/quota.h b/lib/isc/include/isc/quota.h
index 8e593ffb0e..16f6181cda 100644
--- a/lib/isc/include/isc/quota.h
+++ b/lib/isc/include/isc/quota.h
@@ -115,6 +115,13 @@ isc_quota_attach(isc_quota_t *quota, isc_quota_t **p);
* quota if successful (ISC_R_SUCCESS or ISC_R_SOFTQUOTA).
*/
+isc_result_t
+isc_quota_force(isc_quota_t *quota, isc_quota_t **p);
+/*%<
+ * Like isc_quota_attach, but will attach '*p' to the quota
+ * even if the hard quota has been exceeded.
+ */
+
void
isc_quota_detach(isc_quota_t **p);
/*%<
diff --git a/lib/isc/quota.c b/lib/isc/quota.c
index 1e25b402d4..cf63e05f52 100644
--- a/lib/isc/quota.c
+++ b/lib/isc/quota.c
@@ -85,20 +85,36 @@ isc_quota_release(isc_quota_t *quota) {
INSIST(atomic_fetch_sub("a->used, 1) > 0);
}
-isc_result_t
-isc_quota_attach(isc_quota_t *quota, isc_quota_t **p)
-{
+static isc_result_t
+doattach(isc_quota_t *quota, isc_quota_t **p, bool force) {
isc_result_t result;
- INSIST(p != NULL && *p == NULL);
+ REQUIRE(p != NULL && *p == NULL);
+
result = isc_quota_reserve(quota);
- if (result == ISC_R_SUCCESS || result == ISC_R_SOFTQUOTA)
+ if (result == ISC_R_SUCCESS || result == ISC_R_SOFTQUOTA) {
*p = quota;
+ } else if (result == ISC_R_QUOTA && force) {
+ /* attach anyway */
+ atomic_fetch_add("a->used, 1);
+ *p = quota;
+ result = ISC_R_SUCCESS;
+ }
+
return (result);
}
+isc_result_t
+isc_quota_attach(isc_quota_t *quota, isc_quota_t **p) {
+ return (doattach(quota, p, false));
+}
+
+isc_result_t
+isc_quota_force(isc_quota_t *quota, isc_quota_t **p) {
+ return (doattach(quota, p, true));
+}
+
void
-isc_quota_detach(isc_quota_t **p)
-{
+isc_quota_detach(isc_quota_t **p) {
INSIST(p != NULL && *p != NULL);
isc_quota_release(*p);
*p = NULL;
diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in
index 4cec81fca1..4b66b3c9c1 100644
--- a/lib/isc/win32/libisc.def.in
+++ b/lib/isc/win32/libisc.def.in
@@ -449,6 +449,7 @@ isc_portset_removerange
isc_quota_attach
isc_quota_destroy
isc_quota_detach
+isc_quota_force
isc_quota_getmax
isc_quota_getsoft
isc_quota_getused
diff --git a/lib/ns/client.c b/lib/ns/client.c
index 4a43fb6d32..6edb46d6d7 100644
--- a/lib/ns/client.c
+++ b/lib/ns/client.c
@@ -243,7 +243,7 @@ static void ns_client_dumpmessage(ns_client_t *client, const char *reason);
static isc_result_t get_client(ns_clientmgr_t *manager, ns_interface_t *ifp,
dns_dispatch_t *disp, bool tcp);
static isc_result_t get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp,
- isc_socket_t *sock);
+ isc_socket_t *sock, ns_client_t *oldclient);
static void compute_cookie(ns_client_t *client, uint32_t when,
uint32_t nonce, const unsigned char *secret,
isc_buffer_t *buf);
@@ -318,6 +318,121 @@ read_settimeout(ns_client_t *client, bool newconn) {
}
}
+/*%
+ * Allocate a reference-counted object that will maintain a single pointer to
+ * the (also reference-counted) TCP client quota, shared between all the
+ * clients processing queries on a single TCP connection, so that all
+ * clients sharing the one socket will together consume only one slot in
+ * the 'tcp-clients' quota.
+ */
+static isc_result_t
+tcpconn_init(ns_client_t *client, bool force) {
+ isc_result_t result;
+ isc_quota_t *quota = NULL;
+ ns_tcpconn_t *tconn = NULL;
+
+ REQUIRE(client->tcpconn == NULL);
+
+ /*
+ * Try to attach to the quota first, so we won't pointlessly
+ * allocate memory for a tcpconn object if we can't get one.
+ */
+ if (force) {
+ result = isc_quota_force(&client->sctx->tcpquota, "a);
+ } else {
+ result = isc_quota_attach(&client->sctx->tcpquota, "a);
+ }
+ if (result != ISC_R_SUCCESS) {
+ return (result);
+ }
+
+ /*
+ * A global memory context is used for the allocation as different
+ * client structures may have different memory contexts assigned and a
+ * reference counter allocated here might need to be freed by a
+ * different client. The performance impact caused by memory context
+ * contention here is expected to be negligible, given that this code
+ * is only executed for TCP connections.
+ */
+ tconn = isc_mem_allocate(client->sctx->mctx, sizeof(*tconn));
+
+ isc_refcount_init(&tconn->refs, 1);
+ tconn->tcpquota = quota;
+ quota = NULL;
+ tconn->pipelined = false;
+
+ client->tcpconn = tconn;
+
+ return (ISC_R_SUCCESS);
+}
+
+/*%
+ * Increase the count of client structures sharing the TCP connection
+ * that 'source' is associated with; add a pointer to the same tcpconn
+ * to 'target', thus associating it with the same TCP connection.
+ */
+static void
+tcpconn_attach(ns_client_t *source, ns_client_t *target) {
+ int old_refs;
+
+ REQUIRE(source->tcpconn != NULL);
+ REQUIRE(target->tcpconn == NULL);
+ REQUIRE(source->tcpconn->pipelined);
+
+ old_refs = isc_refcount_increment(&source->tcpconn->refs);
+ INSIST(old_refs > 0);
+ target->tcpconn = source->tcpconn;
+}
+
+/*%
+ * Decrease the count of client structures sharing the TCP connection that
+ * 'client' is associated with. If this is the last client using this TCP
+ * connection, we detach from the TCP quota and free the tcpconn
+ * object. Either way, client->tcpconn is set to NULL.
+ */
+static void
+tcpconn_detach(ns_client_t *client) {
+ ns_tcpconn_t *tconn = NULL;
+ int old_refs;
+
+ REQUIRE(client->tcpconn != NULL);
+
+ tconn = client->tcpconn;
+ client->tcpconn = NULL;
+
+ old_refs = isc_refcount_decrement(&tconn->refs);
+ INSIST(old_refs > 0);
+
+ if (old_refs == 1) {
+ isc_quota_detach(&tconn->tcpquota);
+ isc_mem_free(client->sctx->mctx, tconn);
+ }
+}
+
+/*%
+ * Mark a client as active and increment the interface's 'ntcpactive'
+ * counter, as a signal that there is at least one client servicing
+ * TCP queries for the interface. If we reach the TCP client quota at
+ * some point, this will be used to determine whether a quota overrun
+ * should be permitted.
+ *
+ * Marking the client active with the 'tcpactive' flag ensures proper
+ * accounting, by preventing us from incrementing or decrementing
+ * 'ntcpactive' more than once per client.
+ */
+static void
+mark_tcp_active(ns_client_t *client, bool active) {
+ if (active && !client->tcpactive) {
+ atomic_fetch_add(&client->interface->ntcpactive, 1);
+ client->tcpactive = active;
+ } else if (!active && client->tcpactive) {
+ uint32_t old =
+ atomic_fetch_sub(&client->interface->ntcpactive, 1);
+ INSIST(old > 0);
+ client->tcpactive = active;
+ }
+}
+
/*%
* Check for a deactivation or shutdown request and take appropriate
* action. Returns true if either is in progress; in this case
@@ -407,7 +522,8 @@ exit_check(ns_client_t *client) {
INSIST(client->recursionquota == NULL);
if (NS_CLIENTSTATE_READING == client->newstate) {
- if (!client->pipelined) {
+ INSIST(client->tcpconn != NULL);
+ if (!client->tcpconn->pipelined) {
client_read(client, false);
client->newstate = NS_CLIENTSTATE_MAX;
return (true); /* We're done. */
@@ -425,10 +541,13 @@ exit_check(ns_client_t *client) {
*/
INSIST(client->recursionquota == NULL);
INSIST(client->newstate <= NS_CLIENTSTATE_READY);
- if (client->nreads > 0)
+
+ if (client->nreads > 0) {
dns_tcpmsg_cancelread(&client->tcpmsg);
- if (client->nreads != 0) {
- /* Still waiting for read cancel completion. */
+ }
+
+ /* Still waiting for read cancel completion. */
+ if (client->nreads > 0) {
return (true);
}
@@ -436,14 +555,49 @@ exit_check(ns_client_t *client) {
dns_tcpmsg_invalidate(&client->tcpmsg);
client->tcpmsg_valid = false;
}
+
+ /*
+ * Soon the client will be ready to accept a new TCP
+ * connection or UDP request, but we may have enough
+ * clients doing that already. Check whether this client
+ * needs to remain active and allow it go inactive if
+ * not.
+ *
+ * UDP clients always go inactive at this point, but a TCP
+ * client may need to stay active and return to READY
+ * state if no other clients are available to listen
+ * for TCP requests on this interface.
+ *
+ * Regardless, if we're going to FREED state, that means
+ * the system is shutting down and we don't need to
+ * retain clients.
+ */
+ if (client->mortal && TCP_CLIENT(client) &&
+ client->newstate != NS_CLIENTSTATE_FREED &&
+ (client->sctx->options & NS_SERVER_CLIENTTEST) == 0 &&
+ atomic_load(&client->interface->ntcpaccepting) == 0)
+ {
+ /* Nobody else is accepting */
+ client->mortal = false;
+ client->newstate = NS_CLIENTSTATE_READY;
+ }
+
+ /*
+ * Detach from TCP connection and TCP client quota,
+ * if appropriate. If this is the last reference to
+ * the TCP connection in our pipeline group, the
+ * TCP quota slot will be released.
+ */
+ if (client->tcpconn) {
+ tcpconn_detach(client);
+ }
+
if (client->tcpsocket != NULL) {
CTRACE("closetcp");
isc_socket_detach(&client->tcpsocket);
+ mark_tcp_active(client, false);
}
- if (client->tcpquota != NULL)
- isc_quota_detach(&client->tcpquota);
-
if (client->timerset) {
(void)isc_timer_reset(client->timer,
isc_timertype_inactive,
@@ -451,47 +605,26 @@ exit_check(ns_client_t *client) {
client->timerset = false;
}
- client->pipelined = false;
-
client->peeraddr_valid = false;
client->state = NS_CLIENTSTATE_READY;
- INSIST(client->recursionquota == NULL);
-
- /*
- * Now the client is ready to accept a new TCP connection
- * or UDP request, but we may have enough clients doing
- * that already. Check whether this client needs to remain
- * active and force it to go inactive if not.
- *
- * UDP clients go inactive at this point, but TCP clients
- * may remain active if we have fewer active TCP client
- * objects than desired due to an earlier quota exhaustion.
- */
- if (client->mortal && TCP_CLIENT(client) &&
- ((client->sctx->options & NS_SERVER_CLIENTTEST) == 0))
- {
- LOCK(&client->interface->lock);
- if (client->interface->ntcpcurrent <
- client->interface->ntcptarget)
- client->mortal = false;
- UNLOCK(&client->interface->lock);
- }
/*
* We don't need the client; send it to the inactive
* queue for recycling.
*/
if (client->mortal) {
- if (client->newstate > NS_CLIENTSTATE_INACTIVE)
+ if (client->newstate > NS_CLIENTSTATE_INACTIVE) {
client->newstate = NS_CLIENTSTATE_INACTIVE;
+ }
}
if (NS_CLIENTSTATE_READY == client->newstate) {
if (TCP_CLIENT(client)) {
client_accept(client);
- } else
+ } else {
client_udprecv(client);
+ }
client->newstate = NS_CLIENTSTATE_MAX;
return (true);
}
@@ -503,41 +636,50 @@ exit_check(ns_client_t *client) {
/*
* We are trying to enter the inactive state.
*/
- if (client->naccepts > 0)
+ if (client->naccepts > 0) {
isc_socket_cancel(client->tcplistener, client->task,
ISC_SOCKCANCEL_ACCEPT);
+ }
/* Still waiting for accept cancel completion. */
- if (! (client->naccepts == 0))
+ if (client->naccepts > 0) {
return (true);
+ }
/* Accept cancel is complete. */
- if (client->nrecvs > 0)
+ if (client->nrecvs > 0) {
isc_socket_cancel(client->udpsocket, client->task,
ISC_SOCKCANCEL_RECV);
+ }
/* Still waiting for recv cancel completion. */
- if (! (client->nrecvs == 0))
+ if (client->nrecvs > 0) {
return (true);
+ }
/* Still waiting for control event to be delivered */
- if (client->nctls > 0)
+ if (client->nctls > 0) {
return (true);
-
- /* Deactivate the client. */
- if (client->interface)
- ns_interface_detach(&client->interface);
+ }
INSIST(client->naccepts == 0);
INSIST(client->recursionquota == NULL);
- if (client->tcplistener != NULL)
+ if (client->tcplistener != NULL) {
isc_socket_detach(&client->tcplistener);
-
- if (client->udpsocket != NULL)
+ mark_tcp_active(client, false);
+ }
+ if (client->udpsocket != NULL) {
isc_socket_detach(&client->udpsocket);
+ }
- if (client->dispatch != NULL)
+ /* Deactivate the client. */
+ if (client->interface != NULL) {
+ ns_interface_detach(&client->interface);
+ }
+
+ if (client->dispatch != NULL) {
dns_dispatch_detach(&client->dispatch);
+ }
client->attributes = 0;
client->mortal = false;
@@ -568,8 +710,9 @@ exit_check(ns_client_t *client) {
ISC_QUEUE_PUSH(manager->inactive, client,
ilink);
}
- if (client->needshutdown)
+ if (client->needshutdown) {
isc_task_shutdown(client->task);
+ }
return (true);
}
}
@@ -685,7 +828,7 @@ client_start(isc_task_t *task, isc_event_t *event) {
return;
if (TCP_CLIENT(client)) {
- if (client->pipelined) {
+ if (client->tcpconn != NULL) {
client_read(client, false);
} else {
client_accept(client);
@@ -695,7 +838,6 @@ client_start(isc_task_t *task, isc_event_t *event) {
}
}
-
/*%
* The client's task has received a shutdown event.
*/
@@ -2283,6 +2425,7 @@ ns__client_request(isc_task_t *task, isc_event_t *event) {
client->nrecvs--;
} else {
INSIST(TCP_CLIENT(client));
+ INSIST(client->tcpconn != NULL);
REQUIRE(event->ev_type == DNS_EVENT_TCPMSG);
REQUIRE(event->ev_sender == &client->tcpmsg);
buffer = &client->tcpmsg.buffer;
@@ -2471,18 +2614,27 @@ ns__client_request(isc_task_t *task, isc_event_t *event) {
/*
* Pipeline TCP query processing.
*/
- if (client->message->opcode != dns_opcode_query)
- client->pipelined = false;
- if (TCP_CLIENT(client) && client->pipelined) {
- result = isc_quota_reserve(&client->sctx->tcpquota);
- if (result == ISC_R_SUCCESS)
- result = ns_client_replace(client);
+ if (TCP_CLIENT(client) &&
+ client->message->opcode != dns_opcode_query)
+ {
+ client->tcpconn->pipelined = false;
+ }
+ if (TCP_CLIENT(client) && client->tcpconn->pipelined) {
+ /*
+ * We're pipelining. Replace the client; the
+ * replacement can read the TCP socket looking
+ * for new messages and this one can process the
+ * current message asynchronously.
+ *
+ * There will now be at least three clients using this
+ * TCP socket - one accepting new connections,
+ * one reading an existing connection to get new
+ * messages, and one answering the message already
+ * received.
+ */
+ result = ns_client_replace(client);
if (result != ISC_R_SUCCESS) {
- ns_client_log(client, NS_LOGCATEGORY_CLIENT,
- NS_LOGMODULE_CLIENT, ISC_LOG_WARNING,
- "no more TCP clients(read): %s",
- isc_result_totext(result));
- client->pipelined = false;
+ client->tcpconn->pipelined = false;
}
}
@@ -3034,14 +3186,14 @@ client_create(ns_clientmgr_t *manager, ns_client_t **clientp) {
dns_name_init(&client->signername, NULL);
client->mortal = false;
client->sendcb = NULL;
- client->pipelined = false;
- client->tcpquota = NULL;
+ client->tcpconn = NULL;
client->recursionquota = NULL;
client->interface = NULL;
client->peeraddr_valid = false;
dns_ecs_init(&client->ecs);
client->needshutdown = ((client->sctx->options &
NS_SERVER_CLIENTTEST) != 0);
+ client->tcpactive = false;
ISC_EVENT_INIT(&client->ctlevent, sizeof(client->ctlevent), 0, NULL,
NS_EVENT_CLIENTCONTROL, client_start, client, client,
@@ -3138,10 +3290,11 @@ client_read(ns_client_t *client, bool newconn) {
static void
client_newconn(isc_task_t *task, isc_event_t *event) {
+ isc_result_t result;
ns_client_t *client = event->ev_arg;
isc_socket_newconnev_t *nevent = (isc_socket_newconnev_t *)event;
dns_aclenv_t *env = ns_interfacemgr_getaclenv(client->interface->mgr);
- isc_result_t result;
+ uint32_t old;
REQUIRE(event->ev_type == ISC_SOCKEVENT_NEWCONN);
REQUIRE(NS_CLIENT_VALID(client));
@@ -3151,13 +3304,18 @@ client_newconn(isc_task_t *task, isc_event_t *event) {
INSIST(client->state == NS_CLIENTSTATE_READY);
+ /*
+ * The accept() was successful and we're now establishing a new
+ * connection. We need to make note of it in the client and
+ * interface objects so client objects can do the right thing
+ * when going inactive in exit_check() (see comments in
+ * client_accept() for details).
+ */
INSIST(client->naccepts == 1);
client->naccepts--;
- LOCK(&client->interface->lock);
- INSIST(client->interface->ntcpcurrent > 0);
- client->interface->ntcpcurrent--;
- UNLOCK(&client->interface->lock);
+ old = atomic_fetch_sub(&client->interface->ntcpaccepting, 1);
+ INSIST(old > 0);
/*
* We must take ownership of the new socket before the exit
@@ -3190,6 +3348,7 @@ client_newconn(isc_task_t *task, isc_event_t *event) {
NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3),
"accept failed: %s",
isc_result_totext(nevent->result));
+ tcpconn_detach(client);
}
if (exit_check(client))
@@ -3225,21 +3384,13 @@ client_newconn(isc_task_t *task, isc_event_t *event) {
* telnetting to port 53 (once per CPU) will
* deny service to legitimate TCP clients.
*/
- client->pipelined = false;
- result = isc_quota_attach(&client->sctx->tcpquota,
- &client->tcpquota);
- if (result == ISC_R_SUCCESS)
- result = ns_client_replace(client);
- if (result != ISC_R_SUCCESS) {
- ns_client_log(client, NS_LOGCATEGORY_CLIENT,
- NS_LOGMODULE_CLIENT, ISC_LOG_WARNING,
- "no more TCP clients(accept): %s",
- isc_result_totext(result));
- } else if (client->sctx->keepresporder == NULL ||
- !dns_acl_allowed(&netaddr, NULL,
- client->sctx->keepresporder, env))
+ result = ns_client_replace(client);
+ if (result == ISC_R_SUCCESS &&
+ (client->sctx->keepresporder == NULL ||
+ !dns_acl_allowed(&netaddr, NULL,
+ client->sctx->keepresporder, env)))
{
- client->pipelined = true;
+ client->tcpconn->pipelined = true;
}
client_read(client, true);
@@ -3255,12 +3406,66 @@ client_accept(ns_client_t *client) {
CTRACE("accept");
+ /*
+ * Set up a new TCP connection. This means try to attach to the
+ * TCP client quota (tcp-clients), but fail if we're over quota.
+ */
+ result = tcpconn_init(client, false);
+ if (result != ISC_R_SUCCESS) {
+ bool exit;
+
+ ns_client_log(client, NS_LOGCATEGORY_CLIENT,
+ NS_LOGMODULE_CLIENT, ISC_LOG_WARNING,
+ "TCP client quota reached: %s",
+ isc_result_totext(result));
+
+ /*
+ * We have exceeded the system-wide TCP client quota. But,
+ * we can't just block this accept in all cases, because if
+ * we did, a heavy TCP load on other interfaces might cause
+ * this interface to be starved, with no clients able to
+ * accept new connections.
+ *
+ * So, we check here to see if any other clients are
+ * already servicing TCP queries on this interface (whether
+ * accepting, reading, or processing). If we find that at
+ * least one client other than this one is active, then
+ * it's okay *not* to call accept - we can let this
+ * client go inactive and another will take over when it's
+ * done.
+ *
+ * If there aren't enough active clients on the interface,
+ * then we can be a little bit flexible about the quota.
+ * We'll allow *one* extra client through to ensure we're
+ * listening on every interface; we do this by setting the
+ * 'force' option to tcpconn_init().
+ *
+ * (Note: In practice this means that the real TCP client
+ * quota is tcp-clients plus the number of listening
+ * interfaces plus 1.)
+ */
+ exit = (atomic_load(&client->interface->ntcpactive) >
+ (client->tcpactive ? 1U : 0U));
+ if (exit) {
+ client->newstate = NS_CLIENTSTATE_INACTIVE;
+ (void)exit_check(client);
+ return;
+ }
+
+ result = tcpconn_init(client, true);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS);
+ }
+
+ /*
+ * If this client was set up using get_client() or get_worker(),
+ * then TCP is already marked active. However, if it was restarted
+ * from exit_check(), it might not be, so we take care of it now.
+ */
+ mark_tcp_active(client, true);
+
result = isc_socket_accept(client->tcplistener, client->task,
client_newconn, client);
if (result != ISC_R_SUCCESS) {
- UNEXPECTED_ERROR(__FILE__, __LINE__,
- "isc_socket_accept() failed: %s",
- isc_result_totext(result));
/*
* XXXRTH What should we do? We're trying to accept but
* it didn't work. If we just give up, then TCP
@@ -3268,13 +3473,37 @@ client_accept(ns_client_t *client) {
*
* For now, we just go idle.
*/
+ UNEXPECTED_ERROR(__FILE__, __LINE__,
+ "isc_socket_accept() failed: %s",
+ isc_result_totext(result));
+
+ tcpconn_detach(client);
+ mark_tcp_active(client, false);
return;
}
+
+ /*
+ * The client's 'naccepts' counter indicates that this client has
+ * called accept() and is waiting for a new connection. It should
+ * never exceed 1.
+ */
INSIST(client->naccepts == 0);
client->naccepts++;
- LOCK(&client->interface->lock);
- client->interface->ntcpcurrent++;
- UNLOCK(&client->interface->lock);
+
+ /*
+ * The interface's 'ntcpaccepting' counter is incremented when
+ * any client calls accept(), and decremented in client_newconn()
+ * once the connection is established.
+ *
+ * When the client object is shutting down after handling a TCP
+ * request (see exit_check()), if this value is at least one, that
+ * means another client has called accept() and is waiting to
+ * establish the next connection. That means the client may be
+ * be free to become inactive; otherwise it may need to start
+ * listening for connections itself to prevent the interface
+ * going dead.
+ */
+ atomic_fetch_add(&client->interface->ntcpaccepting, 1);
}
static void
@@ -3345,15 +3574,17 @@ ns_client_replace(ns_client_t *client) {
REQUIRE(client->manager != NULL);
tcp = TCP_CLIENT(client);
- if (tcp && client->pipelined) {
+ if (tcp && client->tcpconn != NULL && client->tcpconn->pipelined) {
result = get_worker(client->manager, client->interface,
- client->tcpsocket);
+ client->tcpsocket, client);
} else {
result = get_client(client->manager, client->interface,
client->dispatch, tcp);
+
}
- if (result != ISC_R_SUCCESS)
+ if (result != ISC_R_SUCCESS) {
return (result);
+ }
/*
* The responsibility for listening for new requests is hereby
@@ -3547,9 +3778,12 @@ get_client(ns_clientmgr_t *manager, ns_interface_t *ifp,
client->dscp = ifp->dscp;
if (tcp) {
+ mark_tcp_active(client, true);
+
client->attributes |= NS_CLIENTATTR_TCP;
isc_socket_attach(ifp->tcpsocket,
&client->tcplistener);
+
} else {
isc_socket_t *sock;
@@ -3567,13 +3801,16 @@ get_client(ns_clientmgr_t *manager, ns_interface_t *ifp,
}
static isc_result_t
-get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock) {
+get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock,
+ ns_client_t *oldclient)
+{
isc_result_t result = ISC_R_SUCCESS;
isc_event_t *ev;
ns_client_t *client;
MTRACE("get worker");
REQUIRE(manager != NULL);
+ REQUIRE(oldclient != NULL);
if (manager->exiting)
return (ISC_R_SHUTTINGDOWN);
@@ -3607,15 +3844,16 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock) {
client->newstate = client->state = NS_CLIENTSTATE_WORKING;
INSIST(client->recursionquota == NULL);
client->sctx = manager->sctx;
- client->tcpquota = &client->sctx->tcpquota;
client->dscp = ifp->dscp;
client->attributes |= NS_CLIENTATTR_TCP;
- client->pipelined = true;
client->mortal = true;
client->sendcb = NULL;
+ tcpconn_attach(oldclient, client);
+ mark_tcp_active(client, true);
+
isc_socket_attach(ifp->tcpsocket, &client->tcplistener);
isc_socket_attach(sock, &client->tcpsocket);
isc_socket_setname(client->tcpsocket, "worker-tcp", NULL);
diff --git a/lib/ns/include/ns/client.h b/lib/ns/include/ns/client.h
index cd6c9c8a47..776a36cd33 100644
--- a/lib/ns/include/ns/client.h
+++ b/lib/ns/include/ns/client.h
@@ -80,6 +80,13 @@
*** Types
***/
+/*% reference-counted TCP connection object */
+typedef struct ns_tcpconn {
+ isc_refcount_t refs;
+ isc_quota_t *tcpquota;
+ bool pipelined;
+} ns_tcpconn_t;
+
/*% nameserver client structure */
struct ns_client {
unsigned int magic;
@@ -95,7 +102,8 @@ struct ns_client {
int nupdates;
int nctls;
int references;
- bool needshutdown; /*
+ bool tcpactive;
+ bool needshutdown; /*
* Used by clienttest to get
* the client to go from
* inactive to free state
@@ -130,11 +138,10 @@ struct ns_client {
isc_time_t requesttime;
isc_stdtime_t now;
isc_time_t tnow;
- dns_name_t signername; /*%< [T]SIG key name */
- dns_name_t *signer; /*%< NULL if not valid sig */
- bool mortal; /*%< Die after handling request */
- bool pipelined; /*%< TCP queries not in sequence */
- isc_quota_t *tcpquota;
+ dns_name_t signername; /*%< [T]SIG key name */
+ dns_name_t *signer; /*%< NULL if not valid sig */
+ bool mortal; /*%< Die after handling request */
+ ns_tcpconn_t *tcpconn;
isc_quota_t *recursionquota;
ns_interface_t *interface;
@@ -143,7 +150,7 @@ struct ns_client {
isc_netaddr_t destaddr;
isc_sockaddr_t destsockaddr;
- dns_ecs_t ecs; /*%< EDNS client subnet sent by client */
+ dns_ecs_t ecs; /*%< EDNS client subnet sent by client */
struct in6_pktinfo pktinfo;
isc_dscp_t dscp;
diff --git a/lib/ns/include/ns/interfacemgr.h b/lib/ns/include/ns/interfacemgr.h
index 54846fdc27..7baafa95c0 100644
--- a/lib/ns/include/ns/interfacemgr.h
+++ b/lib/ns/include/ns/interfacemgr.h
@@ -76,9 +76,14 @@ struct ns_interface {
/*%< UDP dispatchers. */
isc_socket_t * tcpsocket; /*%< TCP socket. */
isc_dscp_t dscp; /*%< "listen-on" DSCP value */
- int ntcptarget; /*%< Desired number of concurrent
- TCP accepts */
- int ntcpcurrent; /*%< Current ditto, locked */
+ atomic_uint_fast32_t ntcpaccepting; /*%< Number of clients
+ ready to accept new
+ TCP connections on this
+ interface */
+ atomic_uint_fast32_t ntcpactive; /*%< Number of clients
+ servicing TCP queries
+ (whether accepting or
+ connected) */
int nudpdispatch; /*%< Number of UDP dispatches */
ns_clientmgr_t * clientmgr; /*%< Client manager. */
ISC_LINK(ns_interface_t) link;
diff --git a/lib/ns/interfacemgr.c b/lib/ns/interfacemgr.c
index 865abe7d31..f92cea8c62 100644
--- a/lib/ns/interfacemgr.c
+++ b/lib/ns/interfacemgr.c
@@ -425,8 +425,9 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr,
* connections will be handled in parallel even though there is
* only one client initially.
*/
- ifp->ntcptarget = 1;
- ifp->ntcpcurrent = 0;
+ atomic_init(&ifp->ntcpaccepting, 0);
+ atomic_init(&ifp->ntcpactive, 0);
+
ifp->nudpdispatch = 0;
ifp->dscp = -1;
@@ -561,9 +562,7 @@ ns_interface_accepttcp(ns_interface_t *ifp) {
*/
(void)isc_socket_filter(ifp->tcpsocket, "dataready");
- result = ns_clientmgr_createclients(ifp->clientmgr,
- ifp->ntcptarget, ifp,
- true);
+ result = ns_clientmgr_createclients(ifp->clientmgr, 1, ifp, true);
if (result != ISC_R_SUCCESS) {
UNEXPECTED_ERROR(__FILE__, __LINE__,
"TCP ns_clientmgr_createclients(): %s",