From a67b3255426160c55faa9d1d3aa6e7579439b2b4 Mon Sep 17 00:00:00 2001 From: Artem Boldariev Date: Wed, 4 Sep 2024 18:53:35 +0300 Subject: [PATCH 1/7] Implement TCP manual read timer control functionality This commit adds a manual TCP read timer control mode which is supposed to override automatic resetting of the timer when any data is received. That can be accomplished by `isc__nmhandle_set_manual_timer()`. This functionality is supposed to be used by multilevel networking transports which require finer grained control over the read timer (TLS Stream, DoH). The commit is essentially an implementation of the functionality from newer versions of BIND. --- lib/isc/netmgr/netmgr-int.h | 11 +++++++++++ lib/isc/netmgr/netmgr.c | 19 +++++++++++++++++++ lib/isc/netmgr/tcp.c | 26 ++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index a99153a8f2..48f2ef99f0 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -1229,6 +1229,7 @@ struct isc_nmsocket { isc_barrier_t barrier; bool barrier_initialised; + atomic_bool manual_read_timer; #ifdef NETMGR_TRACE void *backtrace[TRACE_SIZE]; int backtrace_size; @@ -1547,6 +1548,9 @@ isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout); * Set the read timeout for the TCP socket associated with 'handle'. */ +void +isc__nmhandle_tcp_set_manual_timer(isc_nmhandle_t *handle, const bool manual); + void isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0); void @@ -2278,3 +2282,10 @@ isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult); void isc__nmsocket_log_tls_session_reuse(isc_nmsocket_t *sock, isc_tls_t *tls); + +void +isc__nmhandle_set_manual_timer(isc_nmhandle_t *handle, const bool manual); +/* + * Set manual read timer control mode - so that it will not get reset + * automatically on read nor get started when read is initiated. + */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index a7b82ce69b..49982767bc 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -1627,6 +1627,7 @@ isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type, atomic_init(&sock->keepalive, false); atomic_init(&sock->connected, false); atomic_init(&sock->timedout, false); + atomic_init(&sock->manual_read_timer, false); atomic_init(&sock->active_child_connections, 0); @@ -3932,6 +3933,24 @@ isc__nmsocket_log_tls_session_reuse(isc_nmsocket_t *sock, isc_tls_t *tls) { client_sabuf, local_sabuf); } +void +isc__nmhandle_set_manual_timer(isc_nmhandle_t *handle, const bool manual) { + REQUIRE(VALID_NMHANDLE(handle)); + REQUIRE(VALID_NMSOCK(handle->sock)); + + isc_nmsocket_t *sock = handle->sock; + + switch (sock->type) { + case isc_nm_tcpsocket: + isc__nmhandle_tcp_set_manual_timer(handle, manual); + return; + default: + break; + }; + + UNREACHABLE(); +} + #ifdef NETMGR_TRACE /* * Dump all active sockets in netmgr. We output to stderr diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index db589aecc7..fbe1394ab9 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -784,7 +784,9 @@ isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0) { return; } - isc__nmsocket_timer_start(sock); + if (!atomic_load(&sock->manual_read_timer)) { + isc__nmsocket_timer_start(sock); + } } void @@ -822,7 +824,9 @@ isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0) { REQUIRE(sock->tid == isc_nm_tid()); UNUSED(worker); - isc__nmsocket_timer_stop(sock); + if (!atomic_load(&sock->manual_read_timer)) { + isc__nmsocket_timer_stop(sock); + } isc__nm_stop_reading(sock); } @@ -931,8 +935,10 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { } } - /* The timer will be updated */ - isc__nmsocket_timer_restart(sock); + if (!atomic_load(&sock->manual_read_timer)) { + /* The timer will be updated */ + isc__nmsocket_timer_restart(sock); + } } free: @@ -1521,3 +1527,15 @@ isc__nm_tcp_listener_nactive(isc_nmsocket_t *listener) { INSIST(nactive >= 0); return nactive; } + +void +isc__nmhandle_tcp_set_manual_timer(isc_nmhandle_t *handle, const bool manual) { + isc_nmsocket_t *sock; + + REQUIRE(VALID_NMHANDLE(handle)); + sock = handle->sock; + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tcpsocket); + + atomic_store(&sock->manual_read_timer, manual); +} From 13d521fa5fe7102733db15079fd49c7ccf5b0648 Mon Sep 17 00:00:00 2001 From: Artem Boldariev Date: Wed, 4 Sep 2024 21:35:35 +0300 Subject: [PATCH 2/7] Implement TLS manual read timer control functionality This commit adds a manual TLS read timer control mode which is supposed to override automatic resetting of the timer when any data is received. It both depends and complements similar functionality in TCP. --- lib/isc/netmgr/netmgr-int.h | 12 ++++ lib/isc/netmgr/netmgr.c | 32 +++++++++ lib/isc/netmgr/tlsstream.c | 137 ++++++++++++++++++++++++++++++++---- 3 files changed, 166 insertions(+), 15 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 48f2ef99f0..d3c790a857 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -1793,6 +1793,9 @@ isc__nm_tls_cleartimeout(isc_nmhandle_t *handle); * around. */ +void +isc__nmhandle_tls_set_manual_timer(isc_nmhandle_t *handle, const bool manual); + const char * isc__nm_tls_verify_tls_peer_result_string(const isc_nmhandle_t *handle); @@ -1810,6 +1813,15 @@ void isc__nmhandle_tls_setwritetimeout(isc_nmhandle_t *handle, uint64_t write_timeout); +bool +isc__nmsocket_tls_timer_running(isc_nmsocket_t *sock); + +void +isc__nmsocket_tls_timer_restart(isc_nmsocket_t *sock); + +void +isc__nmsocket_tls_timer_stop(isc_nmsocket_t *sock); + void isc__nm_http_stoplistening(isc_nmsocket_t *sock); diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 49982767bc..bc56d7a2ea 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -2137,6 +2137,15 @@ void isc__nmsocket_timer_restart(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); + switch (sock->type) { +#if HAVE_LIBNGHTTP2 + case isc_nm_tlssocket: + return isc__nmsocket_tls_timer_restart(sock); +#endif /* HAVE_LIBNGHTTP2 */ + default: + break; + } + if (uv_is_closing((uv_handle_t *)&sock->read_timer)) { return; } @@ -2171,6 +2180,15 @@ bool isc__nmsocket_timer_running(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); + switch (sock->type) { +#if HAVE_LIBNGHTTP2 + case isc_nm_tlssocket: + return isc__nmsocket_tls_timer_running(sock); +#endif /* HAVE_LIBNGHTTP2 */ + default: + break; + } + return uv_is_active((uv_handle_t *)&sock->read_timer); } @@ -2191,6 +2209,15 @@ isc__nmsocket_timer_stop(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); + switch (sock->type) { +#if HAVE_LIBNGHTTP2 + case isc_nm_tlssocket: + return isc__nmsocket_tls_timer_stop(sock); +#endif /* HAVE_LIBNGHTTP2 */ + default: + break; + } + /* uv_timer_stop() is idempotent, no need to check if running */ r = uv_timer_stop(&sock->read_timer); @@ -3944,6 +3971,11 @@ isc__nmhandle_set_manual_timer(isc_nmhandle_t *handle, const bool manual) { case isc_nm_tcpsocket: isc__nmhandle_tcp_set_manual_timer(handle, manual); return; +#if HAVE_LIBNGHTTP2 + case isc_nm_tlssocket: + isc__nmhandle_tls_set_manual_timer(handle, manual); + return; +#endif /* HAVE_LIBNGHTTP2 */ default: break; }; diff --git a/lib/isc/netmgr/tlsstream.c b/lib/isc/netmgr/tlsstream.c index 51a2145df3..192d499c8e 100644 --- a/lib/isc/netmgr/tlsstream.c +++ b/lib/isc/netmgr/tlsstream.c @@ -60,6 +60,12 @@ tls_error_to_result(const int tls_err, const int tls_state, isc_tls_t *tls) { } } +static void +tls_read_start(isc_nmsocket_t *sock); + +static void +tls_read_stop(isc_nmsocket_t *sock); + static void tls_failed_read_cb(isc_nmsocket_t *sock, const isc_result_t result); @@ -203,8 +209,13 @@ tls_failed_read_cb(isc_nmsocket_t *sock, const isc_result_t result) { tls_call_connect_cb(sock, handle, result); isc__nmsocket_clearcb(sock); isc_nmhandle_detach(&handle); - } else if (sock->recv_cb != NULL && sock->statichandle != NULL && - (sock->recv_read || result == ISC_R_TIMEDOUT)) + goto do_destroy; + } + + isc__nmsocket_timer_stop(sock); + + if (sock->recv_cb != NULL && sock->statichandle != NULL && + (sock->recv_read || result == ISC_R_TIMEDOUT)) { isc__nm_uvreq_t *req = NULL; INSIST(VALID_NMHANDLE(sock->statichandle)); @@ -218,13 +229,13 @@ tls_failed_read_cb(isc_nmsocket_t *sock, const isc_result_t result) { } isc__nm_readcb(sock, req, result); if (result == ISC_R_TIMEDOUT && - (sock->outerhandle == NULL || - isc__nmsocket_timer_running(sock->outerhandle->sock))) + isc__nmsocket_timer_running(sock)) { destroy = false; } } +do_destroy: if (destroy) { isc__nmsocket_prep_destroy(sock); } @@ -344,6 +355,8 @@ tls_try_handshake(isc_nmsocket_t *sock, isc_result_t *presult) { INSIST(sock->statichandle == NULL); isc__nmsocket_log_tls_session_reuse(sock, sock->tlsstream.tls); tlshandle = isc__nmhandle_get(sock, &sock->peer, &sock->iface); + isc__nmsocket_timer_stop(sock); + tls_read_stop(sock); if (isc__nm_closing(sock)) { result = ISC_R_SHUTTINGDOWN; @@ -437,6 +450,7 @@ tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data, sock->tlsstream.state = TLS_HANDSHAKE; rv = tls_try_handshake(sock, NULL); INSIST(SSL_is_init_finished(sock->tlsstream.tls) == 0); + isc__nmsocket_timer_restart(sock); } else if (sock->tlsstream.state == TLS_CLOSED) { return; } else { /* initialised and doing I/O */ @@ -502,6 +516,7 @@ tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data, !atomic_load(&sock->readpaused) && sock->statichandle != NULL && !finish) { + bool was_new_data = false; uint8_t recv_buf[TLS_BUF_SIZE]; INSIST(sock->tlsstream.state > TLS_HANDSHAKE); while ((rv = SSL_read_ex(sock->tlsstream.tls, recv_buf, @@ -510,7 +525,7 @@ tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data, isc_region_t region; region = (isc_region_t){ .base = &recv_buf[0], .length = len }; - + was_new_data = true; INSIST(VALID_NMHANDLE(sock->statichandle)); sock->recv_cb(sock->statichandle, ISC_R_SUCCESS, ®ion, sock->recv_cbarg); @@ -547,8 +562,29 @@ tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data, break; } } + + if (was_new_data && !sock->manual_read_timer) { + /* + * Some data has been decrypted, it is the right + * time to stop the read timer as it will be + * restarted on the next read attempt. + */ + isc__nmsocket_timer_stop(sock); + } } } + + /* + * Setting 'finish' to 'true' means that we are about to close the + * TLS stream (we intend to send TLS shutdown message to the + * remote side). After that no new data can be received, so we + * should stop the timer regardless of the + * 'sock->manual_read_timer' value. + */ + if (finish) { + isc__nmsocket_timer_stop(sock); + } + errno = 0; tls_status = SSL_get_error(sock->tlsstream.tls, rv); saved_errno = errno; @@ -601,14 +637,7 @@ tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data, return; } - INSIST(VALID_NMHANDLE(sock->outerhandle)); - - if (sock->tlsstream.reading) { - isc_nm_resumeread(sock->outerhandle); - } else if (sock->tlsstream.state == TLS_HANDSHAKE) { - sock->tlsstream.reading = true; - isc_nm_read(sock->outerhandle, tls_readcb, sock); - } + tls_read_start(sock); return; default: result = tls_error_to_result(tls_status, sock->tlsstream.state, @@ -743,6 +772,7 @@ tlslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { RUNTIME_CHECK(result == ISC_R_SUCCESS); /* TODO: catch failure code, detach tlssock, and log the error */ + isc__nmhandle_set_manual_timer(tlssock->outerhandle, true); tls_do_bio(tlssock, NULL, NULL, false); return result; } @@ -898,6 +928,29 @@ isc__nm_tls_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) { (isc__netievent_t *)ievent); } +static void +tls_read_start(isc_nmsocket_t *sock) { + INSIST(VALID_NMHANDLE(sock->outerhandle)); + + if (sock->tlsstream.reading) { + isc_nm_resumeread(sock->outerhandle); + } else if (sock->tlsstream.state == TLS_HANDSHAKE) { + sock->tlsstream.reading = true; + isc_nm_read(sock->outerhandle, tls_readcb, sock); + } + + if (!sock->manual_read_timer) { + isc__nmsocket_timer_start(sock); + } +} + +static void +tls_read_stop(isc_nmsocket_t *sock) { + if (sock->outerhandle != NULL) { + isc_nm_pauseread(sock->outerhandle); + } +} + void isc__nm_tls_pauseread(isc_nmhandle_t *handle) { REQUIRE(VALID_NMHANDLE(handle)); @@ -906,9 +959,11 @@ isc__nm_tls_pauseread(isc_nmhandle_t *handle) { if (atomic_compare_exchange_strong(&handle->sock->readpaused, &(bool){ false }, true)) { - if (handle->sock->outerhandle != NULL) { - isc_nm_pauseread(handle->sock->outerhandle); + if (!atomic_load(&handle->sock->manual_read_timer)) { + isc__nmsocket_timer_stop(handle->sock); } + + tls_read_stop(handle->sock); } } @@ -937,6 +992,7 @@ tls_close_direct(isc_nmsocket_t *sock) { * external references, we can close everything. */ if (sock->outerhandle != NULL) { + isc__nmsocket_timer_stop(sock); isc_nm_pauseread(sock->outerhandle); isc__nmsocket_clearcb(sock->outerhandle->sock); isc_nmhandle_detach(&sock->outerhandle); @@ -1085,6 +1141,7 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { */ handle->sock->tlsstream.tlssocket = tlssock; + isc__nmhandle_set_manual_timer(tlssock->outerhandle, true); tls_do_bio(tlssock, NULL, NULL, false); return; error: @@ -1251,6 +1308,44 @@ isc__nmhandle_tls_setwritetimeout(isc_nmhandle_t *handle, } } +bool +isc__nmsocket_tls_timer_running(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tlssocket); + + if (sock->outerhandle != NULL) { + INSIST(VALID_NMHANDLE(sock->outerhandle)); + REQUIRE(VALID_NMSOCK(sock->outerhandle->sock)); + return isc__nmsocket_timer_running(sock->outerhandle->sock); + } + + return false; +} + +void +isc__nmsocket_tls_timer_restart(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tlssocket); + + if (sock->outerhandle != NULL) { + INSIST(VALID_NMHANDLE(sock->outerhandle)); + REQUIRE(VALID_NMSOCK(sock->outerhandle->sock)); + isc__nmsocket_timer_restart(sock->outerhandle->sock); + } +} + +void +isc__nmsocket_tls_timer_stop(isc_nmsocket_t *sock) { + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tlssocket); + + if (sock->outerhandle != NULL) { + INSIST(VALID_NMHANDLE(sock->outerhandle)); + REQUIRE(VALID_NMSOCK(sock->outerhandle->sock)); + isc__nmsocket_timer_stop(sock->outerhandle->sock); + } +} + const char * isc__nm_tls_verify_tls_peer_result_string(const isc_nmhandle_t *handle) { isc_nmsocket_t *sock = NULL; @@ -1351,3 +1446,15 @@ tls_try_shutdown(isc_tls_t *tls, const bool force) { (void)SSL_shutdown(tls); } } + +void +isc__nmhandle_tls_set_manual_timer(isc_nmhandle_t *handle, const bool manual) { + isc_nmsocket_t *sock; + + REQUIRE(VALID_NMHANDLE(handle)); + sock = handle->sock; + REQUIRE(VALID_NMSOCK(sock)); + REQUIRE(sock->type == isc_nm_tlssocket); + + atomic_store(&sock->manual_read_timer, manual); +} From 125bfd71d3c8d0ad23477867f1e41a2392e03138 Mon Sep 17 00:00:00 2001 From: Artem Boldariev Date: Fri, 6 Sep 2024 17:28:58 +0300 Subject: [PATCH 3/7] Add isc__nm_async_run() This commit adds isc__nm_async_run() which is very similar to isc_async_run() in newer versions of BIND: it allows calling a callback asynchronously. Potentially, it can be used to replace some other async operations in other networking code, in particular the delayed I/O calls in TLS a TCP DNS transports to name a few and remove quiet a lot of code, but it we are unlikely to do that for the strictly maintenance only branch, so it is protected with DoH-related #ifdefs. It is implemented in a "universal" way mainly because doing it in the specific code requires the same amount of code and is not simpler. --- lib/isc/netmgr/netmgr-int.h | 57 ++++++++++++++++++++++++++++++++++++- lib/isc/netmgr/netmgr.c | 27 ++++++++++++++++++ 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index d3c790a857..f80fe9c1df 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -338,6 +338,7 @@ typedef enum isc__netievent_type { netievent_privilegedtask, netievent_settlsctx, + netievent_asyncrun, /* * event type values higher than this will be treated @@ -709,6 +710,42 @@ typedef struct isc__netievent__tlsctx { } #ifdef HAVE_LIBNGHTTP2 +typedef void (*isc__nm_asyncrun_cb_t)(void *); + +typedef struct isc__netievent__asyncrun { + isc__netievent_type type; + ISC_LINK(isc__netievent_t) link; + isc__nm_asyncrun_cb_t cb; + void *cbarg; +} isc__netievent__asyncrun_t; + +#define NETIEVENT_ASYNCRUN_TYPE(type) \ + typedef isc__netievent__asyncrun_t isc__netievent_##type##_t; + +#define NETIEVENT_ASYNCRUN_DECL(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc__nm_asyncrun_cb_t cb, void *cbarg); \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent); + +#define NETIEVENT_ASYNCRUN_DEF(type) \ + isc__netievent_##type##_t *isc__nm_get_netievent_##type( \ + isc_nm_t *nm, isc__nm_asyncrun_cb_t cb, void *cbarg) { \ + isc__netievent_##type##_t *ievent = \ + isc__nm_get_netievent(nm, netievent_##type); \ + ievent->cb = cb; \ + ievent->cbarg = cbarg; \ + \ + return (ievent); \ + } \ + \ + void isc__nm_put_netievent_##type(isc_nm_t *nm, \ + isc__netievent_##type##_t *ievent) { \ + ievent->cb = NULL; \ + ievent->cbarg = NULL; \ + isc__nm_put_netievent(nm, ievent); \ + } + typedef struct isc__netievent__http_eps { NETIEVENT__SOCKET; isc_nm_http_endpoints_t *endpoints; @@ -753,6 +790,7 @@ typedef union { isc__netievent_tlsconnect_t nitc; isc__netievent__tlsctx_t nitls; #ifdef HAVE_LIBNGHTTP2 + isc__netievent__asyncrun_t niasync; isc__netievent__http_eps_t nihttpeps; #endif /* HAVE_LIBNGHTTP2 */ } isc__netievent_storage_t; @@ -1914,7 +1952,10 @@ void isc__nm_http_set_max_streams(isc_nmsocket_t *listener, const uint32_t max_concurrent_streams); -#endif +void +isc__nm_async_asyncrun(isc__networker_t *worker, isc__netievent_t *ev0); + +#endif /* HAVE_LIBNGHTTP2 */ void isc__nm_async_settlsctx(isc__networker_t *worker, isc__netievent_t *ev0); @@ -2104,6 +2145,8 @@ NETIEVENT_SOCKET_TYPE(tlsdnscycle); NETIEVENT_SOCKET_REQ_TYPE(httpsend); NETIEVENT_SOCKET_TYPE(httpclose); NETIEVENT_SOCKET_HTTP_EPS_TYPE(httpendpoints); + +NETIEVENT_ASYNCRUN_TYPE(asyncrun); #endif /* HAVE_LIBNGHTTP2 */ NETIEVENT_SOCKET_REQ_TYPE(tcpconnect); @@ -2178,6 +2221,8 @@ NETIEVENT_SOCKET_DECL(tlsdnscycle); NETIEVENT_SOCKET_REQ_DECL(httpsend); NETIEVENT_SOCKET_DECL(httpclose); NETIEVENT_SOCKET_HTTP_EPS_DECL(httpendpoints); + +NETIEVENT_ASYNCRUN_DECL(asyncrun); #endif /* HAVE_LIBNGHTTP2 */ NETIEVENT_SOCKET_REQ_DECL(tcpconnect); @@ -2301,3 +2346,13 @@ isc__nmhandle_set_manual_timer(isc_nmhandle_t *handle, const bool manual); * Set manual read timer control mode - so that it will not get reset * automatically on read nor get started when read is initiated. */ + +#if HAVE_LIBNGHTTP2 +void +isc__nm_async_run(isc__networker_t *worker, isc__nm_asyncrun_cb_t cb, + void *cbarg); +/* + * Call the given callback asynchronously by the give network manager + * worker, pass the given argument to it. + */ +#endif /* HAVE_LIBNGHTTP2 */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index bc56d7a2ea..d9a5a07ef4 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -998,6 +998,8 @@ process_netievent(isc__networker_t *worker, isc__netievent_t *ievent) { NETIEVENT_CASE(httpsend); NETIEVENT_CASE(httpclose); NETIEVENT_CASE(httpendpoints); + + NETIEVENT_CASE(asyncrun); #endif NETIEVENT_CASE(settlsctx); NETIEVENT_CASE(sockstop); @@ -1116,6 +1118,8 @@ NETIEVENT_SOCKET_DEF(tlsdnsshutdown); NETIEVENT_SOCKET_REQ_DEF(httpsend); NETIEVENT_SOCKET_DEF(httpclose); NETIEVENT_SOCKET_HTTP_EPS_DEF(httpendpoints); + +NETIEVENT_ASYNCRUN_DEF(asyncrun); #endif /* HAVE_LIBNGHTTP2 */ NETIEVENT_SOCKET_REQ_DEF(tcpconnect); @@ -3983,6 +3987,29 @@ isc__nmhandle_set_manual_timer(isc_nmhandle_t *handle, const bool manual) { UNREACHABLE(); } +#if HAVE_LIBNGHTTP2 +void +isc__nm_async_run(isc__networker_t *worker, isc__nm_asyncrun_cb_t cb, + void *cbarg) { + isc__netievent__asyncrun_t *ievent = NULL; + REQUIRE(worker != NULL); + REQUIRE(cb != NULL); + + ievent = isc__nm_get_netievent_asyncrun(worker->mgr, cb, cbarg); + isc__nm_enqueue_ievent(worker, (isc__netievent_t *)ievent); +} + +void +isc__nm_async_asyncrun(isc__networker_t *worker, isc__netievent_t *ev0) { + isc__netievent_asyncrun_t *ievent = (isc__netievent_asyncrun_t *)ev0; + + UNUSED(worker); + + ievent->cb(ievent->cbarg); +} + +#endif /* HAVE_LIBNGHTTP2 */ + #ifdef NETMGR_TRACE /* * Dump all active sockets in netmgr. We output to stderr From 11a2956dce6f983d2bfcb532f5719791845b06ab Mon Sep 17 00:00:00 2001 From: Artem Boldariev Date: Thu, 4 Jul 2024 14:58:10 +0300 Subject: [PATCH 4/7] DoH: process data chunk by chunk instead of all at once Initially, our DNS-over-HTTP(S) implementation would try to process as much incoming data from the network as possible. However, that might be undesirable as we might create too many streams (each effectively backed by a ns_client_t object). That is too forgiving as it might overwhelm the server and trash its memory allocator, causing high CPU and memory usage. Instead of doing that, we resort to processing incoming data using a chunk-by-chunk processing strategy. That is, we split data into small chunks (currently 256 bytes) and process each of them asynchronously. However, we can process more than one chunk at once (up to 4 currently), given that the number of HTTP/2 streams has not increased while processing a chunk. That alone is not enough, though. In addition to the above, we should limit the number of active streams: these streams for which we have received a request and started processing it (the ones for which a read callback was called), as it is perfectly fine to have more opened streams than active ones. In the case we have reached or surpassed the limit of active streams, we stop reading AND processing the data from the remote peer. The number of active streams is effectively decreased only when responses associated with the active streams are sent to the remote peer. Overall, this strategy is very similar to the one used for other stream-based DNS transports like TCP and TLS. (cherry picked from commit 9846f395ad79bb50a5fa5ca6ab97ef904b3be35a) --- lib/isc/netmgr/http.c | 315 ++++++++++++++++++++++++++++++++---- lib/isc/netmgr/netmgr-int.h | 1 + 2 files changed, 286 insertions(+), 30 deletions(-) diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c index 37d6b23654..43431f7bf5 100644 --- a/lib/isc/netmgr/http.c +++ b/lib/isc/netmgr/http.c @@ -85,6 +85,21 @@ #define INITIAL_DNS_MESSAGE_BUFFER_SIZE (512) +/* + * The value should be small enough to not allow a server to open too + * many streams at once. It should not be too small either because + * the incoming data will be split into too many chunks with each of + * them processed asynchronously. + */ +#define INCOMING_DATA_CHUNK_SIZE (256) + +/* + * Often processing a chunk does not change the number of streams. In + * that case we can process more than once, but we still should have a + * hard limit on that. + */ +#define INCOMING_DATA_MAX_CHUNKS_AT_ONCE (4) + typedef struct isc_nm_http_response_status { size_t code; size_t content_length; @@ -155,6 +170,15 @@ struct isc_nm_http_session { isc__nm_http_pending_callbacks_t pending_write_callbacks; isc_buffer_t *pending_write_data; + + /* + * The statistical values below are for usage on server-side + * only. They are meant to detect clients that are taking too many + * resources from the server. + */ + uint64_t received; /* How many requests have been received. */ + uint64_t submitted; /* How many responses were submitted to send */ + uint64_t processed; /* How many responses were processed. */ }; typedef enum isc_http_error_responses { @@ -177,6 +201,7 @@ typedef struct isc_http_send_req { void *cbarg; isc_buffer_t *pending_write_data; isc__nm_http_pending_callbacks_t pending_write_callbacks; + uint64_t submitted; } isc_http_send_req_t; #define HTTP_ENDPOINTS_MAGIC ISC_MAGIC('H', 'T', 'E', 'P') @@ -189,10 +214,20 @@ static bool http_send_outgoing(isc_nm_http_session_t *session, isc_nmhandle_t *httphandle, isc_nm_cb_t cb, void *cbarg); +static ssize_t +http_process_input_data(isc_nm_http_session_t *session, + isc_buffer_t *input_data); + +static inline bool +http_too_many_active_streams(isc_nm_http_session_t *session); + static void http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, isc_nm_cb_t send_cb, void *send_cbarg); +static void +http_do_bio_async(isc_nm_http_session_t *session); + static void failed_httpstream_read_cb(isc_nmsocket_t *sock, isc_result_t result, isc_nm_http_session_t *session); @@ -496,6 +531,15 @@ finish_http_session(isc_nm_http_session_t *session) { isc_nm_cancelread(session->handle); } + /* + * Free any unprocessed incoming data in order to not process + * it during indirect calls to http_do_bio() that might happen + * when calling the failed callbacks. + */ + if (session->buf != NULL) { + isc_buffer_free(&session->buf); + } + if (session->client) { client_call_failed_read_cb(ISC_R_UNEXPECTED, session); } else { @@ -656,6 +700,9 @@ on_server_stream_close_callback(int32_t stream_id, ISC_LIST_UNLINK(session->sstreams, &sock->h2, link); session->nsstreams--; + if (sock->h2.request_received) { + session->submitted++; + } /* * By making a call to isc__nmsocket_prep_destroy(), we ensure that @@ -967,6 +1014,103 @@ client_submit_request(isc_nm_http_session_t *session, http_cstream_t *stream) { return ISC_R_SUCCESS; } +static ssize_t +http_process_input_data(isc_nm_http_session_t *session, + isc_buffer_t *input_data) { + ssize_t readlen = 0; + ssize_t processed = 0; + isc_region_t chunk = { 0 }; + size_t before, after; + size_t i; + + REQUIRE(VALID_HTTP2_SESSION(session)); + REQUIRE(input_data != NULL); + + if (!http_session_active(session)) { + return 0; + } + + /* + * For clients that initiate request themselves just process + * everything. + */ + if (session->client) { + isc_buffer_remainingregion(input_data, &chunk); + if (chunk.length == 0) { + return 0; + } + + readlen = nghttp2_session_mem_recv(session->ngsession, + chunk.base, chunk.length); + + if (readlen >= 0) { + isc_buffer_forward(input_data, readlen); + } + + return readlen; + } + + /* + * If no streams are created during processing, we might process + * more than one chunk at a time. Still we should not overdo that + * to avoid processing too much data at once as such behaviour is + * known for trashing the memory allocator at times. + */ + for (before = after = session->nsstreams, i = 0; + after <= before && i < INCOMING_DATA_MAX_CHUNKS_AT_ONCE; + after = session->nsstreams, i++) + { + const uint64_t active_streams = + (session->received - session->processed); + + /* + * If there are non completed send requests in flight -let's + * not process any incoming data, as it could lead to piling + * up too much send data in send buffers. With many clients + * connected it can lead to excessive memory consumption on + * the server instance. + */ + if (session->sending > 0) { + break; + } + + /* + * If we have reached the maximum number of streams used, we + * might stop processing for now, as nghttp2 will happily + * consume as much data as possible. + */ + if (session->nsstreams >= session->max_concurrent_streams && + active_streams > 0) + { + break; + } + + if (http_too_many_active_streams(session)) { + break; + } + + isc_buffer_remainingregion(input_data, &chunk); + if (chunk.length == 0) { + break; + } + + chunk.length = ISC_MIN(chunk.length, INCOMING_DATA_CHUNK_SIZE); + + readlen = nghttp2_session_mem_recv(session->ngsession, + chunk.base, chunk.length); + + if (readlen >= 0) { + isc_buffer_forward(input_data, readlen); + processed += readlen; + } else { + isc_buffer_clear(input_data); + return readlen; + } + } + + return processed; +} + /* * Read callback from TLS socket. */ @@ -976,6 +1120,7 @@ http_readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, isc_nm_http_session_t *session = (isc_nm_http_session_t *)data; isc_nm_http_session_t *tmpsess = NULL; ssize_t readlen; + isc_buffer_t input; REQUIRE(VALID_HTTP2_SESSION(session)); @@ -994,8 +1139,10 @@ http_readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, goto done; } - readlen = nghttp2_session_mem_recv(session->ngsession, region->base, - region->length); + isc_buffer_init(&input, region->base, region->length); + isc_buffer_add(&input, region->length); + + readlen = http_process_input_data(session, &input); if (readlen < 0) { failed_read_cb(ISC_R_UNEXPECTED, session); goto done; @@ -1011,11 +1158,12 @@ http_readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, isc_buffer_putmem(session->buf, region->base + readlen, unread_size); isc_nm_pauseread(session->handle); + http_do_bio_async(session); + } else { + /* We might have something to receive or send, do IO */ + http_do_bio(session, NULL, NULL, NULL); } - /* We might have something to receive or send, do IO */ - http_do_bio(session, NULL, NULL, NULL); - done: isc__nm_httpsession_detach(&tmpsess); } @@ -1053,14 +1201,18 @@ http_writecb(isc_nmhandle_t *handle, isc_result_t result, void *arg) { } isc_buffer_free(&req->pending_write_data); + session->processed += req->submitted; isc_mem_put(session->mctx, req, sizeof(*req)); session->sending--; - http_do_bio(session, NULL, NULL, NULL); - isc_nmhandle_detach(&transphandle); - if (result != ISC_R_SUCCESS && session->sending == 0) { + + if (result == ISC_R_SUCCESS) { + http_do_bio(session, NULL, NULL, NULL); + } else { finish_http_session(session); } + isc_nmhandle_detach(&transphandle); + isc__nm_httpsession_detach(&session); } @@ -1206,7 +1358,9 @@ http_send_outgoing(isc_nm_http_session_t *session, isc_nmhandle_t *httphandle, *send = (isc_http_send_req_t){ .pending_write_data = session->pending_write_data, .cb = cb, - .cbarg = cbarg }; + .cbarg = cbarg, + .submitted = session->submitted }; + session->submitted = 0; session->pending_write_data = NULL; move_pending_send_callbacks(session, send); @@ -1227,6 +1381,27 @@ nothing_to_send: return false; } +static inline bool +http_too_many_active_streams(isc_nm_http_session_t *session) { + const uint64_t active_streams = session->received - session->processed; + const uint64_t max_active_streams = ISC_MIN( + STREAM_CLIENTS_PER_CONN, session->max_concurrent_streams); + + if (session->client) { + return false; + } + + /* + * Do not process incoming data if there are too many active DNS + * clients (streams) per connection. + */ + if (active_streams >= max_active_streams) { + return true; + } + + return false; +} + static void http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, isc_nm_cb_t send_cb, void *send_cbarg) { @@ -1242,11 +1417,21 @@ http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, finish_http_session(session); } return; - } else if (nghttp2_session_want_read(session->ngsession) == 0 && - nghttp2_session_want_write(session->ngsession) == 0 && - session->pending_write_data == NULL) - { - session->closing = true; + } + + if (send_cb != NULL) { + INSIST(VALID_NMHANDLE(send_httphandle)); + (void)http_send_outgoing(session, send_httphandle, send_cb, + send_cbarg); + return; + } + + INSIST(send_httphandle == NULL); + INSIST(send_cb == NULL); + INSIST(send_cbarg == NULL); + + if (session->pending_write_data != NULL && session->sending == 0) { + (void)http_send_outgoing(session, NULL, NULL, NULL); return; } @@ -1259,18 +1444,49 @@ http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, size_t remaining = isc_buffer_remaininglength(session->buf); /* Leftover data in the buffer, use it */ - size_t readlen = nghttp2_session_mem_recv( - session->ngsession, - isc_buffer_current(session->buf), remaining); + size_t remaining_after = 0; + ssize_t readlen = 0; + isc_nm_http_session_t *tmpsess = NULL; - if (readlen == remaining) { + /* + * Let's ensure that HTTP/2 session and its associated + * data will not go "out of scope" too early. + */ + isc__nm_httpsession_attach(session, &tmpsess); + + readlen = http_process_input_data(session, + session->buf); + + remaining_after = + isc_buffer_remaininglength(session->buf); + + if (readlen < 0) { + failed_read_cb(ISC_R_UNEXPECTED, session); + } else if ((size_t)readlen == remaining) { isc_buffer_free(&session->buf); + http_do_bio(session, NULL, NULL, NULL); + } else if (remaining_after > 0 && + remaining_after < remaining) + { + /* + * We have processed a part of the data, now + * let's delay processing of whatever is left + * here. We want it to be an async operation so + * that we will: + * + * a) let other things run; + * b) have finer grained control over how much + * data is processed at once, because nghttp2 + * would happily consume as much data we pass to + * it and that could overwhelm the server. + */ + http_do_bio_async(session); } else { - isc_buffer_forward(session->buf, readlen); + (void)http_send_outgoing(session, NULL, NULL, + NULL); } - http_do_bio(session, send_httphandle, send_cb, - send_cbarg); + isc__nm_httpsession_detach(&tmpsess); return; } else { /* Resume reading, it's idempotent, wait for more */ @@ -1281,20 +1497,55 @@ http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, isc_nm_pauseread(session->handle); } - if (send_cb != NULL) { - INSIST(VALID_NMHANDLE(send_httphandle)); - (void)http_send_outgoing(session, send_httphandle, send_cb, - send_cbarg); - } else { - INSIST(send_httphandle == NULL); - INSIST(send_cb == NULL); - INSIST(send_cbarg == NULL); - (void)http_send_outgoing(session, NULL, NULL, NULL); + /* we might have some data to send after processing */ + (void)http_send_outgoing(session, NULL, NULL, NULL); + + if (nghttp2_session_want_read(session->ngsession) == 0 && + nghttp2_session_want_write(session->ngsession) == 0 && + session->pending_write_data == NULL) + { + session->closing = true; + isc_nm_pauseread(session->handle); + if (session->sending == 0) { + finish_http_session(session); + } } return; } +static void +http_do_bio_async_cb(void *arg) { + isc_nm_http_session_t *session = arg; + + REQUIRE(VALID_HTTP2_SESSION(session)); + + if (session->handle != NULL && + !isc__nmsocket_closing(session->handle->sock)) + { + http_do_bio(session, NULL, NULL, NULL); + } + + isc__nm_httpsession_detach(&session); +} + +static void +http_do_bio_async(isc_nm_http_session_t *session) { + isc_nm_http_session_t *tmpsess = NULL; + + REQUIRE(VALID_HTTP2_SESSION(session)); + + if (session->handle == NULL || + isc__nmsocket_closing(session->handle->sock)) + { + return; + } + isc__nm_httpsession_attach(session, &tmpsess); + isc__nm_async_run( + &session->handle->sock->mgr->workers[session->handle->sock->tid], + http_do_bio_async_cb, tmpsess); +} + static isc_result_t get_http_cstream(isc_nmsocket_t *sock, http_cstream_t **streamp) { http_cstream_t *cstream = sock->h2.connect.cstream; @@ -2039,6 +2290,10 @@ server_call_cb(isc_nmsocket_t *socket, const isc_result_t result, if (result != ISC_R_SUCCESS) { data = NULL; } + if (result == ISC_R_SUCCESS) { + socket->h2.request_received = true; + socket->h2.session->received++; + } socket->h2.cb(handle, result, data, socket->h2.cbarg); isc_nmhandle_detach(&handle); } diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index f80fe9c1df..4b9b587441 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -983,6 +983,7 @@ typedef struct isc_nmsocket_h2 { isc_nm_http_endpoints_t *peer_endpoints; + bool request_received; bool response_submitted; struct { char *uri; From ee42514be2cc56f5d31fdb9d0c6a838e0d372654 Mon Sep 17 00:00:00 2001 From: Artem Boldariev Date: Mon, 8 Jul 2024 23:27:29 +0300 Subject: [PATCH 5/7] DoH: floodding clients detection This commit adds logic to make code better protected against clients that send valid HTTP/2 data that is useless from a DNS server perspective. Firstly, it adds logic that protects against clients who send too little useful (=DNS) data. We achieve that by adding a check that eventually detects such clients with a nonfavorable useful to processed data ratio after the initial grace period. The grace period is limited to processing 128 KiB of data, which should be enough for sending the largest possible DNS message in a GET request and then some. This is the main safety belt that would detect even flooding clients that initially behave well in order to fool the checks server. Secondly, in addition to the above, we introduce additional checks to detect outright misbehaving clients earlier: The code will treat clients that open too many streams (50) without sending any data for processing as flooding ones; The clients that managed to send 1.5 KiB of data without opening a single stream or submitting at least some DNS data will be treated as flooding ones. Of course, the behaviour described above is nothing else but heuristical checks, so they can never be perfect. At the same time, they should be reasonable enough not to drop any valid clients, realatively easy to implement, and have negligible computational overhead. (cherry picked from commit 3425e4b1d04746520931e93ac7ef5979fd6b54fd) --- lib/isc/netmgr/http.c | 116 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c index 43431f7bf5..f48841856a 100644 --- a/lib/isc/netmgr/http.c +++ b/lib/isc/netmgr/http.c @@ -100,6 +100,22 @@ */ #define INCOMING_DATA_MAX_CHUNKS_AT_ONCE (4) +/* + * These constants define the grace period to help detect flooding clients. + * + * The first one defines how much data can be processed before opening + * a first stream and received at least some useful (=DNS) data. + * + * The second one defines how much data from a client we read before + * trying to drop a clients who sends not enough useful data. + * + * The third constant defines how many streams we agree to process + * before checking if there was at least one DNS request received. + */ +#define INCOMING_DATA_INITIAL_STREAM_SIZE (1536) +#define INCOMING_DATA_GRACE_SIZE (MAX_ALLOWED_DATA_IN_HEADERS) +#define MAX_STREAMS_BEFORE_FIRST_REQUEST (50) + typedef struct isc_nm_http_response_status { size_t code; size_t content_length; @@ -158,6 +174,7 @@ struct isc_nm_http_session { ISC_LIST(http_cstream_t) cstreams; ISC_LIST(isc_nmsocket_h2_t) sstreams; size_t nsstreams; + uint64_t total_opened_sstreams; isc_nmhandle_t *handle; isc_nmhandle_t *client_httphandle; @@ -179,6 +196,9 @@ struct isc_nm_http_session { uint64_t received; /* How many requests have been received. */ uint64_t submitted; /* How many responses were submitted to send */ uint64_t processed; /* How many responses were processed. */ + + uint64_t processed_incoming_data; + uint64_t processed_useful_data; /* DNS data */ }; typedef enum isc_http_error_responses { @@ -214,6 +234,12 @@ static bool http_send_outgoing(isc_nm_http_session_t *session, isc_nmhandle_t *httphandle, isc_nm_cb_t cb, void *cbarg); +static void +http_log_flooding_peer(isc_nm_http_session_t *session); + +static bool +http_is_flooding_peer(isc_nm_http_session_t *session); + static ssize_t http_process_input_data(isc_nm_http_session_t *session, isc_buffer_t *input_data); @@ -611,6 +637,7 @@ on_server_data_chunk_recv_callback(int32_t stream_id, const uint8_t *data, if (new_bufsize <= MAX_DNS_MESSAGE_SIZE && new_bufsize <= h2->content_length) { + session->processed_useful_data += len; isc_buffer_putmem(&h2->rbuf, data, len); break; } @@ -1045,6 +1072,7 @@ http_process_input_data(isc_nm_http_session_t *session, if (readlen >= 0) { isc_buffer_forward(input_data, readlen); + session->processed_incoming_data += readlen; } return readlen; @@ -1101,6 +1129,7 @@ http_process_input_data(isc_nm_http_session_t *session, if (readlen >= 0) { isc_buffer_forward(input_data, readlen); + session->processed_incoming_data += readlen; processed += readlen; } else { isc_buffer_clear(input_data); @@ -1111,6 +1140,83 @@ http_process_input_data(isc_nm_http_session_t *session, return processed; } +static void +http_log_flooding_peer(isc_nm_http_session_t *session) { + const int log_level = ISC_LOG_DEBUG(1); + if (session->handle != NULL && isc_log_wouldlog(isc_lctx, log_level)) { + char client_sabuf[ISC_SOCKADDR_FORMATSIZE]; + char local_sabuf[ISC_SOCKADDR_FORMATSIZE]; + + isc_sockaddr_format(&session->handle->sock->peer, client_sabuf, + sizeof(client_sabuf)); + isc_sockaddr_format(&session->handle->sock->iface, local_sabuf, + sizeof(local_sabuf)); + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_NETMGR, log_level, + "Dropping a flooding HTTP/2 peer " + "%s (on %s) - processed: %" PRIu64 + " bytes, of them useful: %" PRIu64 "", + client_sabuf, local_sabuf, + session->processed_incoming_data, + session->processed_useful_data); + } +} + +static bool +http_is_flooding_peer(isc_nm_http_session_t *session) { + if (session->client) { + return false; + } + + /* + * A flooding client can try to open a lot of streams before + * submitting a request. Let's drop such clients. + */ + if (session->received == 0 && + session->total_opened_sstreams > MAX_STREAMS_BEFORE_FIRST_REQUEST) + { + return true; + } + + /* + * We have processed enough data to open at least one stream and + * get some useful data. + */ + if (session->processed_incoming_data > + INCOMING_DATA_INITIAL_STREAM_SIZE && + (session->total_opened_sstreams == 0 || + session->processed_useful_data == 0)) + { + return true; + } + + if (session->processed_incoming_data < INCOMING_DATA_GRACE_SIZE) { + return false; + } + + /* + * The overhead of DoH per DNS message can be minimum 160-180 + * bytes. We should allow more for extra information that can be + * included in headers, so let's use 256 bytes. Minimum DNS + * message size is 12 bytes. So, (256+12)/12=22. Even that can be + * too restricting for some edge cases, but should be good enough + * for any practical purposes. Not to mention that HTTP/2 may + * include legitimate data that is completely useless for DNS + * purposes... + * + * Anyway, at that point we should have processed enough requests + * for such clients (if any). + */ + if (session->processed_useful_data == 0 || + (session->processed_incoming_data / + session->processed_useful_data) > 22) + { + return true; + } + + return false; +} + /* * Read callback from TLS socket. */ @@ -1146,6 +1252,10 @@ http_readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, if (readlen < 0) { failed_read_cb(ISC_R_UNEXPECTED, session); goto done; + } else if (http_is_flooding_peer(session)) { + http_log_flooding_peer(session); + failed_read_cb(ISC_R_RANGE, session); + goto done; } if ((size_t)readlen < region->length) { @@ -1462,6 +1572,9 @@ http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, if (readlen < 0) { failed_read_cb(ISC_R_UNEXPECTED, session); + } else if (http_is_flooding_peer(session)) { + http_log_flooding_peer(session); + failed_read_cb(ISC_R_RANGE, session); } else if ((size_t)readlen == remaining) { isc_buffer_free(&session->buf); http_do_bio(session, NULL, NULL, NULL); @@ -1921,6 +2034,7 @@ server_on_begin_headers_callback(nghttp2_session *ngsession, socket->tid = session->handle->sock->tid; ISC_LINK_INIT(&socket->h2, link); ISC_LIST_APPEND(session->sstreams, &socket->h2, link); + session->total_opened_sstreams++; nghttp2_session_set_stream_user_data(ngsession, frame->hd.stream_id, socket); @@ -1981,6 +2095,8 @@ server_handle_path_header(isc_nmsocket_t *socket, const uint8_t *value, socket->mgr->mctx, dns_value, dns_value_len, &socket->h2.query_data_len); + socket->h2.session->processed_useful_data += + dns_value_len; } else { socket->h2.query_too_large = true; return ISC_HTTP_ERROR_PAYLOAD_TOO_LARGE; From 796708775d178adc5256ce03956d134c9fd38a33 Mon Sep 17 00:00:00 2001 From: Artem Boldariev Date: Tue, 9 Jul 2024 23:23:11 +0300 Subject: [PATCH 6/7] DoH: introduce manual read timer control This commit introduces manual read timer control as used by StreamDNS and its underlying transports. Before that, DoH code would rely on the timer control provided by TCP, which would reset the timer any time some data arrived. Now, the timer is restarted only when a full DNS message is processed in line with other DNS transports. That change is required because we should not stop the timer when reading from the network is paused due to throttling. We need a way to drop timed-out clients, particularly those who refuse to read the data we send. (cherry picked from commit 609a41517b1631c320876a41c43c68c9a0ee0f9f) --- lib/isc/netmgr/http.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c index f48841856a..d3e84b6602 100644 --- a/lib/isc/netmgr/http.c +++ b/lib/isc/netmgr/http.c @@ -555,6 +555,7 @@ finish_http_session(isc_nm_http_session_t *session) { if (!session->closed) { session->closed = true; isc_nm_cancelread(session->handle); + isc__nmsocket_timer_stop(session->handle->sock); } /* @@ -686,6 +687,9 @@ call_unlink_cstream_readcb(http_cstream_t *cstream, isc_buffer_usedregion(cstream->rbuf, &read_data); cstream->read_cb(session->client_httphandle, result, &read_data, cstream->read_cbarg); + if (result == ISC_R_SUCCESS) { + isc__nmsocket_timer_restart(session->handle->sock); + } put_http_cstream(session->mctx, cstream); } @@ -1548,6 +1552,7 @@ http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, if (nghttp2_session_want_read(session->ngsession) != 0) { if (!session->reading) { /* We have not yet started reading from this handle */ + isc__nmsocket_timer_start(session->handle->sock); isc_nm_read(session->handle, http_readcb, session); session->reading = true; } else if (session->buf != NULL) { @@ -1604,6 +1609,7 @@ http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle, } else { /* Resume reading, it's idempotent, wait for more */ isc_nm_resumeread(session->handle); + isc__nmsocket_timer_start(session->handle->sock); } } else { /* We don't want more data, stop reading for now */ @@ -1788,6 +1794,7 @@ transport_connect_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { } http_transpost_tcp_nodelay(handle); + isc__nmhandle_set_manual_timer(session->handle, true); http_call_connect_cb(http_sock, session, result); @@ -2405,6 +2412,8 @@ server_call_cb(isc_nmsocket_t *socket, const isc_result_t result, handle = isc__nmhandle_get(socket, NULL, NULL); if (result != ISC_R_SUCCESS) { data = NULL; + } else if (socket->h2.session->handle != NULL) { + isc__nmsocket_timer_restart(socket->h2.session->handle->sock); } if (result == ISC_R_SUCCESS) { socket->h2.request_received = true; @@ -2846,6 +2855,8 @@ httplisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { isc__nmsocket_attach(httplistensock, &session->serversocket); server_send_connection_header(session); + isc__nmhandle_set_manual_timer(session->handle, true); + /* TODO H2 */ http_do_bio(session, NULL, NULL, NULL); return ISC_R_SUCCESS; From 550b692343e398e0debe18ddb5908b4ccb183b91 Mon Sep 17 00:00:00 2001 From: Artem Boldariev Date: Thu, 18 Jul 2024 20:21:53 +0300 Subject: [PATCH 7/7] DoH: reduce excessive bad request logging We started using isc_nm_bad_request() more actively throughout codebase. In the case of HTTP/2 it can lead to a large count of useless "Bad Request" messages in the BIND log, as often we attempt to send such request over effectively finished HTTP/2 sessions. This commit fixes that. (cherry picked from commit 937b5f8349a6a5e15af254a53a659e39c7c1d179) --- lib/isc/netmgr/http.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c index d3e84b6602..a00f4a7229 100644 --- a/lib/isc/netmgr/http.c +++ b/lib/isc/netmgr/http.c @@ -2434,6 +2434,12 @@ isc__nm_http_bad_request(isc_nmhandle_t *handle) { REQUIRE(!atomic_load(&sock->client)); REQUIRE(VALID_HTTP2_SESSION(sock->h2.session)); + if (sock->h2.response_submitted || + !http_session_active(sock->h2.session)) + { + return; + } + (void)server_send_error_response(ISC_HTTP_ERROR_BAD_REQUEST, sock->h2.session->ngsession, sock); }