From 282c4709b87ca370e6f00d4be39551064e2f19be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 14 Sep 2023 18:01:39 +0200 Subject: [PATCH 1/5] Rewrite the QID lookup table to cds_lfht Looking up unique message ID in the dns_dispatch has been using custom hash tables. Rewrite the custom hashtable to use cds_lfht API, removing one extra lock in the cold-cache resolver hot path. --- lib/dns/dispatch.c | 315 +++++++++++++-------------------- lib/isc/include/isc/sockaddr.h | 1 + 2 files changed, 122 insertions(+), 194 deletions(-) diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index d79614522e..01562839bf 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -20,6 +20,8 @@ #include #include +#include +#include #include #include #include @@ -32,6 +34,7 @@ #include #include #include +#include #include #include @@ -44,14 +47,6 @@ typedef ISC_LIST(dns_dispentry_t) dns_displist_t; -typedef struct dns_qid { - unsigned int magic; - isc_mutex_t lock; - unsigned int qid_nbuckets; /*%< hash table size */ - unsigned int qid_increment; /*%< id increment on collision */ - dns_displist_t *qid_table; /*%< the table itself */ -} dns_qid_t; - struct dns_dispatchmgr { /* Unlocked. */ unsigned int magic; @@ -65,7 +60,7 @@ struct dns_dispatchmgr { isc_mutex_t lock; ISC_LIST(dns_dispatch_t) list; - dns_qid_t *qid; + struct cds_lfht *qids; in_port_t *v4ports; /*%< available ports for IPv4 */ unsigned int nv4ports; /*%< # of available ports for IPv4 */ @@ -83,13 +78,13 @@ typedef enum { struct dns_dispentry { unsigned int magic; isc_refcount_t references; + isc_mem_t *mctx; dns_dispatch_t *disp; isc_loop_t *loop; isc_nmhandle_t *handle; /*%< netmgr handle for UDP connection */ dns_dispatchstate_t state; dns_transport_t *transport; isc_tlsctx_cache_t *tlsctx_cache; - unsigned int bucket; unsigned int retries; unsigned int timeout; isc_time_t start; @@ -103,10 +98,12 @@ struct dns_dispentry { void *arg; bool reading; isc_result_t result; - ISC_LINK(dns_dispentry_t) link; ISC_LINK(dns_dispentry_t) alink; ISC_LINK(dns_dispentry_t) plink; ISC_LINK(dns_dispentry_t) rlink; + + struct cds_lfht_node ht_node; + struct rcu_head rcu_head; }; struct dns_dispatch { @@ -138,9 +135,6 @@ struct dns_dispatch { unsigned int timedout; }; -#define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ') -#define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC) - #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p') #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC) @@ -153,19 +147,6 @@ struct dns_dispatch { #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r') #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC) -/*% - * Number of buckets in the QID hash table, and the value to - * increment the QID by when attempting to avoid collisions. - * The number of buckets should be prime, and the increment - * should be the next higher prime number. - */ -#ifndef DNS_QID_BUCKETS -#define DNS_QID_BUCKETS 16411 -#endif /* ifndef DNS_QID_BUCKETS */ -#ifndef DNS_QID_INCREMENT -#define DNS_QID_INCREMENT 16433 -#endif /* ifndef DNS_QID_INCREMENT */ - #if DNS_DISPATCH_TRACE #define dns_dispentry_ref(ptr) \ dns_dispentry__ref(ptr, __func__, __FILE__, __LINE__) @@ -180,33 +161,35 @@ ISC_REFCOUNT_TRACE_DECL(dns_dispentry); ISC_REFCOUNT_DECL(dns_dispentry); #endif +/* + * The number of attempts to find unique combination + */ +#define QID_MAX_TRIES 64 + +/* + * Initial and minimum QID table sizes. + */ +#define QIDS_INIT_SIZE (1 << 4) /* Must be power of 2 */ +#define QIDS_MIN_SIZE (1 << 4) /* Must be power of 2 */ + /* * Statics. */ static void dispatchmgr_destroy(dns_dispatchmgr_t *mgr); -static dns_dispentry_t * -entry_search(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t, - unsigned int); static void udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, void *arg); static void tcp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, void *arg); -static uint32_t -dns_hash(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t); static void dispentry_cancel(dns_dispentry_t *resp, isc_result_t result); static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, dns_dispatch_t **dispp); static void -qid_allocate(dns_dispatchmgr_t *mgr, dns_qid_t **qidp); -static void -qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); -static void udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp); static void udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp); @@ -357,23 +340,6 @@ dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...) { socktype2str(resp), resp, msgbuf); } -/* - * Return a hash of the destination and message id. - */ -static uint32_t -dns_hash(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, - in_port_t port) { - uint32_t ret; - - ret = isc_sockaddr_hash(dest, true); - ret ^= ((uint32_t)id << 16) | port; - ret %= qid->qid_nbuckets; - - INSIST(ret < qid->qid_nbuckets); - - return (ret); -} - /*% * Choose a random port number for a dispatch entry. * The caller must hold the disp->lock @@ -414,31 +380,31 @@ setup_socket(dns_dispatch_t *disp, dns_dispentry_t *resp, return (ISC_R_SUCCESS); } -/* - * Find an entry for query ID 'id', socket address 'dest', and port number - * 'port'. - * Return NULL if no such entry exists. - */ -static dns_dispentry_t * -entry_search(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, - in_port_t port, unsigned int bucket) { - dns_dispentry_t *res = NULL; +static uint32_t +qid_hash(const dns_dispentry_t *dispentry) { + /* + * TODO(OS): Add incremental isc_sockaddr_hash() function and then use + * isc_hash32 API + */ + uint32_t hashval = isc_sockaddr_hash(&dispentry->peer, true); + return (hashval ^ (((uint32_t)dispentry->id << 16) | dispentry->port)); +} - REQUIRE(VALID_QID(qid)); - REQUIRE(bucket < qid->qid_nbuckets); +static int +qid_match(struct cds_lfht_node *node, const void *key0) { + const dns_dispentry_t *dispentry = + caa_container_of(node, dns_dispentry_t, ht_node); + const dns_dispentry_t *key = key0; - res = ISC_LIST_HEAD(qid->qid_table[bucket]); + return (dispentry->id == key->id && dispentry->port == key->port && + isc_sockaddr_equal(&dispentry->peer, &key->peer)); +} - while (res != NULL) { - if (res->id == id && isc_sockaddr_equal(dest, &res->peer) && - res->port == port) - { - return (res); - } - res = ISC_LIST_NEXT(res, link); - } - - return (NULL); +static void +dispentry_destroy_rcu(struct rcu_head *rcu_head) { + dns_dispentry_t *resp = caa_container_of(rcu_head, dns_dispentry_t, + rcu_head); + isc_mem_putanddetach(&resp->mctx, resp, sizeof(*resp)); } static void @@ -460,7 +426,6 @@ dispentry_destroy(dns_dispentry_t *resp) { resp->magic = 0; - INSIST(!ISC_LINK_LINKED(resp, link)); INSIST(!ISC_LINK_LINKED(resp, plink)); INSIST(!ISC_LINK_LINKED(resp, alink)); INSIST(!ISC_LINK_LINKED(resp, rlink)); @@ -481,9 +446,9 @@ dispentry_destroy(dns_dispentry_t *resp) { dns_transport_detach(&resp->transport); } - isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); - dns_dispatch_detach(&disp); /* DISPATCH001 */ + + call_rcu(&resp->rcu_head, dispentry_destroy_rcu); } #if DNS_DISPATCH_TRACE @@ -677,15 +642,16 @@ tcp_recv_oldest(dns_dispatch_t *disp, dns_dispentry_t **respp) { return (ISC_R_NOTFOUND); } +/* + * NOTE: Must be RCU read locked! + */ static isc_result_t -tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, +tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, isc_sockaddr_t *peer, dns_dispentry_t **respp) { isc_buffer_t source; dns_messageid_t id; unsigned int flags; - unsigned int bucket; isc_result_t result = ISC_R_SUCCESS; - dns_dispentry_t *resp = NULL; dispatch_log(disp, LVL(90), "TCP read success, length == %d, addr = %p", region->length, region->base); @@ -718,26 +684,32 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, * We have a valid response; find the associated dispentry object * and call the caller back. */ - bucket = dns_hash(qid, peer, id, disp->localport); - LOCK(&qid->lock); - resp = entry_search(qid, peer, id, disp->localport, bucket); + dns_dispentry_t key = { + .id = id, + .peer = *peer, + .port = disp->localport, + }; + struct cds_lfht_iter iter; + cds_lfht_lookup(disp->mgr->qids, qid_hash(&key), qid_match, &key, + &iter); + + dns_dispentry_t *resp = cds_lfht_entry(cds_lfht_iter_get_node(&iter), + dns_dispentry_t, ht_node); if (resp != NULL) { - if (resp->reading) { - *respp = resp; - } else { + if (!resp->reading) { /* * We already got a message for this QID and weren't * expecting any more. */ result = ISC_R_UNEXPECTED; + } else { + *respp = resp; } } else { - /* We are not expecting this DNS message */ result = ISC_R_NOTFOUND; } - dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", - bucket, isc_result_totext(result)); - UNLOCK(&qid->lock); + dispatch_log(disp, LVL(90), "search for response in hashtable: %s", + isc_result_totext(result)); return (result); } @@ -801,7 +773,6 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, void *arg) { dns_dispatch_t *disp = (dns_dispatch_t *)arg; dns_dispentry_t *resp = NULL; - dns_qid_t *qid = NULL; char buf[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_t peer; dns_displist_t resps = ISC_LIST_INITIALIZER; @@ -810,8 +781,6 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, REQUIRE(VALID_DISPATCH(disp)); - qid = disp->mgr->qid; - LOCK(&disp->lock); INSIST(disp->reading); disp->reading = false; @@ -821,6 +790,7 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, peer = isc_nmhandle_peeraddr(handle); + rcu_read_lock(); /* * Phase 1: Process timeout and success. */ @@ -833,7 +803,7 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, break; case ISC_R_SUCCESS: /* We got an answer */ - result = tcp_recv_success(disp, region, qid, &peer, &resp); + result = tcp_recv_success(disp, region, &peer, &resp); break; default: @@ -917,6 +887,7 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, } UNLOCK(&disp->lock); + rcu_read_unlock(); /* * Phase 6: Process all scheduled callbacks. @@ -1019,15 +990,18 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, ISC_LIST_INIT(mgr->list); - create_default_portset(mctx, AF_INET, &v4portset); - create_default_portset(mctx, AF_INET6, &v6portset); + create_default_portset(mgr->mctx, AF_INET, &v4portset); + create_default_portset(mgr->mctx, AF_INET6, &v6portset); setavailports(mgr, v4portset, v6portset); - isc_portset_destroy(mctx, &v4portset); - isc_portset_destroy(mctx, &v6portset); + isc_portset_destroy(mgr->mctx, &v4portset); + isc_portset_destroy(mgr->mctx, &v6portset); + + mgr->qids = cds_lfht_new(QIDS_INIT_SIZE, QIDS_MIN_SIZE, 0, + CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, + NULL); - qid_allocate(mgr, &mgr->qid); mgr->magic = DNS_DISPATCHMGR_MAGIC; *mgrp = mgr; @@ -1071,7 +1045,7 @@ dispatchmgr_destroy(dns_dispatchmgr_t *mgr) { mgr->magic = 0; isc_mutex_destroy(&mgr->lock); - qid_destroy(mgr->mctx, &mgr->qid); + RUNTIME_CHECK(!cds_lfht_destroy(mgr->qids, NULL)); if (mgr->blackhole != NULL) { dns_acl_detach(&mgr->blackhole); @@ -1104,45 +1078,6 @@ dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) { isc_stats_attach(stats, &mgr->stats); } -static void -qid_allocate(dns_dispatchmgr_t *mgr, dns_qid_t **qidp) { - dns_qid_t *qid = NULL; - unsigned int i; - - REQUIRE(qidp != NULL && *qidp == NULL); - - qid = isc_mem_get(mgr->mctx, sizeof(*qid)); - *qid = (dns_qid_t){ .qid_nbuckets = DNS_QID_BUCKETS, - .qid_increment = DNS_QID_INCREMENT }; - - qid->qid_table = isc_mem_cget(mgr->mctx, DNS_QID_BUCKETS, - sizeof(dns_displist_t)); - for (i = 0; i < qid->qid_nbuckets; i++) { - ISC_LIST_INIT(qid->qid_table[i]); - } - - isc_mutex_init(&qid->lock); - qid->magic = QID_MAGIC; - *qidp = qid; -} - -static void -qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) { - dns_qid_t *qid = NULL; - - REQUIRE(qidp != NULL); - qid = *qidp; - *qidp = NULL; - - REQUIRE(VALID_QID(qid)); - - qid->magic = 0; - isc_mem_cput(mctx, qid->qid_table, qid->qid_nbuckets, - sizeof(dns_displist_t)); - isc_mutex_destroy(&qid->lock); - isc_mem_put(mctx, qid, sizeof(*qid)); -} - /* * Allocate and set important limits. */ @@ -1451,12 +1386,9 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, dispatch_cb_t response, void *arg, dns_messageid_t *idp, dns_dispentry_t **respp) { dns_dispentry_t *resp = NULL; - dns_qid_t *qid = NULL; in_port_t localport; - dns_messageid_t id; - unsigned int bucket; - bool ok = false; int i = 0; + isc_result_t result = ISC_R_UNSET; REQUIRE(VALID_DISPATCH(disp)); REQUIRE(dest != NULL); @@ -1476,8 +1408,6 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, return (ISC_R_CANCELED); } - qid = disp->mgr->qid; - localport = isc_sockaddr_getport(&disp->local); resp = isc_mem_get(disp->mgr->mctx, sizeof(*resp)); @@ -1490,7 +1420,6 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, .sent = sent, .response = response, .arg = arg, - .link = ISC_LINK_INITIALIZER, .alink = ISC_LINK_INITIALIZER, .plink = ISC_LINK_INITIALIZER, .rlink = ISC_LINK_INITIALIZER, @@ -1504,8 +1433,7 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, isc_refcount_init(&resp->references, 1); /* DISPENTRY000 */ if (disp->socktype == isc_socktype_udp) { - isc_result_t result = setup_socket(disp, resp, dest, - &localport); + result = setup_socket(disp, resp, dest, &localport); if (result != ISC_R_SUCCESS) { isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); UNLOCK(&disp->lock); @@ -1514,47 +1442,43 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, } } - /* - * Try somewhat hard to find a unique ID. Start with - * a random number unless DNS_DISPATCHOPT_FIXEDID is set, - * in which case we start with the ID passed in via *idp. - */ - if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { - id = *idp; - } else { - id = (dns_messageid_t)isc_random16(); - } - - LOCK(&qid->lock); + rcu_read_lock(); do { - dns_dispentry_t *entry = NULL; - bucket = dns_hash(qid, dest, id, localport); - entry = entry_search(qid, dest, id, localport, bucket); - if (entry == NULL) { - ok = true; + /* + * Try somewhat hard to find a unique ID. Start with + * a random number unless DNS_DISPATCHOPT_FIXEDID is set, + * in which case we start with the ID passed in via *idp. + */ + resp->id = ((options & DNS_DISPATCHOPT_FIXEDID) != 0) + ? *idp + : (dns_messageid_t)isc_random16(); + + struct cds_lfht_node *node = + cds_lfht_add_unique(disp->mgr->qids, qid_hash(resp), + qid_match, resp, &resp->ht_node); + + if (node != &resp->ht_node) { + result = ISC_R_EXISTS; + if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { + /* + * When using fixed ID, we either must + * use it or fail + */ + goto fail; + } + } else { + result = ISC_R_SUCCESS; break; } - if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { - /* When using fixed ID, we either must use it or fail */ - break; - } - id += qid->qid_increment; - id &= 0x0000ffff; - } while (i++ < 64); - - if (ok) { - resp->id = id; - resp->bucket = bucket; - ISC_LIST_APPEND(qid->qid_table[bucket], resp, link); - } - UNLOCK(&qid->lock); - - if (!ok) { + } while (i++ < QID_MAX_TRIES); +fail: + if (result != ISC_R_SUCCESS) { isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); - UNLOCK(&disp->lock); return (ISC_R_NOMORE); } + isc_mem_attach(disp->mgr->mctx, &resp->mctx); + if (transport != NULL) { dns_transport_attach(transport, &resp->transport); } @@ -1571,9 +1495,10 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, ? dns_resstatscounter_disprequdp : dns_resstatscounter_dispreqtcp); + rcu_read_unlock(); UNLOCK(&disp->lock); - *idp = id; + *idp = resp->id; *respp = resp; return (ISC_R_SUCCESS); @@ -1612,6 +1537,9 @@ dns_dispatch_getnext(dns_dispentry_t *resp) { return (result); } +/* + * NOTE: Must be RCU read locked! + */ static void udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { REQUIRE(VALID_RESPONSE(resp)); @@ -1619,8 +1547,6 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr)); dns_dispatch_t *disp = resp->disp; - dns_dispatchmgr_t *mgr = disp->mgr; - dns_qid_t *qid = mgr->qid; bool respond = false; LOCK(&disp->lock); @@ -1662,9 +1588,8 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dec_stats(disp->mgr, dns_resstatscounter_disprequdp); - LOCK(&qid->lock); - ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link); - UNLOCK(&qid->lock); + (void)cds_lfht_del(disp->mgr->qids, &resp->ht_node); + resp->state = DNS_DISPATCHSTATE_CANCELED; unlock: @@ -1677,6 +1602,9 @@ unlock: } } +/* + * NOTE: Must be RCU read locked! + */ static void tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { REQUIRE(VALID_RESPONSE(resp)); @@ -1684,8 +1612,6 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr)); dns_dispatch_t *disp = resp->disp; - dns_dispatchmgr_t *mgr = disp->mgr; - dns_qid_t *qid = mgr->qid; dns_displist_t resps = ISC_LIST_INITIALIZER; LOCK(&disp->lock); @@ -1761,9 +1687,8 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dec_stats(disp->mgr, dns_resstatscounter_dispreqtcp); - LOCK(&qid->lock); - ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link); - UNLOCK(&qid->lock); + (void)cds_lfht_del(disp->mgr->qids, &resp->ht_node); + resp->state = DNS_DISPATCHSTATE_CANCELED; unlock: @@ -1787,6 +1712,7 @@ dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dns_dispatch_t *disp = resp->disp; + rcu_read_lock(); switch (disp->socktype) { case isc_socktype_udp: udp_dispentry_cancel(resp, result); @@ -1797,6 +1723,7 @@ dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { default: UNREACHABLE(); } + rcu_read_unlock(); } void diff --git a/lib/isc/include/isc/sockaddr.h b/lib/isc/include/isc/sockaddr.h index 5d86d49895..0e6452e614 100644 --- a/lib/isc/include/isc/sockaddr.h +++ b/lib/isc/include/isc/sockaddr.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include From 6fd06c461bec2e397a8fc79ffa508785cf164740 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Fri, 15 Sep 2023 11:36:28 +0200 Subject: [PATCH 2/5] Make dns_dispatch bound to threads Instead of high number of dispatches (4 * named_g_udpdisp)[1], make the dispatches bound to threads and make dns_dispatchset_t create a dispatch for each thread (event loop). This required couple of other changes: 1. The dns_dispatch_createudp() must be called on loop, so the isc_tid() is already initialized - changes to nsupdate and mdig were required. 2. The dns_requestmgr had only a single dispatch per v4 and v6. Instead of using single dispatch, use dns_dispatchset_t for each protocol - this is same as dns_resolver. --- bin/delv/delv.c | 2 +- bin/named/include/named/globals.h | 1 - bin/named/main.c | 14 +--- bin/named/server.c | 9 +- bin/nsupdate/nsupdate.c | 5 +- bin/tools/mdig.c | 81 ++++++++---------- lib/dns/client.c | 5 +- lib/dns/dispatch.c | 133 ++++++++++-------------------- lib/dns/include/dns/dispatch.h | 6 +- lib/dns/include/dns/resolver.h | 11 +-- lib/dns/include/dns/view.h | 4 +- lib/dns/request.c | 24 +++--- lib/dns/resolver.c | 10 +-- lib/dns/view.c | 6 +- lib/isc/include/isc/tid.h | 4 +- lib/isc/tid.c | 6 +- lib/ns/include/ns/interfacemgr.h | 3 - tests/dns/dispatch_test.c | 14 +++- tests/dns/resolver_test.c | 2 +- 19 files changed, 132 insertions(+), 208 deletions(-) diff --git a/bin/delv/delv.c b/bin/delv/delv.c index d8f71d7e24..26adc81266 100644 --- a/bin/delv/delv.c +++ b/bin/delv/delv.c @@ -2165,7 +2165,7 @@ run_server(void *arg) { dns_view_initsecroots(view); CHECK(setup_dnsseckeys(NULL, view)); - CHECK(dns_view_createresolver(view, loopmgr, 1, netmgr, 0, + CHECK(dns_view_createresolver(view, loopmgr, netmgr, 0, tlsctx_client_cache, dispatch, NULL)); isc_stats_create(mctx, &resstats, dns_resstatscounter_max); diff --git a/bin/named/include/named/globals.h b/bin/named/include/named/globals.h index 03e35f2cd4..1f6edbc09e 100644 --- a/bin/named/include/named/globals.h +++ b/bin/named/include/named/globals.h @@ -50,7 +50,6 @@ EXTERN isc_mem_t *named_g_mctx INIT(NULL); EXTERN unsigned int named_g_cpus INIT(0); -EXTERN unsigned int named_g_udpdisp INIT(0); EXTERN isc_loop_t *named_g_mainloop INIT(NULL); EXTERN isc_loopmgr_t *named_g_loopmgr INIT(NULL); EXTERN bool named_g_loopmgr_running INIT(false); diff --git a/bin/named/main.c b/bin/named/main.c index 0e427a8b0e..521536caa1 100644 --- a/bin/named/main.c +++ b/bin/named/main.c @@ -949,9 +949,7 @@ parse_command_line(int argc, char *argv[]) { parse_T_opt(isc_commandline_argument); break; case 'U': - named_g_udpdisp = parse_int(isc_commandline_argument, - "number of UDP listeners " - "per interface"); + /* Obsolete. No longer in use. Ignore. */ break; case 'u': named_g_username = isc_commandline_argument; @@ -1044,16 +1042,6 @@ create_managers(void) { ISC_LOG_INFO, "found %u CPU%s, using %u worker thread%s", named_g_cpus_detected, named_g_cpus_detected == 1 ? "" : "s", named_g_cpus, named_g_cpus == 1 ? "" : "s"); - if (named_g_udpdisp == 0) { - named_g_udpdisp = named_g_cpus_detected; - } - if (named_g_udpdisp > named_g_cpus) { - named_g_udpdisp = named_g_cpus; - } - isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, - NAMED_LOGMODULE_SERVER, ISC_LOG_INFO, - "using %u UDP listener%s per interface", named_g_udpdisp, - named_g_udpdisp == 1 ? "" : "s"); isc_managers_create(&named_g_mctx, named_g_cpus, &named_g_loopmgr, &named_g_netmgr); diff --git a/bin/named/server.c b/bin/named/server.c index 714c1b10b3..bd3ca9bb0e 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -4033,7 +4033,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, named_cache_t *nsc; bool zero_no_soattl; dns_acl_t *clients = NULL, *mapped = NULL, *excluded = NULL; - unsigned int query_timeout, ndisp; + unsigned int query_timeout; bool old_rpz_ok = false; dns_dyndbctx_t *dctx = NULL; unsigned int resolver_param; @@ -4685,9 +4685,8 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, goto cleanup; } - ndisp = 4 * ISC_MIN(named_g_udpdisp, MAX_UDP_DISPATCH); CHECK(dns_view_createresolver( - view, named_g_loopmgr, ndisp, named_g_netmgr, resopts, + view, named_g_loopmgr, named_g_netmgr, resopts, named_g_server->tlsctx_client_cache, dispatch4, dispatch6)); if (resstats == NULL) { @@ -12148,10 +12147,6 @@ named_server_status(named_server_t *server, isc_buffer_t **text) { snprintf(line, sizeof(line), "worker threads: %u\n", named_g_cpus); CHECK(putstr(text, line)); - snprintf(line, sizeof(line), "UDP listeners per interface: %u\n", - named_g_udpdisp); - CHECK(putstr(text, line)); - snprintf(line, sizeof(line), "number of zones: %u (%u automatic)\n", zonecount, automatic); CHECK(putstr(text, line)); diff --git a/bin/nsupdate/nsupdate.c b/bin/nsupdate/nsupdate.c index 3e673f493d..5746933935 100644 --- a/bin/nsupdate/nsupdate.c +++ b/bin/nsupdate/nsupdate.c @@ -798,7 +798,7 @@ create_name(const char *str, char *namedata, size_t len, dns_name_t *name) { } static void -setup_system(void) { +setup_system(void *arg ISC_ATTR_UNUSED) { isc_result_t result; isc_sockaddr_t bind_any, bind_any6; isc_sockaddrlist_t *nslist; @@ -3475,8 +3475,7 @@ main(int argc, char **argv) { timeoutms = timeout * 1000; isc_nm_settimeouts(netmgr, timeoutms, timeoutms, timeoutms, timeoutms); - setup_system(); - + isc_loopmgr_setup(loopmgr, setup_system, NULL); isc_loopmgr_setup(loopmgr, getinput, NULL); isc_loopmgr_teardown(loopmgr, shutdown_program, NULL); isc_loopmgr_run(loopmgr); diff --git a/bin/tools/mdig.c b/bin/tools/mdig.c index f88edf49f6..6b2facf09b 100644 --- a/bin/tools/mdig.c +++ b/bin/tools/mdig.c @@ -116,6 +116,12 @@ static unsigned char cookie_secret[33]; static int onfly = 0; static char hexcookie[81]; +static isc_sockaddr_t bind_any; +static isc_nm_t *netmgr = NULL; +static dns_dispatchmgr_t *dispatchmgr = NULL; +static dns_dispatch_t *dispatchvx = NULL; +static dns_view_t *view = NULL; + struct query { char textname[MXNAME]; /*% Name we're going to be * looking up */ @@ -2069,43 +2075,42 @@ set_source_ports(dns_dispatchmgr_t *manager) { } static void -teardown_view(void *arg) { - dns_view_t *view = arg; +teardown(void *arg ISC_ATTR_UNUSED) { dns_view_detach(&view); -} - -static void -teardown_requestmgr(void *arg) { - dns_requestmgr_t *mgr = arg; - - dns_requestmgr_shutdown(mgr); - dns_requestmgr_detach(&mgr); -} - -static void -teardown_dispatch(void *arg) { - dns_dispatch_t *dispatchv4 = arg; - dns_dispatch_detach(&dispatchv4); -} - -static void -teardown_dispatchmgr(void *arg) { - dns_dispatchmgr_t *dispatchmgr = arg; + dns_requestmgr_shutdown(requestmgr); + dns_requestmgr_detach(&requestmgr); + dns_dispatch_detach(&dispatchvx); dns_dispatchmgr_detach(&dispatchmgr); } +static void +setup(void *arg ISC_ATTR_UNUSED) { + RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); + + set_source_ports(dispatchmgr); + + if (have_ipv4) { + isc_sockaddr_any(&bind_any); + } else { + isc_sockaddr_any6(&bind_any); + } + RUNCHECK(dns_dispatch_createudp( + dispatchmgr, have_src ? &srcaddr : &bind_any, &dispatchvx)); + + RUNCHECK(dns_requestmgr_create( + mctx, loopmgr, dispatchmgr, have_ipv4 ? dispatchvx : NULL, + have_ipv6 ? dispatchvx : NULL, &requestmgr)); + + RUNCHECK(dns_view_create(mctx, NULL, 0, "_mdig", &view)); +} + /*% Main processing routine for mdig */ int main(int argc, char *argv[]) { struct query *query = NULL; isc_result_t result; - isc_sockaddr_t bind_any; isc_log_t *lctx = NULL; isc_logconfig_t *lcfg = NULL; - isc_nm_t *netmgr = NULL; - dns_dispatchmgr_t *dispatchmgr = NULL; - dns_dispatch_t *dispatchvx = NULL; - dns_view_t *view = NULL; unsigned int i; int ns; @@ -2151,30 +2156,10 @@ main(int argc, char *argv[]) { fatal("can't choose between IPv4 and IPv6"); } - RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); - - set_source_ports(dispatchmgr); - - if (have_ipv4) { - isc_sockaddr_any(&bind_any); - } else { - isc_sockaddr_any6(&bind_any); - } - RUNCHECK(dns_dispatch_createudp( - dispatchmgr, have_src ? &srcaddr : &bind_any, &dispatchvx)); - - RUNCHECK(dns_requestmgr_create( - mctx, loopmgr, dispatchmgr, have_ipv4 ? dispatchvx : NULL, - have_ipv6 ? dispatchvx : NULL, &requestmgr)); - - RUNCHECK(dns_view_create(mctx, NULL, 0, "_mdig", &view)); - query = ISC_LIST_HEAD(queries); + isc_loopmgr_setup(loopmgr, setup, NULL); isc_loopmgr_setup(loopmgr, sendqueries, query); - isc_loopmgr_teardown(loopmgr, teardown_view, view); - isc_loopmgr_teardown(loopmgr, teardown_requestmgr, requestmgr); - isc_loopmgr_teardown(loopmgr, teardown_dispatch, dispatchvx); - isc_loopmgr_teardown(loopmgr, teardown_dispatchmgr, dispatchmgr); + isc_loopmgr_teardown(loopmgr, teardown, NULL); /* * Stall to the start of a new second. diff --git a/lib/dns/client.c b/lib/dns/client.c index 205c9764bc..a8100f7ab6 100644 --- a/lib/dns/client.c +++ b/lib/dns/client.c @@ -215,9 +215,8 @@ createview(isc_mem_t *mctx, dns_rdataclass_t rdclass, isc_loopmgr_t *loopmgr, /* Initialize view security roots */ dns_view_initsecroots(view); - CHECK(dns_view_createresolver(view, loopmgr, 1, nm, 0, - tlsctx_client_cache, dispatchv4, - dispatchv6)); + CHECK(dns_view_createresolver(view, loopmgr, nm, 0, tlsctx_client_cache, + dispatchv4, dispatchv6)); CHECK(dns_db_create(mctx, "rbt", dns_rootname, dns_dbtype_cache, rdclass, 0, NULL, &view->cachedb)); diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 01562839bf..38b6577ed7 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -119,8 +118,6 @@ struct dns_dispatch { /*% Locked by mgr->lock. */ ISC_LINK(dns_dispatch_t) link; - /* Locked by "lock". */ - isc_mutex_t lock; /*%< locks all below */ isc_socktype_t socktype; dns_dispatchstate_t state; isc_refcount_t references; @@ -130,7 +127,7 @@ struct dns_dispatch { dns_displist_t pending; dns_displist_t active; - unsigned int requests; /*%< how many requests we have */ + uint_fast32_t requests; /*%< how many requests we have */ unsigned int timedout; }; @@ -188,7 +185,7 @@ static void dispentry_cancel(dns_dispentry_t *resp, isc_result_t result); static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, - dns_dispatch_t **dispp); + uint32_t tid, dns_dispatch_t **dispp); static void udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp); static void @@ -417,10 +414,8 @@ dispentry_destroy(dns_dispentry_t *resp) { */ dispentry_cancel(resp, ISC_R_CANCELED); - LOCK(&disp->lock); INSIST(disp->requests > 0); disp->requests--; - UNLOCK(&disp->lock); isc_refcount_destroy(&resp->references); @@ -504,7 +499,7 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, disp = resp->disp; - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); INSIST(resp->reading); resp->reading = false; @@ -518,7 +513,7 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, eresult = ISC_R_CANCELED; } - dispentry_log(resp, LVL(90), "read callback:%s, requests %d", + dispentry_log(resp, LVL(90), "read callback:%s, requests %" PRIuFAST32, isc_result_totext(eresult), disp->requests); if (eresult != ISC_R_SUCCESS) { @@ -617,8 +612,6 @@ next: udp_dispatch_getnext(resp, timeout); done: - UNLOCK(&disp->lock); - if (respond) { dispentry_log(resp, LVL(90), "UDP read callback on %p: %s", handle, isc_result_totext(eresult)); @@ -781,11 +774,11 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, REQUIRE(VALID_DISPATCH(disp)); - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); INSIST(disp->reading); disp->reading = false; - dispatch_log(disp, LVL(90), "TCP read:%s:requests %u", + dispatch_log(disp, LVL(90), "TCP read:%s:requests %" PRIuFAST32, isc_result_totext(result), disp->requests); peer = isc_nmhandle_peeraddr(handle); @@ -886,7 +879,6 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, isc_nmhandle_settimeout(handle, timeout); } - UNLOCK(&disp->lock); rcu_read_unlock(); /* @@ -1082,7 +1074,7 @@ dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) { * Allocate and set important limits. */ static void -dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, +dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, uint32_t tid, dns_dispatch_t **dispp) { dns_dispatch_t *disp = NULL; @@ -1100,7 +1092,7 @@ dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, .link = ISC_LINK_INITIALIZER, .active = ISC_LIST_INITIALIZER, .pending = ISC_LIST_INITIALIZER, - .tid = isc_tid(), + .tid = tid, .magic = DISPATCH_MAGIC, }; @@ -1110,7 +1102,6 @@ dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, __func__, __FILE__, __LINE__, disp); #endif isc_refcount_init(&disp->references, 1); /* DISPATCH000 */ - isc_mutex_init(&disp->lock); *dispp = disp; } @@ -1125,7 +1116,7 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, LOCK(&mgr->lock); - dispatch_allocate(mgr, isc_socktype_tcp, &disp); + dispatch_allocate(mgr, isc_socktype_tcp, isc_tid(), &disp); disp->peer = *destaddr; @@ -1181,13 +1172,12 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, isc_sockaddr_t sockname; isc_sockaddr_t peeraddr; - LOCK(&disp->lock); - if (disp->tid != isc_tid()) { - UNLOCK(&disp->lock); continue; } + REQUIRE(disp->tid == isc_tid()); + if (disp->handle != NULL) { sockname = isc_nmhandle_localaddr(disp->handle); peeraddr = isc_nmhandle_peeraddr(disp->handle); @@ -1207,7 +1197,6 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, (localaddr != NULL && !isc_sockaddr_eqaddr(localaddr, &sockname))) { - UNLOCK(&disp->lock); continue; } @@ -1240,8 +1229,6 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, UNREACHABLE(); } - UNLOCK(&disp->lock); - if (disp_connected != NULL) { break; } @@ -1281,7 +1268,7 @@ dns_dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, REQUIRE(dispp != NULL && *dispp == NULL); LOCK(&mgr->lock); - result = dispatch_createudp(mgr, localaddr, &disp); + result = dispatch_createudp(mgr, localaddr, isc_tid(), &disp); if (result == ISC_R_SUCCESS) { *dispp = disp; } @@ -1292,7 +1279,7 @@ dns_dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, - dns_dispatch_t **dispp) { + uint32_t tid, dns_dispatch_t **dispp) { isc_result_t result = ISC_R_SUCCESS; dns_dispatch_t *disp = NULL; isc_sockaddr_t sa_any; @@ -1308,7 +1295,7 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, } } - dispatch_allocate(mgr, isc_socktype_udp, &disp); + dispatch_allocate(mgr, isc_socktype_udp, tid, &disp); if (isc_log_wouldlog(dns_lctx, 90)) { char addrbuf[ISC_SOCKADDR_FORMATSIZE]; @@ -1361,8 +1348,6 @@ dispatch_destroy(dns_dispatch_t *disp) { isc_nmhandle_detach(&disp->handle); } - isc_mutex_destroy(&disp->lock); - isc_mem_put(mgr->mctx, disp, sizeof(*disp)); /* @@ -1401,10 +1386,9 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, REQUIRE(sent != NULL); REQUIRE(loop != NULL); - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); if (disp->state == DNS_DISPATCHSTATE_CANCELED) { - UNLOCK(&disp->lock); return (ISC_R_CANCELED); } @@ -1436,7 +1420,6 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, result = setup_socket(disp, resp, dest, &localport); if (result != ISC_R_SUCCESS) { isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); - UNLOCK(&disp->lock); inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); return (result); } @@ -1496,7 +1479,6 @@ fail: : dns_resstatscounter_dispreqtcp); rcu_read_unlock(); - UNLOCK(&disp->lock); *idp = resp->id; *respp = resp; @@ -1521,7 +1503,7 @@ dns_dispatch_getnext(dns_dispentry_t *resp) { return (ISC_R_TIMEDOUT); } - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); switch (disp->socktype) { case isc_socktype_udp: udp_dispatch_getnext(resp, timeout); @@ -1532,7 +1514,6 @@ dns_dispatch_getnext(dns_dispentry_t *resp) { default: UNREACHABLE(); } - UNLOCK(&disp->lock); return (result); } @@ -1549,10 +1530,10 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dns_dispatch_t *disp = resp->disp; bool respond = false; - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); dispentry_log(resp, LVL(90), "canceling response: %s, %s/%s (%s/%s), " - "requests %u", + "requests %" PRIuFAST32, isc_result_totext(result), state2str(resp->state), resp->reading ? "reading" : "not reading", state2str(disp->state), @@ -1593,8 +1574,6 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { resp->state = DNS_DISPATCHSTATE_CANCELED; unlock: - UNLOCK(&disp->lock); - if (respond) { dispentry_log(resp, LVL(90), "read callback: %s", isc_result_totext(result)); @@ -1614,10 +1593,10 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dns_dispatch_t *disp = resp->disp; dns_displist_t resps = ISC_LIST_INITIALIZER; - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); dispentry_log(resp, LVL(90), "canceling response: %s, %s/%s (%s/%s), " - "requests %u", + "requests %" PRIuFAST32, isc_result_totext(result), state2str(resp->state), resp->reading ? "reading" : "not reading", state2str(disp->state), @@ -1692,7 +1671,6 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { resp->state = DNS_DISPATCHSTATE_CANCELED; unlock: - UNLOCK(&disp->lock); /* * NOTE: Calling the response callback directly from here should be done @@ -1797,7 +1775,7 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { localbuf, peerbuf, isc_result_totext(eresult)); } - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); INSIST(disp->state == DNS_DISPATCHSTATE_CONNECTING); /* @@ -1833,8 +1811,6 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { disp->state = DNS_DISPATCHSTATE_NONE; } - UNLOCK(&disp->lock); - for (resp = ISC_LIST_HEAD(resps); resp != NULL; resp = next) { next = ISC_LIST_NEXT(resp, rlink); ISC_LIST_UNLINK(resps, resp, rlink); @@ -1856,8 +1832,7 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { dispentry_log(resp, LVL(90), "connected: %s", isc_result_totext(eresult)); - LOCK(&disp->lock); - + REQUIRE(disp->tid == isc_tid()); switch (resp->state) { case DNS_DISPATCHSTATE_CANCELED: eresult = ISC_R_CANCELED; @@ -1885,7 +1860,6 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { /* probably a port collision; try a different one */ result = setup_socket(disp, resp, &resp->peer, &localport); if (result == ISC_R_SUCCESS) { - UNLOCK(&disp->lock); udp_dispatch_connect(disp, resp); goto detach; } @@ -1897,7 +1871,6 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { break; } unlock: - UNLOCK(&disp->lock); dispentry_log(resp, LVL(90), "connect callback: %s", isc_result_totext(eresult)); @@ -1909,12 +1882,11 @@ detach: static void udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); resp->state = DNS_DISPATCHSTATE_CONNECTING; resp->start = isc_loop_now(resp->loop); dns_dispentry_ref(resp); /* DISPENTRY004 */ ISC_LIST_APPEND(disp->pending, resp, plink); - UNLOCK(&disp->lock); isc_nm_udpconnect(disp->mgr->nm, &resp->local, &resp->peer, udp_connected, resp, resp->timeout); @@ -1944,7 +1916,7 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { } /* Check whether the dispatch is already connecting or connected. */ - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); switch (disp->state) { case DNS_DISPATCHSTATE_NONE: /* First connection, continue with connecting */ @@ -1953,7 +1925,6 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { resp->start = isc_loop_now(resp->loop); dns_dispentry_ref(resp); /* DISPENTRY005 */ ISC_LIST_APPEND(disp->pending, resp, plink); - UNLOCK(&disp->lock); char localbuf[ISC_SOCKADDR_FORMATSIZE]; char peerbuf[ISC_SOCKADDR_FORMATSIZE]; @@ -1979,7 +1950,6 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { resp->start = isc_loop_now(resp->loop); dns_dispentry_ref(resp); /* DISPENTRY005 */ ISC_LIST_APPEND(disp->pending, resp, plink); - UNLOCK(&disp->lock); break; case DNS_DISPATCHSTATE_CONNECTED: @@ -1996,7 +1966,6 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { tcp_startrecv(disp, resp); } - UNLOCK(&disp->lock); /* We are already connected; call the connected cb */ dispentry_log(resp, LVL(90), "connect callback: %s", isc_result_totext(ISC_R_SUCCESS)); @@ -2105,7 +2074,7 @@ dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout) { dispentry_log(resp, LVL(90), "resume"); - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); switch (disp->socktype) { case isc_socktype_udp: { udp_dispatch_getnext(resp, timeout); @@ -2119,8 +2088,6 @@ dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout) { default: UNREACHABLE(); } - - UNLOCK(&disp->lock); } void @@ -2180,31 +2147,25 @@ dns_dispentry_getlocaladdress(dns_dispentry_t *resp, isc_sockaddr_t *addrp) { dns_dispatch_t * dns_dispatchset_get(dns_dispatchset_t *dset) { - dns_dispatch_t *disp = NULL; + uint32_t tid = isc_tid(); /* check that dispatch set is configured */ if (dset == NULL || dset->ndisp == 0) { return (NULL); } - LOCK(&dset->lock); - disp = dset->dispatches[dset->cur]; - dset->cur++; - if (dset->cur == dset->ndisp) { - dset->cur = 0; - } - UNLOCK(&dset->lock); + INSIST(tid < dset->ndisp); - return (disp); + return (dset->dispatches[tid]); } isc_result_t dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, - dns_dispatchset_t **dsetp, int n) { + dns_dispatchset_t **dsetp, uint32_t ndisp) { isc_result_t result; dns_dispatchset_t *dset = NULL; dns_dispatchmgr_t *mgr = NULL; - int i, j; + size_t i; REQUIRE(VALID_DISPATCH(source)); REQUIRE(source->socktype == isc_socktype_udp); @@ -2213,21 +2174,19 @@ dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, mgr = source->mgr; dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t)); - *dset = (dns_dispatchset_t){ .ndisp = n }; - - isc_mutex_init(&dset->lock); - - dset->dispatches = isc_mem_cget(mctx, n, sizeof(dns_dispatch_t *)); + *dset = (dns_dispatchset_t){ .ndisp = ndisp }; isc_mem_attach(mctx, &dset->mctx); + dset->dispatches = isc_mem_cget(dset->mctx, ndisp, + sizeof(dns_dispatch_t *)); + dset->dispatches[0] = NULL; dns_dispatch_attach(source, &dset->dispatches[0]); /* DISPATCH004 */ LOCK(&mgr->lock); - for (i = 1; i < n; i++) { - dset->dispatches[i] = NULL; - result = dispatch_createudp(mgr, &source->local, + for (i = 1; i < dset->ndisp; i++) { + result = dispatch_createudp(mgr, &source->local, i, &dset->dispatches[i]); if (result != ISC_R_SUCCESS) { goto fail; @@ -2242,34 +2201,28 @@ dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, fail: UNLOCK(&mgr->lock); - for (j = 0; j < i; j++) { + for (size_t j = 0; j < i; j++) { dns_dispatch_detach(&(dset->dispatches[j])); /* DISPATCH004 */ } - isc_mem_cput(mctx, dset->dispatches, n, sizeof(dns_dispatch_t *)); - if (dset->mctx == mctx) { - isc_mem_detach(&dset->mctx); - } + isc_mem_cput(dset->mctx, dset->dispatches, ndisp, + sizeof(dns_dispatch_t *)); - isc_mutex_destroy(&dset->lock); - isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t)); + isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t)); return (result); } void dns_dispatchset_destroy(dns_dispatchset_t **dsetp) { - dns_dispatchset_t *dset = NULL; - int i; - REQUIRE(dsetp != NULL && *dsetp != NULL); - dset = *dsetp; + dns_dispatchset_t *dset = *dsetp; *dsetp = NULL; - for (i = 0; i < dset->ndisp; i++) { + + for (size_t i = 0; i < dset->ndisp; i++) { dns_dispatch_detach(&(dset->dispatches[i])); /* DISPATCH004 */ } isc_mem_cput(dset->mctx, dset->dispatches, dset->ndisp, sizeof(dns_dispatch_t *)); - isc_mutex_destroy(&dset->lock); isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t)); } diff --git a/lib/dns/include/dns/dispatch.h b/lib/dns/include/dns/dispatch.h index d5caf9af78..0813224f8b 100644 --- a/lib/dns/include/dns/dispatch.h +++ b/lib/dns/include/dns/dispatch.h @@ -69,9 +69,7 @@ ISC_LANG_BEGINDECLS struct dns_dispatchset { isc_mem_t *mctx; dns_dispatch_t **dispatches; - int ndisp; - int cur; - isc_mutex_t lock; + uint32_t ndisp; }; /* @@ -358,7 +356,7 @@ dns_dispatchset_get(dns_dispatchset_t *dset); isc_result_t dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, - dns_dispatchset_t **dsetp, int n); + dns_dispatchset_t **dsetp, uint32_t n); /*%< * Given a valid dispatch 'source', create a dispatch set containing * 'n' UDP dispatches, with the remainder filled out by clones of the diff --git a/lib/dns/include/dns/resolver.h b/lib/dns/include/dns/resolver.h index 0c6af9c816..01725a13ec 100644 --- a/lib/dns/include/dns/resolver.h +++ b/lib/dns/include/dns/resolver.h @@ -165,9 +165,8 @@ enum { #define DNS_QMIN_MAX_NO_DELEGATION 3 isc_result_t -dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, - unsigned int ndisp, isc_nm_t *nm, unsigned int options, - isc_tlsctx_cache_t *tlsctx_cache, +dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, isc_nm_t *nm, + unsigned int options, isc_tlsctx_cache_t *tlsctx_cache, dns_dispatch_t *dispatchv4, dns_dispatch_t *dispatchv6, dns_resolver_t **resp); @@ -183,17 +182,15 @@ dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, * *\li 'view' is a valid view. * - *\li 'ndisp' > 0. - * *\li 'nm' is a valid network manager. * *\li 'tlsctx_cache' != NULL. * *\li 'dispatchv4' is a dispatch with an IPv4 UDP socket, or is NULL. - * If not NULL, 'ndisp' clones of it will be created by the resolver. + * If not NULL, clones per loop of it will be created by the resolver. * *\li 'dispatchv6' is a dispatch with an IPv6 UDP socket, or is NULL. - * If not NULL, 'ndisp' clones of it will be created by the resolver. + * If not NULL, clones per loop of it will be created by the resolver. * *\li resp != NULL && *resp == NULL. * diff --git a/lib/dns/include/dns/view.h b/lib/dns/include/dns/view.h index 7e1babaa16..8904a5a485 100644 --- a/lib/dns/include/dns/view.h +++ b/lib/dns/include/dns/view.h @@ -365,8 +365,8 @@ dns_view_weakdetach(dns_view_t **targetp); isc_result_t dns_view_createresolver(dns_view_t *view, isc_loopmgr_t *loopmgr, - unsigned int ndisp, isc_nm_t *netmgr, - unsigned int options, isc_tlsctx_cache_t *tlsctx_cache, + isc_nm_t *netmgr, unsigned int options, + isc_tlsctx_cache_t *tlsctx_cache, dns_dispatch_t *dispatchv4, dns_dispatch_t *dispatchv6); /*%< * Create a resolver and address database for the view. diff --git a/lib/dns/request.c b/lib/dns/request.c index a5a7291570..46affed3f1 100644 --- a/lib/dns/request.c +++ b/lib/dns/request.c @@ -54,8 +54,8 @@ struct dns_requestmgr { atomic_bool shuttingdown; dns_dispatchmgr_t *dispatchmgr; - dns_dispatch_t *dispatchv4; - dns_dispatch_t *dispatchv6; + dns_dispatchset_t *dispatches4; + dns_dispatchset_t *dispatches6; dns_requestlist_t *requests; }; @@ -150,10 +150,14 @@ dns_requestmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, dns_dispatchmgr_attach(dispatchmgr, &requestmgr->dispatchmgr); if (dispatchv4 != NULL) { - dns_dispatch_attach(dispatchv4, &requestmgr->dispatchv4); + dns_dispatchset_create(requestmgr->mctx, dispatchv4, + &requestmgr->dispatches4, + isc_loopmgr_nloops(requestmgr->loopmgr)); } if (dispatchv6 != NULL) { - dns_dispatch_attach(dispatchv6, &requestmgr->dispatchv6); + dns_dispatchset_create(requestmgr->mctx, dispatchv6, + &requestmgr->dispatches6, + isc_loopmgr_nloops(requestmgr->loopmgr)); } isc_refcount_init(&requestmgr->references, 1); @@ -232,11 +236,11 @@ requestmgr_destroy(dns_requestmgr_t *requestmgr) { isc_mem_cput(requestmgr->mctx, requestmgr->requests, nloops, sizeof(requestmgr->requests[0])); - if (requestmgr->dispatchv4 != NULL) { - dns_dispatch_detach(&requestmgr->dispatchv4); + if (requestmgr->dispatches4 != NULL) { + dns_dispatchset_destroy(&requestmgr->dispatches4); } - if (requestmgr->dispatchv6 != NULL) { - dns_dispatch_detach(&requestmgr->dispatchv6); + if (requestmgr->dispatches6 != NULL) { + dns_dispatchset_destroy(&requestmgr->dispatches6); } if (requestmgr->dispatchmgr != NULL) { dns_dispatchmgr_detach(&requestmgr->dispatchmgr); @@ -360,11 +364,11 @@ udp_dispatch(dns_requestmgr_t *requestmgr, const isc_sockaddr_t *srcaddr, if (srcaddr == NULL) { switch (isc_sockaddr_pf(destaddr)) { case PF_INET: - disp = requestmgr->dispatchv4; + disp = dns_dispatchset_get(requestmgr->dispatches4); break; case PF_INET6: - disp = requestmgr->dispatchv6; + disp = dns_dispatchset_get(requestmgr->dispatches6); break; default: diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index c21513dda8..f65b7e4a92 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -9930,9 +9930,8 @@ spillattimer_countdown(void *arg) { } isc_result_t -dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, - unsigned int ndisp, isc_nm_t *nm, unsigned int options, - isc_tlsctx_cache_t *tlsctx_cache, +dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, isc_nm_t *nm, + unsigned int options, isc_tlsctx_cache_t *tlsctx_cache, dns_dispatch_t *dispatchv4, dns_dispatch_t *dispatchv6, dns_resolver_t **resp) { dns_resolver_t *res = NULL; @@ -9942,7 +9941,6 @@ dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, */ REQUIRE(DNS_VIEW_VALID(view)); - REQUIRE(ndisp > 0); REQUIRE(resp != NULL && *resp == NULL); REQUIRE(tlsctx_cache != NULL); REQUIRE(dispatchv4 != NULL || dispatchv6 != NULL); @@ -9989,12 +9987,12 @@ dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, if (dispatchv4 != NULL) { dns_dispatchset_create(res->mctx, dispatchv4, &res->dispatches4, - ndisp); + isc_loopmgr_nloops(res->loopmgr)); } if (dispatchv6 != NULL) { dns_dispatchset_create(res->mctx, dispatchv6, &res->dispatches6, - ndisp); + isc_loopmgr_nloops(res->loopmgr)); } isc_mutex_init(&res->lock); diff --git a/lib/dns/view.c b/lib/dns/view.c index 6855d02eda..e07ff434eb 100644 --- a/lib/dns/view.c +++ b/lib/dns/view.c @@ -589,8 +589,8 @@ dns_view_weakdetach(dns_view_t **viewp) { isc_result_t dns_view_createresolver(dns_view_t *view, isc_loopmgr_t *loopmgr, - unsigned int ndisp, isc_nm_t *netmgr, - unsigned int options, isc_tlsctx_cache_t *tlsctx_cache, + isc_nm_t *netmgr, unsigned int options, + isc_tlsctx_cache_t *tlsctx_cache, dns_dispatch_t *dispatchv4, dns_dispatch_t *dispatchv6) { isc_result_t result; @@ -601,7 +601,7 @@ dns_view_createresolver(dns_view_t *view, isc_loopmgr_t *loopmgr, REQUIRE(view->resolver == NULL); REQUIRE(view->dispatchmgr != NULL); - result = dns_resolver_create(view, loopmgr, ndisp, netmgr, options, + result = dns_resolver_create(view, loopmgr, netmgr, options, tlsctx_cache, dispatchv4, dispatchv6, &view->resolver); if (result != ISC_R_SUCCESS) { diff --git a/lib/isc/include/isc/tid.h b/lib/isc/include/isc/tid.h index 20bafac595..e630c4f24f 100644 --- a/lib/isc/include/isc/tid.h +++ b/lib/isc/include/isc/tid.h @@ -28,11 +28,11 @@ isc_tid_count(void); * Returns the number of threads. */ -extern thread_local uint32_t tid_local; +extern thread_local uint32_t isc__tid_local; static inline uint32_t isc_tid(void) { - return (tid_local); + return (isc__tid_local); } /*%< * Returns the thread ID of the currently-running loop. diff --git a/lib/isc/tid.c b/lib/isc/tid.c index 397fac1c8f..d19a413554 100644 --- a/lib/isc/tid.c +++ b/lib/isc/tid.c @@ -23,7 +23,7 @@ /** * Private */ -thread_local uint32_t tid_local = ISC_TID_UNKNOWN; +thread_local uint32_t isc__tid_local = ISC_TID_UNKNOWN; /* * Zero is a better nonsense value in this case than ISC_TID_UNKNOWN; @@ -37,8 +37,8 @@ static uint32_t tid_count = 0; void isc__tid_init(uint32_t tid) { - REQUIRE(tid_local == ISC_TID_UNKNOWN || tid_local == tid); - tid_local = tid; + REQUIRE(isc__tid_local == ISC_TID_UNKNOWN || isc__tid_local == tid); + isc__tid_local = tid; } void diff --git a/lib/ns/include/ns/interfacemgr.h b/lib/ns/include/ns/interfacemgr.h index 651f4d429b..aea5b2dfac 100644 --- a/lib/ns/include/ns/interfacemgr.h +++ b/lib/ns/include/ns/interfacemgr.h @@ -65,9 +65,6 @@ #define NS_INTERFACEFLAG_ANYADDR 0x01U /*%< bound to "any" address */ #define NS_INTERFACEFLAG_LISTENING 0x02U /*%< listening */ -#define MAX_UDP_DISPATCH \ - 128 /*%< Maximum number of UDP dispatchers \ - * to start per interface */ /*% The nameserver interface structure */ struct ns_interface { unsigned int magic; /*%< Magic number. */ diff --git a/tests/dns/dispatch_test.c b/tests/dns/dispatch_test.c index 0780003bab..150ad405ac 100644 --- a/tests/dns/dispatch_test.c +++ b/tests/dns/dispatch_test.c @@ -261,11 +261,12 @@ ISC_LOOP_TEST_IMPL(dispatchset_create) { isc_loopmgr_shutdown(loopmgr); } -/* test dispatch set round-robin */ +/* test dispatch set per-loop dispatch */ ISC_LOOP_TEST_IMPL(dispatchset_get) { isc_result_t result; dns_dispatchset_t *dset = NULL; dns_dispatch_t *d1, *d2, *d3, *d4, *d5; + uint32_t tid_saved; UNUSED(arg); @@ -291,10 +292,21 @@ ISC_LOOP_TEST_IMPL(dispatchset_get) { result = make_dispatchset(4, &dset); assert_int_equal(result, ISC_R_SUCCESS); + /* + * Temporarily modify and then restore the current thread's + * ID value, in order to confirm that different threads get + * different dispatch sets but the same thread gets the same + * one. + */ + tid_saved = isc__tid_local; d1 = dns_dispatchset_get(dset); + isc__tid_local++; d2 = dns_dispatchset_get(dset); + isc__tid_local++; d3 = dns_dispatchset_get(dset); + isc__tid_local++; d4 = dns_dispatchset_get(dset); + isc__tid_local = tid_saved; d5 = dns_dispatchset_get(dset); assert_ptr_equal(d1, d5); diff --git a/tests/dns/resolver_test.c b/tests/dns/resolver_test.c index 09106aadd6..3f53f9f351 100644 --- a/tests/dns/resolver_test.c +++ b/tests/dns/resolver_test.c @@ -76,7 +76,7 @@ mkres(dns_resolver_t **resolverp) { isc_result_t result; isc_tlsctx_cache_create(mctx, &tlsctx_cache); - result = dns_resolver_create(view, loopmgr, 1, netmgr, 0, tlsctx_cache, + result = dns_resolver_create(view, loopmgr, netmgr, 0, tlsctx_cache, dispatch, NULL, resolverp); assert_int_equal(result, ISC_R_SUCCESS); } From c9b4b45943759f13eed43a19576cc7755453f990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Fri, 15 Sep 2023 14:38:02 +0200 Subject: [PATCH 3/5] Replace the linked list of TCP dispatches with hash table Reusing TCP connections with dns_dispatch_gettcp() used linear linked list to lookup existing outgoing TCP connections that could be reused. Replace the linked list with per-loop cds_lfht hashtable to speedup the lookups. We use cds_lfht because it allows non-unique node insertion that we need to check for dispatches in different connection states. --- bin/delv/delv.c | 2 +- bin/named/server.c | 8 +- bin/nsupdate/nsupdate.c | 2 +- bin/tests/system/pipelined/pipequeries.c | 2 +- bin/tools/mdig.c | 2 +- lib/dns/client.c | 3 +- lib/dns/dispatch.c | 181 +++++++++++++---------- lib/dns/include/dns/dispatch.h | 3 +- tests/dns/dispatch_test.c | 24 ++- tests/libtest/dns.c | 3 +- tests/libtest/ns.c | 2 +- 11 files changed, 131 insertions(+), 101 deletions(-) diff --git a/bin/delv/delv.c b/bin/delv/delv.c index 26adc81266..3a95d3f93d 100644 --- a/bin/delv/delv.c +++ b/bin/delv/delv.c @@ -2142,7 +2142,7 @@ run_server(void *arg) { ns_server_create(mctx, matchview, &sctx); - CHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); + CHECK(dns_dispatchmgr_create(mctx, loopmgr, netmgr, &dispatchmgr)); isc_sockaddr_any(&any); CHECK(dns_dispatch_createudp(dispatchmgr, &any, &dispatch)); CHECK(ns_interfacemgr_create(mctx, sctx, loopmgr, netmgr, dispatchmgr, diff --git a/bin/named/server.c b/bin/named/server.c index bd3ca9bb0e..2831951ac8 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -1312,8 +1312,8 @@ get_view_querysource_dispatch(const cfg_obj_t **maps, int af, isc_sockaddr_format(&sa, buf, sizeof(buf)); isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, NAMED_LOGMODULE_SERVER, ISC_LOG_ERROR, - "could not get query source dispatcher (%s)", - buf); + "could not get query source dispatcher (%s): %s", + buf, isc_result_totext(result)); return (result); } @@ -9897,8 +9897,8 @@ run_server(void *arg) { dns_zonemgr_create(named_g_mctx, named_g_loopmgr, named_g_netmgr, &server->zonemgr); - CHECKFATAL(dns_dispatchmgr_create(named_g_mctx, named_g_netmgr, - &named_g_dispatchmgr), + CHECKFATAL(dns_dispatchmgr_create(named_g_mctx, named_g_loopmgr, + named_g_netmgr, &named_g_dispatchmgr), "creating dispatch manager"); dns_dispatchmgr_setstats(named_g_dispatchmgr, server->resolverstats); diff --git a/bin/nsupdate/nsupdate.c b/bin/nsupdate/nsupdate.c index 5746933935..fc42badc64 100644 --- a/bin/nsupdate/nsupdate.c +++ b/bin/nsupdate/nsupdate.c @@ -919,7 +919,7 @@ setup_system(void *arg ISC_ATTR_UNUSED) { irs_resconf_destroy(&resconf); - result = dns_dispatchmgr_create(gmctx, netmgr, &dispatchmgr); + result = dns_dispatchmgr_create(gmctx, loopmgr, netmgr, &dispatchmgr); check_result(result, "dns_dispatchmgr_create"); result = dst_lib_init(gmctx, NULL); diff --git a/bin/tests/system/pipelined/pipequeries.c b/bin/tests/system/pipelined/pipequeries.c index bfe0c1b84a..ceb26dac84 100644 --- a/bin/tests/system/pipelined/pipequeries.c +++ b/bin/tests/system/pipelined/pipequeries.c @@ -275,7 +275,7 @@ main(int argc, char *argv[]) { RUNCHECK(dst_lib_init(mctx, NULL)); - RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); + RUNCHECK(dns_dispatchmgr_create(mctx, loopmgr, netmgr, &dispatchmgr)); RUNCHECK(dns_dispatch_createudp( dispatchmgr, have_src ? &srcaddr : &bind_any, &dispatchv4)); diff --git a/bin/tools/mdig.c b/bin/tools/mdig.c index 6b2facf09b..4de30ab65f 100644 --- a/bin/tools/mdig.c +++ b/bin/tools/mdig.c @@ -2085,7 +2085,7 @@ teardown(void *arg ISC_ATTR_UNUSED) { static void setup(void *arg ISC_ATTR_UNUSED) { - RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); + RUNCHECK(dns_dispatchmgr_create(mctx, loopmgr, netmgr, &dispatchmgr)); set_source_ports(dispatchmgr); diff --git a/lib/dns/client.c b/lib/dns/client.c index a8100f7ab6..486cda4af7 100644 --- a/lib/dns/client.c +++ b/lib/dns/client.c @@ -252,7 +252,8 @@ dns_client_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, isc_nm_t *nm, .nm = nm, }; - result = dns_dispatchmgr_create(mctx, nm, &client->dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, nm, + &client->dispatchmgr); if (result != ISC_R_SUCCESS) { goto cleanup_client; } diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 38b6577ed7..0807666868 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -55,9 +55,9 @@ struct dns_dispatchmgr { isc_stats_t *stats; isc_nm_t *nm; - /* Locked by "lock". */ - isc_mutex_t lock; - ISC_LIST(dns_dispatch_t) list; + uint32_t nloops; + + struct cds_lfht **tcps; struct cds_lfht *qids; @@ -109,15 +109,13 @@ struct dns_dispatch { /* Unlocked. */ unsigned int magic; /*%< magic */ uint32_t tid; + isc_mem_t *mctx; dns_dispatchmgr_t *mgr; /*%< dispatch manager */ isc_nmhandle_t *handle; /*%< netmgr handle for TCP connection */ isc_sockaddr_t local; /*%< local address */ in_port_t localport; /*%< local UDP port */ isc_sockaddr_t peer; /*%< peer address (TCP) */ - /*% Locked by mgr->lock. */ - ISC_LINK(dns_dispatch_t) link; - isc_socktype_t socktype; dns_dispatchstate_t state; isc_refcount_t references; @@ -130,6 +128,9 @@ struct dns_dispatch { uint_fast32_t requests; /*%< how many requests we have */ unsigned int timedout; + + struct cds_lfht_node ht_node; + struct rcu_head rcu_head; }; #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p') @@ -339,7 +340,6 @@ dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...) { /*% * Choose a random port number for a dispatch entry. - * The caller must hold the disp->lock */ static isc_result_t setup_socket(dns_dispatch_t *disp, dns_dispentry_t *resp, @@ -957,7 +957,7 @@ setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset, */ isc_result_t -dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, +dns_dispatchmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, isc_nm_t *nm, dns_dispatchmgr_t **mgrp) { dns_dispatchmgr_t *mgr = NULL; isc_portset_t *v4portset = NULL; @@ -967,7 +967,10 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, REQUIRE(mgrp != NULL && *mgrp == NULL); mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t)); - *mgr = (dns_dispatchmgr_t){ .magic = 0 }; + *mgr = (dns_dispatchmgr_t){ + .magic = 0, + .nloops = isc_loopmgr_nloops(loopmgr), + }; #if DNS_DISPATCH_TRACE fprintf(stderr, "dns_dispatchmgr__init:%s:%s:%d:%p->references = 1\n", @@ -978,9 +981,12 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, isc_mem_attach(mctx, &mgr->mctx); isc_nm_attach(nm, &mgr->nm); - isc_mutex_init(&mgr->lock); - - ISC_LIST_INIT(mgr->list); + mgr->tcps = isc_mem_cget(mgr->mctx, mgr->nloops, sizeof(mgr->tcps[0])); + for (size_t i = 0; i < mgr->nloops; i++) { + mgr->tcps[i] = cds_lfht_new( + 2, 2, 0, CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, + NULL); + } create_default_portset(mgr->mctx, AF_INET, &v4portset); create_default_portset(mgr->mctx, AF_INET6, &v6portset); @@ -1035,10 +1041,14 @@ dispatchmgr_destroy(dns_dispatchmgr_t *mgr) { isc_refcount_destroy(&mgr->references); mgr->magic = 0; - isc_mutex_destroy(&mgr->lock); RUNTIME_CHECK(!cds_lfht_destroy(mgr->qids, NULL)); + for (size_t i = 0; i < mgr->nloops; i++) { + RUNTIME_CHECK(!cds_lfht_destroy(mgr->tcps[i], NULL)); + } + isc_mem_cput(mgr->mctx, mgr->tcps, mgr->nloops, sizeof(mgr->tcps[0])); + if (mgr->blackhole != NULL) { dns_acl_detach(&mgr->blackhole); } @@ -1064,7 +1074,6 @@ dispatchmgr_destroy(dns_dispatchmgr_t *mgr) { void dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) { REQUIRE(VALID_DISPATCHMGR(mgr)); - REQUIRE(ISC_LIST_EMPTY(mgr->list)); REQUIRE(mgr->stats == NULL); isc_stats_attach(stats, &mgr->stats); @@ -1089,13 +1098,14 @@ dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, uint32_t tid, disp = isc_mem_get(mgr->mctx, sizeof(*disp)); *disp = (dns_dispatch_t){ .socktype = type, - .link = ISC_LINK_INITIALIZER, .active = ISC_LIST_INITIALIZER, .pending = ISC_LIST_INITIALIZER, .tid = tid, .magic = DISPATCH_MAGIC, }; + isc_mem_attach(mgr->mctx, &disp->mctx); + dns_dispatchmgr_attach(mgr, &disp->mgr); #if DNS_DISPATCH_TRACE fprintf(stderr, "dns_dispatch__init:%s:%s:%d:%p->references = 1\n", @@ -1106,17 +1116,50 @@ dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, uint32_t tid, *dispp = disp; } +struct dispatch_key { + const isc_sockaddr_t *local; + const isc_sockaddr_t *peer; +}; + +static uint32_t +dispatch_hash(struct dispatch_key *key) { + uint32_t hashval = isc_sockaddr_hash(key->peer, false); + if (key->local) { + hashval ^= isc_sockaddr_hash(key->local, true); + } + + return (hashval); +} + +static int +dispatch_match(struct cds_lfht_node *node, const void *key0) { + dns_dispatch_t *disp = caa_container_of(node, dns_dispatch_t, ht_node); + const struct dispatch_key *key = key0; + isc_sockaddr_t local; + isc_sockaddr_t peer; + + if (disp->handle != NULL) { + local = isc_nmhandle_localaddr(disp->handle); + peer = isc_nmhandle_peeraddr(disp->handle); + } else { + local = disp->local; + peer = disp->peer; + } + + return (isc_sockaddr_equal(&peer, key->peer) && + (key->local == NULL || isc_sockaddr_equal(&local, key->local))); +} + isc_result_t dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, const isc_sockaddr_t *destaddr, dns_dispatch_t **dispp) { dns_dispatch_t *disp = NULL; + uint32_t tid = isc_tid(); REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(destaddr != NULL); - LOCK(&mgr->lock); - - dispatch_allocate(mgr, isc_socktype_tcp, isc_tid(), &disp); + dispatch_allocate(mgr, isc_socktype_tcp, tid, &disp); disp->peer = *destaddr; @@ -1132,10 +1175,14 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, /* * Append it to the dispatcher list. */ + struct dispatch_key key = { + .local = &disp->local, + .peer = &disp->peer, + }; - /* FIXME: There should be a lookup hashtable here */ - ISC_LIST_APPEND(mgr->list, disp, link); - UNLOCK(&mgr->lock); + rcu_read_lock(); + cds_lfht_add(mgr->tcps[tid], dispatch_hash(&key), &disp->ht_node); + rcu_read_unlock(); if (isc_log_wouldlog(dns_lctx, 90)) { char addrbuf[ISC_SOCKADDR_FORMATSIZE]; @@ -1159,46 +1206,25 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, dns_dispatch_t *disp_connected = NULL; dns_dispatch_t *disp_fallback = NULL; isc_result_t result = ISC_R_NOTFOUND; + uint32_t tid = isc_tid(); REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(destaddr != NULL); REQUIRE(dispp != NULL && *dispp == NULL); - LOCK(&mgr->lock); + struct dispatch_key key = { + .local = localaddr, + .peer = destaddr, + }; - for (dns_dispatch_t *disp = ISC_LIST_HEAD(mgr->list); disp != NULL; - disp = ISC_LIST_NEXT(disp, link)) - { - isc_sockaddr_t sockname; - isc_sockaddr_t peeraddr; - - if (disp->tid != isc_tid()) { - continue; - } - - REQUIRE(disp->tid == isc_tid()); - - if (disp->handle != NULL) { - sockname = isc_nmhandle_localaddr(disp->handle); - peeraddr = isc_nmhandle_peeraddr(disp->handle); - } else { - sockname = disp->local; - peeraddr = disp->peer; - } - - /* - * The conditions match: - * 1. socktype is TCP - * 2. destination address is same - * 3. local address is either NULL or same - */ - if (disp->socktype != isc_socktype_tcp || - !isc_sockaddr_equal(destaddr, &peeraddr) || - (localaddr != NULL && - !isc_sockaddr_eqaddr(localaddr, &sockname))) - { - continue; - } + rcu_read_lock(); + struct cds_lfht_iter iter; + dns_dispatch_t *disp = NULL; + cds_lfht_for_each_entry_duplicate(mgr->tcps[tid], dispatch_hash(&key), + dispatch_match, &key, &iter, disp, + ht_node) { + INSIST(disp->tid == isc_tid()); + INSIST(disp->socktype == isc_socktype_tcp); switch (disp->state) { case DNS_DISPATCHSTATE_NONE: @@ -1233,6 +1259,7 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, break; } } + rcu_read_unlock(); if (disp_connected != NULL) { /* We found connected dispatch */ @@ -1252,8 +1279,6 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, result = ISC_R_SUCCESS; } - UNLOCK(&mgr->lock); - return (result); } @@ -1267,12 +1292,10 @@ dns_dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, REQUIRE(localaddr != NULL); REQUIRE(dispp != NULL && *dispp == NULL); - LOCK(&mgr->lock); result = dispatch_createudp(mgr, localaddr, isc_tid(), &disp); if (result == ISC_R_SUCCESS) { *dispp = disp; } - UNLOCK(&mgr->lock); return (result); } @@ -1321,25 +1344,30 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, return (result); } +static void +dispatch_destroy_rcu(struct rcu_head *rcu_head) { + dns_dispatch_t *disp = caa_container_of(rcu_head, dns_dispatch_t, + rcu_head); + + isc_mem_putanddetach(&disp->mctx, disp, sizeof(*disp)); +} + static void dispatch_destroy(dns_dispatch_t *disp) { dns_dispatchmgr_t *mgr = disp->mgr; + uint32_t tid = isc_tid(); isc_refcount_destroy(&disp->references); disp->magic = 0; - LOCK(&mgr->lock); - if (ISC_LINK_LINKED(disp, link)) { - ISC_LIST_UNLINK(disp->mgr->list, disp, link); + if (disp->socktype == isc_socktype_tcp) { + (void)cds_lfht_del(mgr->tcps[tid], &disp->ht_node); } - UNLOCK(&mgr->lock); INSIST(disp->requests == 0); INSIST(ISC_LIST_EMPTY(disp->pending)); INSIST(ISC_LIST_EMPTY(disp->active)); - INSIST(!ISC_LINK_LINKED(disp, link)); - dispatch_log(disp, LVL(90), "destroying dispatch %p", disp); if (disp->handle) { @@ -1347,14 +1375,9 @@ dispatch_destroy(dns_dispatch_t *disp) { disp->handle, &disp->handle); isc_nmhandle_detach(&disp->handle); } + dns_dispatchmgr_detach(&disp->mgr); - isc_mem_put(mgr->mctx, disp, sizeof(*disp)); - - /* - * Because dispatch uses mgr->mctx, we must detach after freeing - * dispatch, not before. - */ - dns_dispatchmgr_detach(&mgr); + call_rcu(&disp->rcu_head, dispatch_destroy_rcu); } #if DNS_DISPATCH_TRACE @@ -1394,7 +1417,7 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, localport = isc_sockaddr_getport(&disp->local); - resp = isc_mem_get(disp->mgr->mctx, sizeof(*resp)); + resp = isc_mem_get(disp->mctx, sizeof(*resp)); *resp = (dns_dispentry_t){ .port = localport, .timeout = timeout, @@ -1419,7 +1442,7 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, if (disp->socktype == isc_socktype_udp) { result = setup_socket(disp, resp, dest, &localport); if (result != ISC_R_SUCCESS) { - isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); + isc_mem_put(disp->mctx, resp, sizeof(*resp)); inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); return (result); } @@ -1456,11 +1479,11 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, } while (i++ < QID_MAX_TRIES); fail: if (result != ISC_R_SUCCESS) { - isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); + isc_mem_put(disp->mctx, resp, sizeof(*resp)); return (ISC_R_NOMORE); } - isc_mem_attach(disp->mgr->mctx, &resp->mctx); + isc_mem_attach(disp->mctx, &resp->mctx); if (transport != NULL) { dns_transport_attach(transport, &resp->transport); @@ -1907,7 +1930,7 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { result = dns_transport_get_tlsctx( resp->transport, &resp->peer, resp->tlsctx_cache, - resp->disp->mgr->mctx, &tlsctx, &sess_cache); + resp->mctx, &tlsctx, &sess_cache); if (result != ISC_R_SUCCESS) { return (result); @@ -2184,7 +2207,6 @@ dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, dset->dispatches[0] = NULL; dns_dispatch_attach(source, &dset->dispatches[0]); /* DISPATCH004 */ - LOCK(&mgr->lock); for (i = 1; i < dset->ndisp; i++) { result = dispatch_createudp(mgr, &source->local, i, &dset->dispatches[i]); @@ -2193,14 +2215,11 @@ dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, } } - UNLOCK(&mgr->lock); *dsetp = dset; return (ISC_R_SUCCESS); fail: - UNLOCK(&mgr->lock); - for (size_t j = 0; j < i; j++) { dns_dispatch_detach(&(dset->dispatches[j])); /* DISPATCH004 */ } diff --git a/lib/dns/include/dns/dispatch.h b/lib/dns/include/dns/dispatch.h index 0813224f8b..0e302f288e 100644 --- a/lib/dns/include/dns/dispatch.h +++ b/lib/dns/include/dns/dispatch.h @@ -77,7 +77,8 @@ struct dns_dispatchset { #define DNS_DISPATCHOPT_FIXEDID 0x00000001U isc_result_t -dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, dns_dispatchmgr_t **mgrp); +dns_dispatchmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, isc_nm_t *nm, + dns_dispatchmgr_t **mgrp); /*%< * Creates a new dispatchmgr object, and sets the available ports * to the default range (1024-65535). diff --git a/tests/dns/dispatch_test.c b/tests/dns/dispatch_test.c index 150ad405ac..bce2565d82 100644 --- a/tests/dns/dispatch_test.c +++ b/tests/dns/dispatch_test.c @@ -245,7 +245,8 @@ ISC_LOOP_TEST_IMPL(dispatchset_create) { UNUSED(arg); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = make_dispatchset(1, &dset); @@ -270,7 +271,8 @@ ISC_LOOP_TEST_IMPL(dispatchset_get) { UNUSED(arg); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = make_dispatchset(1, &dset); @@ -463,7 +465,8 @@ ISC_LOOP_TEST_IMPL(dispatch_timeout_tcp_connect) { testdata.region.base = testdata.message; testdata.region.length = sizeof(testdata.message); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = dns_dispatch_createtcp(dispatchmgr, &tcp_connect_addr, @@ -508,7 +511,8 @@ ISC_LOOP_TEST_IMPL(dispatch_timeout_tcp_response) { isc_loop_teardown(isc_loop_main(loopmgr), stop_listening, sock); /* Client */ - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = dns_dispatch_createtcp(dispatchmgr, &tcp_connect_addr, @@ -542,7 +546,8 @@ ISC_LOOP_TEST_IMPL(dispatch_tcp_response) { testdata.region.base = testdata.message; testdata.region.length = sizeof(testdata.message); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = dns_dispatch_createtcp(dispatchmgr, &tcp_connect_addr, @@ -579,7 +584,8 @@ ISC_LOOP_TEST_IMPL(dispatch_tls_response) { testdata.region.base = testdata.message; testdata.region.length = sizeof(testdata.message); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = dns_dispatch_createtcp(dispatchmgr, &tls_connect_addr, @@ -606,7 +612,8 @@ ISC_LOOP_TEST_IMPL(dispatch_timeout_udp_response) { uint16_t id; /* Server */ - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = isc_nm_listenudp(netmgr, ISC_NM_LISTEN_ONE, &udp_server_addr, @@ -648,7 +655,8 @@ ISC_LOOP_TEST_IMPL(dispatch_getnext) { testdata.region.base = testdata.message; testdata.region.length = sizeof(testdata.message); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = dns_dispatch_createudp(dispatchmgr, &udp_connect_addr, diff --git a/tests/libtest/dns.c b/tests/libtest/dns.c index d50c012caf..73fb630807 100644 --- a/tests/libtest/dns.c +++ b/tests/libtest/dns.c @@ -68,7 +68,8 @@ dns_test_makeview(const char *name, bool with_dispatchmgr, bool with_cache, dns_dispatchmgr_t *dispatchmgr = NULL; if (with_dispatchmgr) { - result = dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, netmgr, + &dispatchmgr); if (result != ISC_R_SUCCESS) { return (result); } diff --git a/tests/libtest/ns.c b/tests/libtest/ns.c index 19b2f67699..4c49474ccc 100644 --- a/tests/libtest/ns.c +++ b/tests/libtest/ns.c @@ -85,7 +85,7 @@ setup_server(void **state) { ns_server_create(mctx, matchview, &sctx); - result = dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, netmgr, &dispatchmgr); if (result != ISC_R_SUCCESS) { goto cleanup; } From aa0971027caddd84387012e873e2918ddb52bab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Fri, 15 Sep 2023 15:59:28 +0200 Subject: [PATCH 4/5] Cleanup unused .localport member of dns_dispatch_t and some macros The .localport member of dns_dispatch_t structure was unused, clean it up. Cleanup unused and/or redundant macros. --- lib/dns/dispatch.c | 150 +++++++++++++++++++++++---------------------- 1 file changed, 76 insertions(+), 74 deletions(-) diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 0807666868..3ba6d91b32 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -109,16 +109,15 @@ struct dns_dispatch { /* Unlocked. */ unsigned int magic; /*%< magic */ uint32_t tid; + isc_socktype_t socktype; + isc_refcount_t references; isc_mem_t *mctx; dns_dispatchmgr_t *mgr; /*%< dispatch manager */ isc_nmhandle_t *handle; /*%< netmgr handle for TCP connection */ isc_sockaddr_t local; /*%< local address */ - in_port_t localport; /*%< local UDP port */ isc_sockaddr_t peer; /*%< peer address (TCP) */ - isc_socktype_t socktype; dns_dispatchstate_t state; - isc_refcount_t references; bool reading; @@ -136,9 +135,6 @@ struct dns_dispatch { #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p') #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC) -#define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c') -#define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC) - #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p') #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC) @@ -199,8 +195,6 @@ tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, static void udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout); -#define LVL(x) ISC_LOG_DEBUG(x) - static const char * socktype2str(dns_dispentry_t *resp) { dns_transport_type_t transport_type = DNS_TRANSPORT_UDP; @@ -425,11 +419,12 @@ dispentry_destroy(dns_dispentry_t *resp) { INSIST(!ISC_LINK_LINKED(resp, alink)); INSIST(!ISC_LINK_LINKED(resp, rlink)); - dispentry_log(resp, LVL(90), "destroying"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "destroying"); if (resp->handle != NULL) { - dispentry_log(resp, LVL(90), "detaching handle %p from %p", - resp->handle, &resp->handle); + dispentry_log(resp, ISC_LOG_DEBUG(90), + "detaching handle %p from %p", resp->handle, + &resp->handle); isc_nmhandle_detach(&resp->handle); } @@ -513,7 +508,8 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, eresult = ISC_R_CANCELED; } - dispentry_log(resp, LVL(90), "read callback:%s, requests %" PRIuFAST32, + dispentry_log(resp, ISC_LOG_DEBUG(90), + "read callback:%s, requests %" PRIuFAST32, isc_result_totext(eresult), disp->requests); if (eresult != ISC_R_SUCCESS) { @@ -537,11 +533,11 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, NULL) == ISC_R_SUCCESS && match > 0) { - if (isc_log_wouldlog(dns_lctx, LVL(10))) { + if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(10))) { char netaddrstr[ISC_NETADDR_FORMATSIZE]; isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr)); - dispentry_log(resp, LVL(10), + dispentry_log(resp, ISC_LOG_DEBUG(10), "blackholed packet from %s", netaddrstr); } goto next; @@ -557,12 +553,12 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, if (dres != ISC_R_SUCCESS) { char netaddrstr[ISC_NETADDR_FORMATSIZE]; isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr)); - dispentry_log(resp, LVL(10), "got garbage packet from %s", - netaddrstr); + dispentry_log(resp, ISC_LOG_DEBUG(10), + "got garbage packet from %s", netaddrstr); goto next; } - dispentry_log(resp, LVL(92), + dispentry_log(resp, ISC_LOG_DEBUG(92), "got valid DNS message header, /QR %c, id %u", (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); @@ -577,7 +573,8 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, * The QID and the address must match the expected ones. */ if (resp->id != id || !isc_sockaddr_equal(&peer, &resp->peer)) { - dispentry_log(resp, LVL(90), "response doesn't match"); + dispentry_log(resp, ISC_LOG_DEBUG(90), + "response doesn't match"); inc_stats(disp->mgr, dns_resstatscounter_mismatch); goto next; } @@ -613,8 +610,9 @@ next: done: if (respond) { - dispentry_log(resp, LVL(90), "UDP read callback on %p: %s", - handle, isc_result_totext(eresult)); + dispentry_log(resp, ISC_LOG_DEBUG(90), + "UDP read callback on %p: %s", handle, + isc_result_totext(eresult)); resp->response(eresult, region, resp->arg); } @@ -646,7 +644,8 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, unsigned int flags; isc_result_t result = ISC_R_SUCCESS; - dispatch_log(disp, LVL(90), "TCP read success, length == %d, addr = %p", + dispatch_log(disp, ISC_LOG_DEBUG(90), + "TCP read success, length == %d, addr = %p", region->length, region->base); /* @@ -656,11 +655,11 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, isc_buffer_add(&source, region->length); result = dns_message_peekheader(&source, &id, &flags); if (result != ISC_R_SUCCESS) { - dispatch_log(disp, LVL(10), "got garbage packet"); + dispatch_log(disp, ISC_LOG_DEBUG(10), "got garbage packet"); return (ISC_R_UNEXPECTED); } - dispatch_log(disp, LVL(92), + dispatch_log(disp, ISC_LOG_DEBUG(92), "got valid DNS message header, /QR %c, id %u", (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); @@ -669,7 +668,8 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, * reading. */ if ((flags & DNS_MESSAGEFLAG_QR) == 0) { - dispatch_log(disp, LVL(10), "got DNS query instead of answer"); + dispatch_log(disp, ISC_LOG_DEBUG(10), + "got DNS query instead of answer"); return (ISC_R_UNEXPECTED); } @@ -680,7 +680,7 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_dispentry_t key = { .id = id, .peer = *peer, - .port = disp->localport, + .port = isc_sockaddr_getport(&disp->local), }; struct cds_lfht_iter iter; cds_lfht_lookup(disp->mgr->qids, qid_hash(&key), qid_match, &key, @@ -701,7 +701,8 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, } else { result = ISC_R_NOTFOUND; } - dispatch_log(disp, LVL(90), "search for response in hashtable: %s", + dispatch_log(disp, ISC_LOG_DEBUG(90), + "search for response in hashtable: %s", isc_result_totext(result)); return (result); @@ -741,7 +742,7 @@ tcp_recv_processall(dns_displist_t *resps, isc_region_t *region) { next = ISC_LIST_NEXT(resp, rlink); ISC_LIST_UNLINK(*resps, resp, rlink); - dispentry_log(resp, LVL(90), "read callback: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "read callback: %s", isc_result_totext(resp->result)); resp->response(resp->result, region, resp->arg); dns_dispentry_detach(&resp); /* DISPENTRY009 */ @@ -778,7 +779,8 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, INSIST(disp->reading); disp->reading = false; - dispatch_log(disp, LVL(90), "TCP read:%s:requests %" PRIuFAST32, + dispatch_log(disp, ISC_LOG_DEBUG(90), + "TCP read:%s:requests %" PRIuFAST32, isc_result_totext(result), disp->requests); peer = isc_nmhandle_peeraddr(handle); @@ -854,7 +856,8 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, case ISC_R_EOF: case ISC_R_CONNECTIONRESET: isc_sockaddr_format(&peer, buf, sizeof(buf)); - dispatch_log(disp, LVL(90), "shutting down TCP: %s: %s", buf, + dispatch_log(disp, ISC_LOG_DEBUG(90), + "shutting down TCP: %s: %s", buf, isc_result_totext(result)); tcp_recv_shutdown(disp, &resps, result); break; @@ -1190,7 +1193,7 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, isc_sockaddr_format(&disp->local, addrbuf, ISC_SOCKADDR_FORMATSIZE); - mgr_log(mgr, LVL(90), + mgr_log(mgr, ISC_LOG_DEBUG(90), "dns_dispatch_createtcp: created TCP dispatch %p for " "%s", disp, addrbuf); @@ -1325,7 +1328,7 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, isc_sockaddr_format(localaddr, addrbuf, ISC_SOCKADDR_FORMATSIZE); - mgr_log(mgr, LVL(90), + mgr_log(mgr, ISC_LOG_DEBUG(90), "dispatch_createudp: created UDP dispatch %p for %s", disp, addrbuf); } @@ -1368,11 +1371,12 @@ dispatch_destroy(dns_dispatch_t *disp) { INSIST(ISC_LIST_EMPTY(disp->pending)); INSIST(ISC_LIST_EMPTY(disp->active)); - dispatch_log(disp, LVL(90), "destroying dispatch %p", disp); + dispatch_log(disp, ISC_LOG_DEBUG(90), "destroying dispatch %p", disp); if (disp->handle) { - dispatch_log(disp, LVL(90), "detaching TCP handle %p from %p", - disp->handle, &disp->handle); + dispatch_log(disp, ISC_LOG_DEBUG(90), + "detaching TCP handle %p from %p", disp->handle, + &disp->handle); isc_nmhandle_detach(&disp->handle); } dns_dispatchmgr_detach(&disp->mgr); @@ -1393,11 +1397,6 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, dispatch_cb_t connected, dispatch_cb_t sent, dispatch_cb_t response, void *arg, dns_messageid_t *idp, dns_dispentry_t **respp) { - dns_dispentry_t *resp = NULL; - in_port_t localport; - int i = 0; - isc_result_t result = ISC_R_UNSET; - REQUIRE(VALID_DISPATCH(disp)); REQUIRE(dest != NULL); REQUIRE(respp != NULL && *respp == NULL); @@ -1408,18 +1407,15 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, REQUIRE(response != NULL); REQUIRE(sent != NULL); REQUIRE(loop != NULL); - REQUIRE(disp->tid == isc_tid()); if (disp->state == DNS_DISPATCHSTATE_CANCELED) { return (ISC_R_CANCELED); } - localport = isc_sockaddr_getport(&disp->local); - - resp = isc_mem_get(disp->mctx, sizeof(*resp)); + in_port_t localport = isc_sockaddr_getport(&disp->local); + dns_dispentry_t *resp = isc_mem_get(disp->mctx, sizeof(*resp)); *resp = (dns_dispentry_t){ - .port = localport, .timeout = timeout, .peer = *dest, .loop = loop, @@ -1440,7 +1436,8 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, isc_refcount_init(&resp->references, 1); /* DISPENTRY000 */ if (disp->socktype == isc_socktype_udp) { - result = setup_socket(disp, resp, dest, &localport); + isc_result_t result = setup_socket(disp, resp, dest, + &localport); if (result != ISC_R_SUCCESS) { isc_mem_put(disp->mctx, resp, sizeof(*resp)); inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); @@ -1448,6 +1445,8 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, } } + isc_result_t result = ISC_R_NOMORE; + size_t i = 0; rcu_read_lock(); do { /* @@ -1464,7 +1463,6 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, qid_match, resp, &resp->ht_node); if (node != &resp->ht_node) { - result = ISC_R_EXISTS; if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { /* * When using fixed ID, we either must @@ -1480,7 +1478,7 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, fail: if (result != ISC_R_SUCCESS) { isc_mem_put(disp->mctx, resp, sizeof(*resp)); - return (ISC_R_NOMORE); + return (result); } isc_mem_attach(disp->mctx, &resp->mctx); @@ -1518,7 +1516,7 @@ dns_dispatch_getnext(dns_dispentry_t *resp) { isc_result_t result = ISC_R_SUCCESS; int32_t timeout = -1; - dispentry_log(resp, LVL(90), "getnext for QID %d", resp->id); + dispentry_log(resp, ISC_LOG_DEBUG(90), "getnext for QID %d", resp->id); isc_time_t now = isc_loop_now(resp->loop); timeout = resp->timeout - dispentry_runtime(resp, &now); @@ -1554,7 +1552,7 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { bool respond = false; REQUIRE(disp->tid == isc_tid()); - dispentry_log(resp, LVL(90), + dispentry_log(resp, ISC_LOG_DEBUG(90), "canceling response: %s, %s/%s (%s/%s), " "requests %" PRIuFAST32, isc_result_totext(result), state2str(resp->state), @@ -1577,8 +1575,8 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { case DNS_DISPATCHSTATE_CONNECTED: if (resp->reading) { respond = true; - dispentry_log(resp, LVL(90), "canceling read on %p", - resp->handle); + dispentry_log(resp, ISC_LOG_DEBUG(90), + "canceling read on %p", resp->handle); isc_nm_cancelread(resp->handle); } break; @@ -1598,7 +1596,7 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { unlock: if (respond) { - dispentry_log(resp, LVL(90), "read callback: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "read callback: %s", isc_result_totext(result)); resp->response(result, NULL, resp->arg); } @@ -1617,7 +1615,7 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dns_displist_t resps = ISC_LIST_INITIALIZER; REQUIRE(disp->tid == isc_tid()); - dispentry_log(resp, LVL(90), + dispentry_log(resp, ISC_LOG_DEBUG(90), "canceling response: %s, %s/%s (%s/%s), " "requests %" PRIuFAST32, isc_result_totext(result), state2str(resp->state), @@ -1664,14 +1662,14 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { isc_nmhandle_settimeout(disp->handle, 1000); if (!disp->reading) { - dispentry_log(resp, LVL(90), + dispentry_log(resp, ISC_LOG_DEBUG(90), "final 1 second timeout on %p", disp->handle); tcp_startrecv(disp, NULL); } #else if (disp->reading) { - dispentry_log(resp, LVL(90), + dispentry_log(resp, ISC_LOG_DEBUG(90), "canceling read on %p", disp->handle); isc_nm_cancelread(disp->handle); @@ -1742,11 +1740,11 @@ static void udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp) { REQUIRE(VALID_RESPONSE(resp)); - dispentry_log(resp, LVL(90), "attaching handle %p to %p", handle, - &resp->handle); + dispentry_log(resp, ISC_LOG_DEBUG(90), "attaching handle %p to %p", + handle, &resp->handle); isc_nmhandle_attach(handle, &resp->handle); dns_dispentry_ref(resp); /* DISPENTRY003 */ - dispentry_log(resp, LVL(90), "reading"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "reading"); isc_nm_read(resp->handle, udp_recv, resp); resp->reading = true; } @@ -1758,10 +1756,11 @@ tcp_startrecv(dns_dispatch_t *disp, dns_dispentry_t *resp) { dns_dispatch_ref(disp); /* DISPATCH002 */ if (resp != NULL) { - dispentry_log(resp, LVL(90), "reading from %p", disp->handle); + dispentry_log(resp, ISC_LOG_DEBUG(90), "reading from %p", + disp->handle); INSIST(!isc_time_isepoch(&resp->start)); } else { - dispatch_log(disp, LVL(90), + dispatch_log(disp, ISC_LOG_DEBUG(90), "TCP reading without response from %p", disp->handle); } @@ -1794,8 +1793,9 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { ISC_SOCKADDR_FORMATSIZE); } - dispatch_log(disp, LVL(90), "connected from %s to %s: %s", - localbuf, peerbuf, isc_result_totext(eresult)); + dispatch_log(disp, ISC_LOG_DEBUG(90), + "connected from %s to %s: %s", localbuf, peerbuf, + isc_result_totext(eresult)); } REQUIRE(disp->tid == isc_tid()); @@ -1817,7 +1817,7 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { resp->state = DNS_DISPATCHSTATE_CONNECTED; ISC_LIST_APPEND(disp->active, resp, alink); resp->reading = true; - dispentry_log(resp, LVL(90), "start reading"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "start reading"); } else { resp->state = DNS_DISPATCHSTATE_NONE; } @@ -1838,7 +1838,7 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { next = ISC_LIST_NEXT(resp, rlink); ISC_LIST_UNLINK(resps, resp, rlink); - dispentry_log(resp, LVL(90), "connect callback: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "connect callback: %s", isc_result_totext(resp->result)); resp->connected(resp->result, NULL, resp->arg); dns_dispentry_detach(&resp); /* DISPENTRY005 */ @@ -1852,7 +1852,7 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { dns_dispentry_t *resp = (dns_dispentry_t *)arg; dns_dispatch_t *disp = resp->disp; - dispentry_log(resp, LVL(90), "connected: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "connected: %s", isc_result_totext(eresult)); REQUIRE(disp->tid == isc_tid()); @@ -1895,7 +1895,7 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { } unlock: - dispentry_log(resp, LVL(90), "connect callback: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "connect callback: %s", isc_result_totext(eresult)); resp->connected(eresult, NULL, resp->arg); @@ -1958,7 +1958,7 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { ISC_SOCKADDR_FORMATSIZE); dns_dispatch_ref(disp); /* DISPATCH003 */ - dispentry_log(resp, LVL(90), + dispentry_log(resp, ISC_LOG_DEBUG(90), "connecting from %s to %s, timeout %u", localbuf, peerbuf, resp->timeout); @@ -1981,7 +1981,8 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { /* Add the resp to the reading list */ ISC_LIST_APPEND(disp->active, resp, alink); - dispentry_log(resp, LVL(90), "already connected; attaching"); + dispentry_log(resp, ISC_LOG_DEBUG(90), + "already connected; attaching"); resp->reading = true; if (!disp->reading) { @@ -1990,7 +1991,7 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { } /* We are already connected; call the connected cb */ - dispentry_log(resp, LVL(90), "connect callback: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "connect callback: %s", isc_result_totext(ISC_R_SUCCESS)); resp->connected(ISC_R_SUCCESS, NULL, resp->arg); break; @@ -2032,7 +2033,8 @@ send_done(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { REQUIRE(VALID_DISPATCH(disp)); - dispentry_log(resp, LVL(90), "sent: %s", isc_result_totext(result)); + dispentry_log(resp, ISC_LOG_DEBUG(90), "sent: %s", + isc_result_totext(result)); resp->sent(result, NULL, resp->arg); @@ -2049,7 +2051,7 @@ tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, int32_t timeout) { REQUIRE(timeout <= INT16_MAX); - dispentry_log(resp, LVL(90), "continue reading"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "continue reading"); if (!resp->reading) { ISC_LIST_APPEND(disp->active, resp, alink); @@ -2081,7 +2083,7 @@ udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout) { isc_nmhandle_settimeout(resp->handle, timeout); } - dispentry_log(resp, LVL(90), "continue reading"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "continue reading"); dns_dispentry_ref(resp); /* DISPENTRY003 */ isc_nm_read(resp->handle, udp_recv, resp); @@ -2095,7 +2097,7 @@ dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout) { dns_dispatch_t *disp = resp->disp; - dispentry_log(resp, LVL(90), "resume"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "resume"); REQUIRE(disp->tid == isc_tid()); switch (disp->socktype) { @@ -2121,7 +2123,7 @@ dns_dispatch_send(dns_dispentry_t *resp, isc_region_t *r) { dns_dispatch_t *disp = resp->disp; isc_nmhandle_t *sendhandle = NULL; - dispentry_log(resp, LVL(90), "sending"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "sending"); switch (disp->socktype) { case isc_socktype_udp: isc_nmhandle_attach(resp->handle, &sendhandle); From 405860022f0ca33f29c291ffc73379b327c1bbe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Fri, 15 Sep 2023 17:26:35 +0200 Subject: [PATCH 5/5] Add CHANGES note for [GL !8304] --- CHANGES | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES b/CHANGES index 6c69cf74b1..45a341eb51 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,10 @@ +6249. [cleanup] Reduce the number of reserved UDP dispatches + to the number of loops, replace the round-robin + mechanism in dns_dispatchset_t with dispatches + pinned to loops, and use lock-free hash tables + for looking up query IDs and active TCP + connections. [GL !8304] + 6248. [func] Add an option "resolver-use-dns64", which enables application of DNS64 rules to server addresses when sending recursive queries. This allows