diff --git a/CHANGES b/CHANGES index 6c69cf74b1..45a341eb51 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,10 @@ +6249. [cleanup] Reduce the number of reserved UDP dispatches + to the number of loops, replace the round-robin + mechanism in dns_dispatchset_t with dispatches + pinned to loops, and use lock-free hash tables + for looking up query IDs and active TCP + connections. [GL !8304] + 6248. [func] Add an option "resolver-use-dns64", which enables application of DNS64 rules to server addresses when sending recursive queries. This allows diff --git a/bin/delv/delv.c b/bin/delv/delv.c index d8f71d7e24..3a95d3f93d 100644 --- a/bin/delv/delv.c +++ b/bin/delv/delv.c @@ -2142,7 +2142,7 @@ run_server(void *arg) { ns_server_create(mctx, matchview, &sctx); - CHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); + CHECK(dns_dispatchmgr_create(mctx, loopmgr, netmgr, &dispatchmgr)); isc_sockaddr_any(&any); CHECK(dns_dispatch_createudp(dispatchmgr, &any, &dispatch)); CHECK(ns_interfacemgr_create(mctx, sctx, loopmgr, netmgr, dispatchmgr, @@ -2165,7 +2165,7 @@ run_server(void *arg) { dns_view_initsecroots(view); CHECK(setup_dnsseckeys(NULL, view)); - CHECK(dns_view_createresolver(view, loopmgr, 1, netmgr, 0, + CHECK(dns_view_createresolver(view, loopmgr, netmgr, 0, tlsctx_client_cache, dispatch, NULL)); isc_stats_create(mctx, &resstats, dns_resstatscounter_max); diff --git a/bin/named/include/named/globals.h b/bin/named/include/named/globals.h index 03e35f2cd4..1f6edbc09e 100644 --- a/bin/named/include/named/globals.h +++ b/bin/named/include/named/globals.h @@ -50,7 +50,6 @@ EXTERN isc_mem_t *named_g_mctx INIT(NULL); EXTERN unsigned int named_g_cpus INIT(0); -EXTERN unsigned int named_g_udpdisp INIT(0); EXTERN isc_loop_t *named_g_mainloop INIT(NULL); EXTERN isc_loopmgr_t *named_g_loopmgr INIT(NULL); EXTERN bool named_g_loopmgr_running INIT(false); diff --git a/bin/named/main.c b/bin/named/main.c index 0e427a8b0e..521536caa1 100644 --- a/bin/named/main.c +++ b/bin/named/main.c @@ -949,9 +949,7 @@ parse_command_line(int argc, char *argv[]) { parse_T_opt(isc_commandline_argument); break; case 'U': - named_g_udpdisp = parse_int(isc_commandline_argument, - "number of UDP listeners " - "per interface"); + /* Obsolete. No longer in use. Ignore. */ break; case 'u': named_g_username = isc_commandline_argument; @@ -1044,16 +1042,6 @@ create_managers(void) { ISC_LOG_INFO, "found %u CPU%s, using %u worker thread%s", named_g_cpus_detected, named_g_cpus_detected == 1 ? "" : "s", named_g_cpus, named_g_cpus == 1 ? "" : "s"); - if (named_g_udpdisp == 0) { - named_g_udpdisp = named_g_cpus_detected; - } - if (named_g_udpdisp > named_g_cpus) { - named_g_udpdisp = named_g_cpus; - } - isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, - NAMED_LOGMODULE_SERVER, ISC_LOG_INFO, - "using %u UDP listener%s per interface", named_g_udpdisp, - named_g_udpdisp == 1 ? "" : "s"); isc_managers_create(&named_g_mctx, named_g_cpus, &named_g_loopmgr, &named_g_netmgr); diff --git a/bin/named/server.c b/bin/named/server.c index 714c1b10b3..2831951ac8 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -1312,8 +1312,8 @@ get_view_querysource_dispatch(const cfg_obj_t **maps, int af, isc_sockaddr_format(&sa, buf, sizeof(buf)); isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL, NAMED_LOGMODULE_SERVER, ISC_LOG_ERROR, - "could not get query source dispatcher (%s)", - buf); + "could not get query source dispatcher (%s): %s", + buf, isc_result_totext(result)); return (result); } @@ -4033,7 +4033,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, named_cache_t *nsc; bool zero_no_soattl; dns_acl_t *clients = NULL, *mapped = NULL, *excluded = NULL; - unsigned int query_timeout, ndisp; + unsigned int query_timeout; bool old_rpz_ok = false; dns_dyndbctx_t *dctx = NULL; unsigned int resolver_param; @@ -4685,9 +4685,8 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, goto cleanup; } - ndisp = 4 * ISC_MIN(named_g_udpdisp, MAX_UDP_DISPATCH); CHECK(dns_view_createresolver( - view, named_g_loopmgr, ndisp, named_g_netmgr, resopts, + view, named_g_loopmgr, named_g_netmgr, resopts, named_g_server->tlsctx_client_cache, dispatch4, dispatch6)); if (resstats == NULL) { @@ -9898,8 +9897,8 @@ run_server(void *arg) { dns_zonemgr_create(named_g_mctx, named_g_loopmgr, named_g_netmgr, &server->zonemgr); - CHECKFATAL(dns_dispatchmgr_create(named_g_mctx, named_g_netmgr, - &named_g_dispatchmgr), + CHECKFATAL(dns_dispatchmgr_create(named_g_mctx, named_g_loopmgr, + named_g_netmgr, &named_g_dispatchmgr), "creating dispatch manager"); dns_dispatchmgr_setstats(named_g_dispatchmgr, server->resolverstats); @@ -12148,10 +12147,6 @@ named_server_status(named_server_t *server, isc_buffer_t **text) { snprintf(line, sizeof(line), "worker threads: %u\n", named_g_cpus); CHECK(putstr(text, line)); - snprintf(line, sizeof(line), "UDP listeners per interface: %u\n", - named_g_udpdisp); - CHECK(putstr(text, line)); - snprintf(line, sizeof(line), "number of zones: %u (%u automatic)\n", zonecount, automatic); CHECK(putstr(text, line)); diff --git a/bin/nsupdate/nsupdate.c b/bin/nsupdate/nsupdate.c index 3e673f493d..fc42badc64 100644 --- a/bin/nsupdate/nsupdate.c +++ b/bin/nsupdate/nsupdate.c @@ -798,7 +798,7 @@ create_name(const char *str, char *namedata, size_t len, dns_name_t *name) { } static void -setup_system(void) { +setup_system(void *arg ISC_ATTR_UNUSED) { isc_result_t result; isc_sockaddr_t bind_any, bind_any6; isc_sockaddrlist_t *nslist; @@ -919,7 +919,7 @@ setup_system(void) { irs_resconf_destroy(&resconf); - result = dns_dispatchmgr_create(gmctx, netmgr, &dispatchmgr); + result = dns_dispatchmgr_create(gmctx, loopmgr, netmgr, &dispatchmgr); check_result(result, "dns_dispatchmgr_create"); result = dst_lib_init(gmctx, NULL); @@ -3475,8 +3475,7 @@ main(int argc, char **argv) { timeoutms = timeout * 1000; isc_nm_settimeouts(netmgr, timeoutms, timeoutms, timeoutms, timeoutms); - setup_system(); - + isc_loopmgr_setup(loopmgr, setup_system, NULL); isc_loopmgr_setup(loopmgr, getinput, NULL); isc_loopmgr_teardown(loopmgr, shutdown_program, NULL); isc_loopmgr_run(loopmgr); diff --git a/bin/tests/system/pipelined/pipequeries.c b/bin/tests/system/pipelined/pipequeries.c index bfe0c1b84a..ceb26dac84 100644 --- a/bin/tests/system/pipelined/pipequeries.c +++ b/bin/tests/system/pipelined/pipequeries.c @@ -275,7 +275,7 @@ main(int argc, char *argv[]) { RUNCHECK(dst_lib_init(mctx, NULL)); - RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); + RUNCHECK(dns_dispatchmgr_create(mctx, loopmgr, netmgr, &dispatchmgr)); RUNCHECK(dns_dispatch_createudp( dispatchmgr, have_src ? &srcaddr : &bind_any, &dispatchv4)); diff --git a/bin/tools/mdig.c b/bin/tools/mdig.c index f88edf49f6..4de30ab65f 100644 --- a/bin/tools/mdig.c +++ b/bin/tools/mdig.c @@ -116,6 +116,12 @@ static unsigned char cookie_secret[33]; static int onfly = 0; static char hexcookie[81]; +static isc_sockaddr_t bind_any; +static isc_nm_t *netmgr = NULL; +static dns_dispatchmgr_t *dispatchmgr = NULL; +static dns_dispatch_t *dispatchvx = NULL; +static dns_view_t *view = NULL; + struct query { char textname[MXNAME]; /*% Name we're going to be * looking up */ @@ -2069,43 +2075,42 @@ set_source_ports(dns_dispatchmgr_t *manager) { } static void -teardown_view(void *arg) { - dns_view_t *view = arg; +teardown(void *arg ISC_ATTR_UNUSED) { dns_view_detach(&view); -} - -static void -teardown_requestmgr(void *arg) { - dns_requestmgr_t *mgr = arg; - - dns_requestmgr_shutdown(mgr); - dns_requestmgr_detach(&mgr); -} - -static void -teardown_dispatch(void *arg) { - dns_dispatch_t *dispatchv4 = arg; - dns_dispatch_detach(&dispatchv4); -} - -static void -teardown_dispatchmgr(void *arg) { - dns_dispatchmgr_t *dispatchmgr = arg; + dns_requestmgr_shutdown(requestmgr); + dns_requestmgr_detach(&requestmgr); + dns_dispatch_detach(&dispatchvx); dns_dispatchmgr_detach(&dispatchmgr); } +static void +setup(void *arg ISC_ATTR_UNUSED) { + RUNCHECK(dns_dispatchmgr_create(mctx, loopmgr, netmgr, &dispatchmgr)); + + set_source_ports(dispatchmgr); + + if (have_ipv4) { + isc_sockaddr_any(&bind_any); + } else { + isc_sockaddr_any6(&bind_any); + } + RUNCHECK(dns_dispatch_createudp( + dispatchmgr, have_src ? &srcaddr : &bind_any, &dispatchvx)); + + RUNCHECK(dns_requestmgr_create( + mctx, loopmgr, dispatchmgr, have_ipv4 ? dispatchvx : NULL, + have_ipv6 ? dispatchvx : NULL, &requestmgr)); + + RUNCHECK(dns_view_create(mctx, NULL, 0, "_mdig", &view)); +} + /*% Main processing routine for mdig */ int main(int argc, char *argv[]) { struct query *query = NULL; isc_result_t result; - isc_sockaddr_t bind_any; isc_log_t *lctx = NULL; isc_logconfig_t *lcfg = NULL; - isc_nm_t *netmgr = NULL; - dns_dispatchmgr_t *dispatchmgr = NULL; - dns_dispatch_t *dispatchvx = NULL; - dns_view_t *view = NULL; unsigned int i; int ns; @@ -2151,30 +2156,10 @@ main(int argc, char *argv[]) { fatal("can't choose between IPv4 and IPv6"); } - RUNCHECK(dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr)); - - set_source_ports(dispatchmgr); - - if (have_ipv4) { - isc_sockaddr_any(&bind_any); - } else { - isc_sockaddr_any6(&bind_any); - } - RUNCHECK(dns_dispatch_createudp( - dispatchmgr, have_src ? &srcaddr : &bind_any, &dispatchvx)); - - RUNCHECK(dns_requestmgr_create( - mctx, loopmgr, dispatchmgr, have_ipv4 ? dispatchvx : NULL, - have_ipv6 ? dispatchvx : NULL, &requestmgr)); - - RUNCHECK(dns_view_create(mctx, NULL, 0, "_mdig", &view)); - query = ISC_LIST_HEAD(queries); + isc_loopmgr_setup(loopmgr, setup, NULL); isc_loopmgr_setup(loopmgr, sendqueries, query); - isc_loopmgr_teardown(loopmgr, teardown_view, view); - isc_loopmgr_teardown(loopmgr, teardown_requestmgr, requestmgr); - isc_loopmgr_teardown(loopmgr, teardown_dispatch, dispatchvx); - isc_loopmgr_teardown(loopmgr, teardown_dispatchmgr, dispatchmgr); + isc_loopmgr_teardown(loopmgr, teardown, NULL); /* * Stall to the start of a new second. diff --git a/lib/dns/client.c b/lib/dns/client.c index 205c9764bc..486cda4af7 100644 --- a/lib/dns/client.c +++ b/lib/dns/client.c @@ -215,9 +215,8 @@ createview(isc_mem_t *mctx, dns_rdataclass_t rdclass, isc_loopmgr_t *loopmgr, /* Initialize view security roots */ dns_view_initsecroots(view); - CHECK(dns_view_createresolver(view, loopmgr, 1, nm, 0, - tlsctx_client_cache, dispatchv4, - dispatchv6)); + CHECK(dns_view_createresolver(view, loopmgr, nm, 0, tlsctx_client_cache, + dispatchv4, dispatchv6)); CHECK(dns_db_create(mctx, "rbt", dns_rootname, dns_dbtype_cache, rdclass, 0, NULL, &view->cachedb)); @@ -253,7 +252,8 @@ dns_client_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, isc_nm_t *nm, .nm = nm, }; - result = dns_dispatchmgr_create(mctx, nm, &client->dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, nm, + &client->dispatchmgr); if (result != ISC_R_SUCCESS) { goto cleanup_client; } diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index d79614522e..3ba6d91b32 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -19,7 +19,8 @@ #include #include -#include +#include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include #include @@ -44,14 +46,6 @@ typedef ISC_LIST(dns_dispentry_t) dns_displist_t; -typedef struct dns_qid { - unsigned int magic; - isc_mutex_t lock; - unsigned int qid_nbuckets; /*%< hash table size */ - unsigned int qid_increment; /*%< id increment on collision */ - dns_displist_t *qid_table; /*%< the table itself */ -} dns_qid_t; - struct dns_dispatchmgr { /* Unlocked. */ unsigned int magic; @@ -61,11 +55,11 @@ struct dns_dispatchmgr { isc_stats_t *stats; isc_nm_t *nm; - /* Locked by "lock". */ - isc_mutex_t lock; - ISC_LIST(dns_dispatch_t) list; + uint32_t nloops; - dns_qid_t *qid; + struct cds_lfht **tcps; + + struct cds_lfht *qids; in_port_t *v4ports; /*%< available ports for IPv4 */ unsigned int nv4ports; /*%< # of available ports for IPv4 */ @@ -83,13 +77,13 @@ typedef enum { struct dns_dispentry { unsigned int magic; isc_refcount_t references; + isc_mem_t *mctx; dns_dispatch_t *disp; isc_loop_t *loop; isc_nmhandle_t *handle; /*%< netmgr handle for UDP connection */ dns_dispatchstate_t state; dns_transport_t *transport; isc_tlsctx_cache_t *tlsctx_cache; - unsigned int bucket; unsigned int retries; unsigned int timeout; isc_time_t start; @@ -103,69 +97,50 @@ struct dns_dispentry { void *arg; bool reading; isc_result_t result; - ISC_LINK(dns_dispentry_t) link; ISC_LINK(dns_dispentry_t) alink; ISC_LINK(dns_dispentry_t) plink; ISC_LINK(dns_dispentry_t) rlink; + + struct cds_lfht_node ht_node; + struct rcu_head rcu_head; }; struct dns_dispatch { /* Unlocked. */ unsigned int magic; /*%< magic */ uint32_t tid; + isc_socktype_t socktype; + isc_refcount_t references; + isc_mem_t *mctx; dns_dispatchmgr_t *mgr; /*%< dispatch manager */ isc_nmhandle_t *handle; /*%< netmgr handle for TCP connection */ isc_sockaddr_t local; /*%< local address */ - in_port_t localport; /*%< local UDP port */ isc_sockaddr_t peer; /*%< peer address (TCP) */ - /*% Locked by mgr->lock. */ - ISC_LINK(dns_dispatch_t) link; - - /* Locked by "lock". */ - isc_mutex_t lock; /*%< locks all below */ - isc_socktype_t socktype; dns_dispatchstate_t state; - isc_refcount_t references; bool reading; dns_displist_t pending; dns_displist_t active; - unsigned int requests; /*%< how many requests we have */ + uint_fast32_t requests; /*%< how many requests we have */ unsigned int timedout; -}; -#define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ') -#define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC) + struct cds_lfht_node ht_node; + struct rcu_head rcu_head; +}; #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p') #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC) -#define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c') -#define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC) - #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p') #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC) #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r') #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC) -/*% - * Number of buckets in the QID hash table, and the value to - * increment the QID by when attempting to avoid collisions. - * The number of buckets should be prime, and the increment - * should be the next higher prime number. - */ -#ifndef DNS_QID_BUCKETS -#define DNS_QID_BUCKETS 16411 -#endif /* ifndef DNS_QID_BUCKETS */ -#ifndef DNS_QID_INCREMENT -#define DNS_QID_INCREMENT 16433 -#endif /* ifndef DNS_QID_INCREMENT */ - #if DNS_DISPATCH_TRACE #define dns_dispentry_ref(ptr) \ dns_dispentry__ref(ptr, __func__, __FILE__, __LINE__) @@ -180,32 +155,34 @@ ISC_REFCOUNT_TRACE_DECL(dns_dispentry); ISC_REFCOUNT_DECL(dns_dispentry); #endif +/* + * The number of attempts to find unique combination + */ +#define QID_MAX_TRIES 64 + +/* + * Initial and minimum QID table sizes. + */ +#define QIDS_INIT_SIZE (1 << 4) /* Must be power of 2 */ +#define QIDS_MIN_SIZE (1 << 4) /* Must be power of 2 */ + /* * Statics. */ static void dispatchmgr_destroy(dns_dispatchmgr_t *mgr); -static dns_dispentry_t * -entry_search(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t, - unsigned int); static void udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, void *arg); static void tcp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, void *arg); -static uint32_t -dns_hash(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t); static void dispentry_cancel(dns_dispentry_t *resp, isc_result_t result); static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, - dns_dispatch_t **dispp); -static void -qid_allocate(dns_dispatchmgr_t *mgr, dns_qid_t **qidp); -static void -qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); + uint32_t tid, dns_dispatch_t **dispp); static void udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp); static void @@ -218,8 +195,6 @@ tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, static void udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout); -#define LVL(x) ISC_LOG_DEBUG(x) - static const char * socktype2str(dns_dispentry_t *resp) { dns_transport_type_t transport_type = DNS_TRANSPORT_UDP; @@ -357,26 +332,8 @@ dispentry_log(dns_dispentry_t *resp, int level, const char *fmt, ...) { socktype2str(resp), resp, msgbuf); } -/* - * Return a hash of the destination and message id. - */ -static uint32_t -dns_hash(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, - in_port_t port) { - uint32_t ret; - - ret = isc_sockaddr_hash(dest, true); - ret ^= ((uint32_t)id << 16) | port; - ret %= qid->qid_nbuckets; - - INSIST(ret < qid->qid_nbuckets); - - return (ret); -} - /*% * Choose a random port number for a dispatch entry. - * The caller must hold the disp->lock */ static isc_result_t setup_socket(dns_dispatch_t *disp, dns_dispentry_t *resp, @@ -414,31 +371,31 @@ setup_socket(dns_dispatch_t *disp, dns_dispentry_t *resp, return (ISC_R_SUCCESS); } -/* - * Find an entry for query ID 'id', socket address 'dest', and port number - * 'port'. - * Return NULL if no such entry exists. - */ -static dns_dispentry_t * -entry_search(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, - in_port_t port, unsigned int bucket) { - dns_dispentry_t *res = NULL; +static uint32_t +qid_hash(const dns_dispentry_t *dispentry) { + /* + * TODO(OS): Add incremental isc_sockaddr_hash() function and then use + * isc_hash32 API + */ + uint32_t hashval = isc_sockaddr_hash(&dispentry->peer, true); + return (hashval ^ (((uint32_t)dispentry->id << 16) | dispentry->port)); +} - REQUIRE(VALID_QID(qid)); - REQUIRE(bucket < qid->qid_nbuckets); +static int +qid_match(struct cds_lfht_node *node, const void *key0) { + const dns_dispentry_t *dispentry = + caa_container_of(node, dns_dispentry_t, ht_node); + const dns_dispentry_t *key = key0; - res = ISC_LIST_HEAD(qid->qid_table[bucket]); + return (dispentry->id == key->id && dispentry->port == key->port && + isc_sockaddr_equal(&dispentry->peer, &key->peer)); +} - while (res != NULL) { - if (res->id == id && isc_sockaddr_equal(dest, &res->peer) && - res->port == port) - { - return (res); - } - res = ISC_LIST_NEXT(res, link); - } - - return (NULL); +static void +dispentry_destroy_rcu(struct rcu_head *rcu_head) { + dns_dispentry_t *resp = caa_container_of(rcu_head, dns_dispentry_t, + rcu_head); + isc_mem_putanddetach(&resp->mctx, resp, sizeof(*resp)); } static void @@ -451,25 +408,23 @@ dispentry_destroy(dns_dispentry_t *resp) { */ dispentry_cancel(resp, ISC_R_CANCELED); - LOCK(&disp->lock); INSIST(disp->requests > 0); disp->requests--; - UNLOCK(&disp->lock); isc_refcount_destroy(&resp->references); resp->magic = 0; - INSIST(!ISC_LINK_LINKED(resp, link)); INSIST(!ISC_LINK_LINKED(resp, plink)); INSIST(!ISC_LINK_LINKED(resp, alink)); INSIST(!ISC_LINK_LINKED(resp, rlink)); - dispentry_log(resp, LVL(90), "destroying"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "destroying"); if (resp->handle != NULL) { - dispentry_log(resp, LVL(90), "detaching handle %p from %p", - resp->handle, &resp->handle); + dispentry_log(resp, ISC_LOG_DEBUG(90), + "detaching handle %p from %p", resp->handle, + &resp->handle); isc_nmhandle_detach(&resp->handle); } @@ -481,9 +436,9 @@ dispentry_destroy(dns_dispentry_t *resp) { dns_transport_detach(&resp->transport); } - isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); - dns_dispatch_detach(&disp); /* DISPATCH001 */ + + call_rcu(&resp->rcu_head, dispentry_destroy_rcu); } #if DNS_DISPATCH_TRACE @@ -539,7 +494,7 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, disp = resp->disp; - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); INSIST(resp->reading); resp->reading = false; @@ -553,7 +508,8 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, eresult = ISC_R_CANCELED; } - dispentry_log(resp, LVL(90), "read callback:%s, requests %d", + dispentry_log(resp, ISC_LOG_DEBUG(90), + "read callback:%s, requests %" PRIuFAST32, isc_result_totext(eresult), disp->requests); if (eresult != ISC_R_SUCCESS) { @@ -577,11 +533,11 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, NULL) == ISC_R_SUCCESS && match > 0) { - if (isc_log_wouldlog(dns_lctx, LVL(10))) { + if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(10))) { char netaddrstr[ISC_NETADDR_FORMATSIZE]; isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr)); - dispentry_log(resp, LVL(10), + dispentry_log(resp, ISC_LOG_DEBUG(10), "blackholed packet from %s", netaddrstr); } goto next; @@ -597,12 +553,12 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, if (dres != ISC_R_SUCCESS) { char netaddrstr[ISC_NETADDR_FORMATSIZE]; isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr)); - dispentry_log(resp, LVL(10), "got garbage packet from %s", - netaddrstr); + dispentry_log(resp, ISC_LOG_DEBUG(10), + "got garbage packet from %s", netaddrstr); goto next; } - dispentry_log(resp, LVL(92), + dispentry_log(resp, ISC_LOG_DEBUG(92), "got valid DNS message header, /QR %c, id %u", (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); @@ -617,7 +573,8 @@ udp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region, * The QID and the address must match the expected ones. */ if (resp->id != id || !isc_sockaddr_equal(&peer, &resp->peer)) { - dispentry_log(resp, LVL(90), "response doesn't match"); + dispentry_log(resp, ISC_LOG_DEBUG(90), + "response doesn't match"); inc_stats(disp->mgr, dns_resstatscounter_mismatch); goto next; } @@ -652,11 +609,10 @@ next: udp_dispatch_getnext(resp, timeout); done: - UNLOCK(&disp->lock); - if (respond) { - dispentry_log(resp, LVL(90), "UDP read callback on %p: %s", - handle, isc_result_totext(eresult)); + dispentry_log(resp, ISC_LOG_DEBUG(90), + "UDP read callback on %p: %s", handle, + isc_result_totext(eresult)); resp->response(eresult, region, resp->arg); } @@ -677,17 +633,19 @@ tcp_recv_oldest(dns_dispatch_t *disp, dns_dispentry_t **respp) { return (ISC_R_NOTFOUND); } +/* + * NOTE: Must be RCU read locked! + */ static isc_result_t -tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, +tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, isc_sockaddr_t *peer, dns_dispentry_t **respp) { isc_buffer_t source; dns_messageid_t id; unsigned int flags; - unsigned int bucket; isc_result_t result = ISC_R_SUCCESS; - dns_dispentry_t *resp = NULL; - dispatch_log(disp, LVL(90), "TCP read success, length == %d, addr = %p", + dispatch_log(disp, ISC_LOG_DEBUG(90), + "TCP read success, length == %d, addr = %p", region->length, region->base); /* @@ -697,11 +655,11 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, isc_buffer_add(&source, region->length); result = dns_message_peekheader(&source, &id, &flags); if (result != ISC_R_SUCCESS) { - dispatch_log(disp, LVL(10), "got garbage packet"); + dispatch_log(disp, ISC_LOG_DEBUG(10), "got garbage packet"); return (ISC_R_UNEXPECTED); } - dispatch_log(disp, LVL(92), + dispatch_log(disp, ISC_LOG_DEBUG(92), "got valid DNS message header, /QR %c, id %u", (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); @@ -710,7 +668,8 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, * reading. */ if ((flags & DNS_MESSAGEFLAG_QR) == 0) { - dispatch_log(disp, LVL(10), "got DNS query instead of answer"); + dispatch_log(disp, ISC_LOG_DEBUG(10), + "got DNS query instead of answer"); return (ISC_R_UNEXPECTED); } @@ -718,26 +677,33 @@ tcp_recv_success(dns_dispatch_t *disp, isc_region_t *region, dns_qid_t *qid, * We have a valid response; find the associated dispentry object * and call the caller back. */ - bucket = dns_hash(qid, peer, id, disp->localport); - LOCK(&qid->lock); - resp = entry_search(qid, peer, id, disp->localport, bucket); + dns_dispentry_t key = { + .id = id, + .peer = *peer, + .port = isc_sockaddr_getport(&disp->local), + }; + struct cds_lfht_iter iter; + cds_lfht_lookup(disp->mgr->qids, qid_hash(&key), qid_match, &key, + &iter); + + dns_dispentry_t *resp = cds_lfht_entry(cds_lfht_iter_get_node(&iter), + dns_dispentry_t, ht_node); if (resp != NULL) { - if (resp->reading) { - *respp = resp; - } else { + if (!resp->reading) { /* * We already got a message for this QID and weren't * expecting any more. */ result = ISC_R_UNEXPECTED; + } else { + *respp = resp; } } else { - /* We are not expecting this DNS message */ result = ISC_R_NOTFOUND; } - dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", - bucket, isc_result_totext(result)); - UNLOCK(&qid->lock); + dispatch_log(disp, ISC_LOG_DEBUG(90), + "search for response in hashtable: %s", + isc_result_totext(result)); return (result); } @@ -776,7 +742,7 @@ tcp_recv_processall(dns_displist_t *resps, isc_region_t *region) { next = ISC_LIST_NEXT(resp, rlink); ISC_LIST_UNLINK(*resps, resp, rlink); - dispentry_log(resp, LVL(90), "read callback: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "read callback: %s", isc_result_totext(resp->result)); resp->response(resp->result, region, resp->arg); dns_dispentry_detach(&resp); /* DISPENTRY009 */ @@ -801,7 +767,6 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, void *arg) { dns_dispatch_t *disp = (dns_dispatch_t *)arg; dns_dispentry_t *resp = NULL; - dns_qid_t *qid = NULL; char buf[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_t peer; dns_displist_t resps = ISC_LIST_INITIALIZER; @@ -810,17 +775,17 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, REQUIRE(VALID_DISPATCH(disp)); - qid = disp->mgr->qid; - - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); INSIST(disp->reading); disp->reading = false; - dispatch_log(disp, LVL(90), "TCP read:%s:requests %u", + dispatch_log(disp, ISC_LOG_DEBUG(90), + "TCP read:%s:requests %" PRIuFAST32, isc_result_totext(result), disp->requests); peer = isc_nmhandle_peeraddr(handle); + rcu_read_lock(); /* * Phase 1: Process timeout and success. */ @@ -833,7 +798,7 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, break; case ISC_R_SUCCESS: /* We got an answer */ - result = tcp_recv_success(disp, region, qid, &peer, &resp); + result = tcp_recv_success(disp, region, &peer, &resp); break; default: @@ -891,7 +856,8 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, case ISC_R_EOF: case ISC_R_CONNECTIONRESET: isc_sockaddr_format(&peer, buf, sizeof(buf)); - dispatch_log(disp, LVL(90), "shutting down TCP: %s: %s", buf, + dispatch_log(disp, ISC_LOG_DEBUG(90), + "shutting down TCP: %s: %s", buf, isc_result_totext(result)); tcp_recv_shutdown(disp, &resps, result); break; @@ -916,7 +882,7 @@ tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region, isc_nmhandle_settimeout(handle, timeout); } - UNLOCK(&disp->lock); + rcu_read_unlock(); /* * Phase 6: Process all scheduled callbacks. @@ -994,7 +960,7 @@ setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset, */ isc_result_t -dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, +dns_dispatchmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, isc_nm_t *nm, dns_dispatchmgr_t **mgrp) { dns_dispatchmgr_t *mgr = NULL; isc_portset_t *v4portset = NULL; @@ -1004,7 +970,10 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, REQUIRE(mgrp != NULL && *mgrp == NULL); mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t)); - *mgr = (dns_dispatchmgr_t){ .magic = 0 }; + *mgr = (dns_dispatchmgr_t){ + .magic = 0, + .nloops = isc_loopmgr_nloops(loopmgr), + }; #if DNS_DISPATCH_TRACE fprintf(stderr, "dns_dispatchmgr__init:%s:%s:%d:%p->references = 1\n", @@ -1015,19 +984,25 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, isc_mem_attach(mctx, &mgr->mctx); isc_nm_attach(nm, &mgr->nm); - isc_mutex_init(&mgr->lock); + mgr->tcps = isc_mem_cget(mgr->mctx, mgr->nloops, sizeof(mgr->tcps[0])); + for (size_t i = 0; i < mgr->nloops; i++) { + mgr->tcps[i] = cds_lfht_new( + 2, 2, 0, CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, + NULL); + } - ISC_LIST_INIT(mgr->list); - - create_default_portset(mctx, AF_INET, &v4portset); - create_default_portset(mctx, AF_INET6, &v6portset); + create_default_portset(mgr->mctx, AF_INET, &v4portset); + create_default_portset(mgr->mctx, AF_INET6, &v6portset); setavailports(mgr, v4portset, v6portset); - isc_portset_destroy(mctx, &v4portset); - isc_portset_destroy(mctx, &v6portset); + isc_portset_destroy(mgr->mctx, &v4portset); + isc_portset_destroy(mgr->mctx, &v6portset); + + mgr->qids = cds_lfht_new(QIDS_INIT_SIZE, QIDS_MIN_SIZE, 0, + CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, + NULL); - qid_allocate(mgr, &mgr->qid); mgr->magic = DNS_DISPATCHMGR_MAGIC; *mgrp = mgr; @@ -1069,9 +1044,13 @@ dispatchmgr_destroy(dns_dispatchmgr_t *mgr) { isc_refcount_destroy(&mgr->references); mgr->magic = 0; - isc_mutex_destroy(&mgr->lock); - qid_destroy(mgr->mctx, &mgr->qid); + RUNTIME_CHECK(!cds_lfht_destroy(mgr->qids, NULL)); + + for (size_t i = 0; i < mgr->nloops; i++) { + RUNTIME_CHECK(!cds_lfht_destroy(mgr->tcps[i], NULL)); + } + isc_mem_cput(mgr->mctx, mgr->tcps, mgr->nloops, sizeof(mgr->tcps[0])); if (mgr->blackhole != NULL) { dns_acl_detach(&mgr->blackhole); @@ -1098,56 +1077,16 @@ dispatchmgr_destroy(dns_dispatchmgr_t *mgr) { void dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) { REQUIRE(VALID_DISPATCHMGR(mgr)); - REQUIRE(ISC_LIST_EMPTY(mgr->list)); REQUIRE(mgr->stats == NULL); isc_stats_attach(stats, &mgr->stats); } -static void -qid_allocate(dns_dispatchmgr_t *mgr, dns_qid_t **qidp) { - dns_qid_t *qid = NULL; - unsigned int i; - - REQUIRE(qidp != NULL && *qidp == NULL); - - qid = isc_mem_get(mgr->mctx, sizeof(*qid)); - *qid = (dns_qid_t){ .qid_nbuckets = DNS_QID_BUCKETS, - .qid_increment = DNS_QID_INCREMENT }; - - qid->qid_table = isc_mem_cget(mgr->mctx, DNS_QID_BUCKETS, - sizeof(dns_displist_t)); - for (i = 0; i < qid->qid_nbuckets; i++) { - ISC_LIST_INIT(qid->qid_table[i]); - } - - isc_mutex_init(&qid->lock); - qid->magic = QID_MAGIC; - *qidp = qid; -} - -static void -qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) { - dns_qid_t *qid = NULL; - - REQUIRE(qidp != NULL); - qid = *qidp; - *qidp = NULL; - - REQUIRE(VALID_QID(qid)); - - qid->magic = 0; - isc_mem_cput(mctx, qid->qid_table, qid->qid_nbuckets, - sizeof(dns_displist_t)); - isc_mutex_destroy(&qid->lock); - isc_mem_put(mctx, qid, sizeof(*qid)); -} - /* * Allocate and set important limits. */ static void -dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, +dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, uint32_t tid, dns_dispatch_t **dispp) { dns_dispatch_t *disp = NULL; @@ -1162,35 +1101,68 @@ dispatch_allocate(dns_dispatchmgr_t *mgr, isc_socktype_t type, disp = isc_mem_get(mgr->mctx, sizeof(*disp)); *disp = (dns_dispatch_t){ .socktype = type, - .link = ISC_LINK_INITIALIZER, .active = ISC_LIST_INITIALIZER, .pending = ISC_LIST_INITIALIZER, - .tid = isc_tid(), + .tid = tid, .magic = DISPATCH_MAGIC, }; + isc_mem_attach(mgr->mctx, &disp->mctx); + dns_dispatchmgr_attach(mgr, &disp->mgr); #if DNS_DISPATCH_TRACE fprintf(stderr, "dns_dispatch__init:%s:%s:%d:%p->references = 1\n", __func__, __FILE__, __LINE__, disp); #endif isc_refcount_init(&disp->references, 1); /* DISPATCH000 */ - isc_mutex_init(&disp->lock); *dispp = disp; } +struct dispatch_key { + const isc_sockaddr_t *local; + const isc_sockaddr_t *peer; +}; + +static uint32_t +dispatch_hash(struct dispatch_key *key) { + uint32_t hashval = isc_sockaddr_hash(key->peer, false); + if (key->local) { + hashval ^= isc_sockaddr_hash(key->local, true); + } + + return (hashval); +} + +static int +dispatch_match(struct cds_lfht_node *node, const void *key0) { + dns_dispatch_t *disp = caa_container_of(node, dns_dispatch_t, ht_node); + const struct dispatch_key *key = key0; + isc_sockaddr_t local; + isc_sockaddr_t peer; + + if (disp->handle != NULL) { + local = isc_nmhandle_localaddr(disp->handle); + peer = isc_nmhandle_peeraddr(disp->handle); + } else { + local = disp->local; + peer = disp->peer; + } + + return (isc_sockaddr_equal(&peer, key->peer) && + (key->local == NULL || isc_sockaddr_equal(&local, key->local))); +} + isc_result_t dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, const isc_sockaddr_t *destaddr, dns_dispatch_t **dispp) { dns_dispatch_t *disp = NULL; + uint32_t tid = isc_tid(); REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(destaddr != NULL); - LOCK(&mgr->lock); - - dispatch_allocate(mgr, isc_socktype_tcp, &disp); + dispatch_allocate(mgr, isc_socktype_tcp, tid, &disp); disp->peer = *destaddr; @@ -1206,10 +1178,14 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, /* * Append it to the dispatcher list. */ + struct dispatch_key key = { + .local = &disp->local, + .peer = &disp->peer, + }; - /* FIXME: There should be a lookup hashtable here */ - ISC_LIST_APPEND(mgr->list, disp, link); - UNLOCK(&mgr->lock); + rcu_read_lock(); + cds_lfht_add(mgr->tcps[tid], dispatch_hash(&key), &disp->ht_node); + rcu_read_unlock(); if (isc_log_wouldlog(dns_lctx, 90)) { char addrbuf[ISC_SOCKADDR_FORMATSIZE]; @@ -1217,7 +1193,7 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, isc_sockaddr_format(&disp->local, addrbuf, ISC_SOCKADDR_FORMATSIZE); - mgr_log(mgr, LVL(90), + mgr_log(mgr, ISC_LOG_DEBUG(90), "dns_dispatch_createtcp: created TCP dispatch %p for " "%s", disp, addrbuf); @@ -1233,48 +1209,25 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, dns_dispatch_t *disp_connected = NULL; dns_dispatch_t *disp_fallback = NULL; isc_result_t result = ISC_R_NOTFOUND; + uint32_t tid = isc_tid(); REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(destaddr != NULL); REQUIRE(dispp != NULL && *dispp == NULL); - LOCK(&mgr->lock); + struct dispatch_key key = { + .local = localaddr, + .peer = destaddr, + }; - for (dns_dispatch_t *disp = ISC_LIST_HEAD(mgr->list); disp != NULL; - disp = ISC_LIST_NEXT(disp, link)) - { - isc_sockaddr_t sockname; - isc_sockaddr_t peeraddr; - - LOCK(&disp->lock); - - if (disp->tid != isc_tid()) { - UNLOCK(&disp->lock); - continue; - } - - if (disp->handle != NULL) { - sockname = isc_nmhandle_localaddr(disp->handle); - peeraddr = isc_nmhandle_peeraddr(disp->handle); - } else { - sockname = disp->local; - peeraddr = disp->peer; - } - - /* - * The conditions match: - * 1. socktype is TCP - * 2. destination address is same - * 3. local address is either NULL or same - */ - if (disp->socktype != isc_socktype_tcp || - !isc_sockaddr_equal(destaddr, &peeraddr) || - (localaddr != NULL && - !isc_sockaddr_eqaddr(localaddr, &sockname))) - { - UNLOCK(&disp->lock); - continue; - } + rcu_read_lock(); + struct cds_lfht_iter iter; + dns_dispatch_t *disp = NULL; + cds_lfht_for_each_entry_duplicate(mgr->tcps[tid], dispatch_hash(&key), + dispatch_match, &key, &iter, disp, + ht_node) { + INSIST(disp->tid == isc_tid()); + INSIST(disp->socktype == isc_socktype_tcp); switch (disp->state) { case DNS_DISPATCHSTATE_NONE: @@ -1305,12 +1258,11 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, UNREACHABLE(); } - UNLOCK(&disp->lock); - if (disp_connected != NULL) { break; } } + rcu_read_unlock(); if (disp_connected != NULL) { /* We found connected dispatch */ @@ -1330,8 +1282,6 @@ dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, result = ISC_R_SUCCESS; } - UNLOCK(&mgr->lock); - return (result); } @@ -1345,19 +1295,17 @@ dns_dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, REQUIRE(localaddr != NULL); REQUIRE(dispp != NULL && *dispp == NULL); - LOCK(&mgr->lock); - result = dispatch_createudp(mgr, localaddr, &disp); + result = dispatch_createudp(mgr, localaddr, isc_tid(), &disp); if (result == ISC_R_SUCCESS) { *dispp = disp; } - UNLOCK(&mgr->lock); return (result); } static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, - dns_dispatch_t **dispp) { + uint32_t tid, dns_dispatch_t **dispp) { isc_result_t result = ISC_R_SUCCESS; dns_dispatch_t *disp = NULL; isc_sockaddr_t sa_any; @@ -1373,14 +1321,14 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, } } - dispatch_allocate(mgr, isc_socktype_udp, &disp); + dispatch_allocate(mgr, isc_socktype_udp, tid, &disp); if (isc_log_wouldlog(dns_lctx, 90)) { char addrbuf[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_format(localaddr, addrbuf, ISC_SOCKADDR_FORMATSIZE); - mgr_log(mgr, LVL(90), + mgr_log(mgr, ISC_LOG_DEBUG(90), "dispatch_createudp: created UDP dispatch %p for %s", disp, addrbuf); } @@ -1399,42 +1347,41 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *localaddr, return (result); } +static void +dispatch_destroy_rcu(struct rcu_head *rcu_head) { + dns_dispatch_t *disp = caa_container_of(rcu_head, dns_dispatch_t, + rcu_head); + + isc_mem_putanddetach(&disp->mctx, disp, sizeof(*disp)); +} + static void dispatch_destroy(dns_dispatch_t *disp) { dns_dispatchmgr_t *mgr = disp->mgr; + uint32_t tid = isc_tid(); isc_refcount_destroy(&disp->references); disp->magic = 0; - LOCK(&mgr->lock); - if (ISC_LINK_LINKED(disp, link)) { - ISC_LIST_UNLINK(disp->mgr->list, disp, link); + if (disp->socktype == isc_socktype_tcp) { + (void)cds_lfht_del(mgr->tcps[tid], &disp->ht_node); } - UNLOCK(&mgr->lock); INSIST(disp->requests == 0); INSIST(ISC_LIST_EMPTY(disp->pending)); INSIST(ISC_LIST_EMPTY(disp->active)); - INSIST(!ISC_LINK_LINKED(disp, link)); - - dispatch_log(disp, LVL(90), "destroying dispatch %p", disp); + dispatch_log(disp, ISC_LOG_DEBUG(90), "destroying dispatch %p", disp); if (disp->handle) { - dispatch_log(disp, LVL(90), "detaching TCP handle %p from %p", - disp->handle, &disp->handle); + dispatch_log(disp, ISC_LOG_DEBUG(90), + "detaching TCP handle %p from %p", disp->handle, + &disp->handle); isc_nmhandle_detach(&disp->handle); } + dns_dispatchmgr_detach(&disp->mgr); - isc_mutex_destroy(&disp->lock); - - isc_mem_put(mgr->mctx, disp, sizeof(*disp)); - - /* - * Because dispatch uses mgr->mctx, we must detach after freeing - * dispatch, not before. - */ - dns_dispatchmgr_detach(&mgr); + call_rcu(&disp->rcu_head, dispatch_destroy_rcu); } #if DNS_DISPATCH_TRACE @@ -1450,14 +1397,6 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, dispatch_cb_t connected, dispatch_cb_t sent, dispatch_cb_t response, void *arg, dns_messageid_t *idp, dns_dispentry_t **respp) { - dns_dispentry_t *resp = NULL; - dns_qid_t *qid = NULL; - in_port_t localport; - dns_messageid_t id; - unsigned int bucket; - bool ok = false; - int i = 0; - REQUIRE(VALID_DISPATCH(disp)); REQUIRE(dest != NULL); REQUIRE(respp != NULL && *respp == NULL); @@ -1468,21 +1407,15 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, REQUIRE(response != NULL); REQUIRE(sent != NULL); REQUIRE(loop != NULL); - - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); if (disp->state == DNS_DISPATCHSTATE_CANCELED) { - UNLOCK(&disp->lock); return (ISC_R_CANCELED); } - qid = disp->mgr->qid; - - localport = isc_sockaddr_getport(&disp->local); - - resp = isc_mem_get(disp->mgr->mctx, sizeof(*resp)); + in_port_t localport = isc_sockaddr_getport(&disp->local); + dns_dispentry_t *resp = isc_mem_get(disp->mctx, sizeof(*resp)); *resp = (dns_dispentry_t){ - .port = localport, .timeout = timeout, .peer = *dest, .loop = loop, @@ -1490,7 +1423,6 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, .sent = sent, .response = response, .arg = arg, - .link = ISC_LINK_INITIALIZER, .alink = ISC_LINK_INITIALIZER, .plink = ISC_LINK_INITIALIZER, .rlink = ISC_LINK_INITIALIZER, @@ -1507,53 +1439,49 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, isc_result_t result = setup_socket(disp, resp, dest, &localport); if (result != ISC_R_SUCCESS) { - isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); - UNLOCK(&disp->lock); + isc_mem_put(disp->mctx, resp, sizeof(*resp)); inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); return (result); } } - /* - * Try somewhat hard to find a unique ID. Start with - * a random number unless DNS_DISPATCHOPT_FIXEDID is set, - * in which case we start with the ID passed in via *idp. - */ - if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { - id = *idp; - } else { - id = (dns_messageid_t)isc_random16(); - } - - LOCK(&qid->lock); + isc_result_t result = ISC_R_NOMORE; + size_t i = 0; + rcu_read_lock(); do { - dns_dispentry_t *entry = NULL; - bucket = dns_hash(qid, dest, id, localport); - entry = entry_search(qid, dest, id, localport, bucket); - if (entry == NULL) { - ok = true; + /* + * Try somewhat hard to find a unique ID. Start with + * a random number unless DNS_DISPATCHOPT_FIXEDID is set, + * in which case we start with the ID passed in via *idp. + */ + resp->id = ((options & DNS_DISPATCHOPT_FIXEDID) != 0) + ? *idp + : (dns_messageid_t)isc_random16(); + + struct cds_lfht_node *node = + cds_lfht_add_unique(disp->mgr->qids, qid_hash(resp), + qid_match, resp, &resp->ht_node); + + if (node != &resp->ht_node) { + if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { + /* + * When using fixed ID, we either must + * use it or fail + */ + goto fail; + } + } else { + result = ISC_R_SUCCESS; break; } - if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { - /* When using fixed ID, we either must use it or fail */ - break; - } - id += qid->qid_increment; - id &= 0x0000ffff; - } while (i++ < 64); - - if (ok) { - resp->id = id; - resp->bucket = bucket; - ISC_LIST_APPEND(qid->qid_table[bucket], resp, link); + } while (i++ < QID_MAX_TRIES); +fail: + if (result != ISC_R_SUCCESS) { + isc_mem_put(disp->mctx, resp, sizeof(*resp)); + return (result); } - UNLOCK(&qid->lock); - if (!ok) { - isc_mem_put(disp->mgr->mctx, resp, sizeof(*resp)); - UNLOCK(&disp->lock); - return (ISC_R_NOMORE); - } + isc_mem_attach(disp->mctx, &resp->mctx); if (transport != NULL) { dns_transport_attach(transport, &resp->transport); @@ -1571,9 +1499,9 @@ dns_dispatch_add(dns_dispatch_t *disp, isc_loop_t *loop, unsigned int options, ? dns_resstatscounter_disprequdp : dns_resstatscounter_dispreqtcp); - UNLOCK(&disp->lock); + rcu_read_unlock(); - *idp = id; + *idp = resp->id; *respp = resp; return (ISC_R_SUCCESS); @@ -1588,7 +1516,7 @@ dns_dispatch_getnext(dns_dispentry_t *resp) { isc_result_t result = ISC_R_SUCCESS; int32_t timeout = -1; - dispentry_log(resp, LVL(90), "getnext for QID %d", resp->id); + dispentry_log(resp, ISC_LOG_DEBUG(90), "getnext for QID %d", resp->id); isc_time_t now = isc_loop_now(resp->loop); timeout = resp->timeout - dispentry_runtime(resp, &now); @@ -1596,7 +1524,7 @@ dns_dispatch_getnext(dns_dispentry_t *resp) { return (ISC_R_TIMEDOUT); } - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); switch (disp->socktype) { case isc_socktype_udp: udp_dispatch_getnext(resp, timeout); @@ -1607,11 +1535,13 @@ dns_dispatch_getnext(dns_dispentry_t *resp) { default: UNREACHABLE(); } - UNLOCK(&disp->lock); return (result); } +/* + * NOTE: Must be RCU read locked! + */ static void udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { REQUIRE(VALID_RESPONSE(resp)); @@ -1619,14 +1549,12 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr)); dns_dispatch_t *disp = resp->disp; - dns_dispatchmgr_t *mgr = disp->mgr; - dns_qid_t *qid = mgr->qid; bool respond = false; - LOCK(&disp->lock); - dispentry_log(resp, LVL(90), + REQUIRE(disp->tid == isc_tid()); + dispentry_log(resp, ISC_LOG_DEBUG(90), "canceling response: %s, %s/%s (%s/%s), " - "requests %u", + "requests %" PRIuFAST32, isc_result_totext(result), state2str(resp->state), resp->reading ? "reading" : "not reading", state2str(disp->state), @@ -1647,8 +1575,8 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { case DNS_DISPATCHSTATE_CONNECTED: if (resp->reading) { respond = true; - dispentry_log(resp, LVL(90), "canceling read on %p", - resp->handle); + dispentry_log(resp, ISC_LOG_DEBUG(90), + "canceling read on %p", resp->handle); isc_nm_cancelread(resp->handle); } break; @@ -1662,21 +1590,21 @@ udp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dec_stats(disp->mgr, dns_resstatscounter_disprequdp); - LOCK(&qid->lock); - ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link); - UNLOCK(&qid->lock); + (void)cds_lfht_del(disp->mgr->qids, &resp->ht_node); + resp->state = DNS_DISPATCHSTATE_CANCELED; unlock: - UNLOCK(&disp->lock); - if (respond) { - dispentry_log(resp, LVL(90), "read callback: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "read callback: %s", isc_result_totext(result)); resp->response(result, NULL, resp->arg); } } +/* + * NOTE: Must be RCU read locked! + */ static void tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { REQUIRE(VALID_RESPONSE(resp)); @@ -1684,14 +1612,12 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { REQUIRE(VALID_DISPATCHMGR(resp->disp->mgr)); dns_dispatch_t *disp = resp->disp; - dns_dispatchmgr_t *mgr = disp->mgr; - dns_qid_t *qid = mgr->qid; dns_displist_t resps = ISC_LIST_INITIALIZER; - LOCK(&disp->lock); - dispentry_log(resp, LVL(90), + REQUIRE(disp->tid == isc_tid()); + dispentry_log(resp, ISC_LOG_DEBUG(90), "canceling response: %s, %s/%s (%s/%s), " - "requests %u", + "requests %" PRIuFAST32, isc_result_totext(result), state2str(resp->state), resp->reading ? "reading" : "not reading", state2str(disp->state), @@ -1736,14 +1662,14 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { isc_nmhandle_settimeout(disp->handle, 1000); if (!disp->reading) { - dispentry_log(resp, LVL(90), + dispentry_log(resp, ISC_LOG_DEBUG(90), "final 1 second timeout on %p", disp->handle); tcp_startrecv(disp, NULL); } #else if (disp->reading) { - dispentry_log(resp, LVL(90), + dispentry_log(resp, ISC_LOG_DEBUG(90), "canceling read on %p", disp->handle); isc_nm_cancelread(disp->handle); @@ -1761,13 +1687,11 @@ tcp_dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dec_stats(disp->mgr, dns_resstatscounter_dispreqtcp); - LOCK(&qid->lock); - ISC_LIST_UNLINK(qid->qid_table[resp->bucket], resp, link); - UNLOCK(&qid->lock); + (void)cds_lfht_del(disp->mgr->qids, &resp->ht_node); + resp->state = DNS_DISPATCHSTATE_CANCELED; unlock: - UNLOCK(&disp->lock); /* * NOTE: Calling the response callback directly from here should be done @@ -1787,6 +1711,7 @@ dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { dns_dispatch_t *disp = resp->disp; + rcu_read_lock(); switch (disp->socktype) { case isc_socktype_udp: udp_dispentry_cancel(resp, result); @@ -1797,6 +1722,7 @@ dispentry_cancel(dns_dispentry_t *resp, isc_result_t result) { default: UNREACHABLE(); } + rcu_read_unlock(); } void @@ -1814,11 +1740,11 @@ static void udp_startrecv(isc_nmhandle_t *handle, dns_dispentry_t *resp) { REQUIRE(VALID_RESPONSE(resp)); - dispentry_log(resp, LVL(90), "attaching handle %p to %p", handle, - &resp->handle); + dispentry_log(resp, ISC_LOG_DEBUG(90), "attaching handle %p to %p", + handle, &resp->handle); isc_nmhandle_attach(handle, &resp->handle); dns_dispentry_ref(resp); /* DISPENTRY003 */ - dispentry_log(resp, LVL(90), "reading"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "reading"); isc_nm_read(resp->handle, udp_recv, resp); resp->reading = true; } @@ -1830,10 +1756,11 @@ tcp_startrecv(dns_dispatch_t *disp, dns_dispentry_t *resp) { dns_dispatch_ref(disp); /* DISPATCH002 */ if (resp != NULL) { - dispentry_log(resp, LVL(90), "reading from %p", disp->handle); + dispentry_log(resp, ISC_LOG_DEBUG(90), "reading from %p", + disp->handle); INSIST(!isc_time_isepoch(&resp->start)); } else { - dispatch_log(disp, LVL(90), + dispatch_log(disp, ISC_LOG_DEBUG(90), "TCP reading without response from %p", disp->handle); } @@ -1866,11 +1793,12 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { ISC_SOCKADDR_FORMATSIZE); } - dispatch_log(disp, LVL(90), "connected from %s to %s: %s", - localbuf, peerbuf, isc_result_totext(eresult)); + dispatch_log(disp, ISC_LOG_DEBUG(90), + "connected from %s to %s: %s", localbuf, peerbuf, + isc_result_totext(eresult)); } - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); INSIST(disp->state == DNS_DISPATCHSTATE_CONNECTING); /* @@ -1889,7 +1817,7 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { resp->state = DNS_DISPATCHSTATE_CONNECTED; ISC_LIST_APPEND(disp->active, resp, alink); resp->reading = true; - dispentry_log(resp, LVL(90), "start reading"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "start reading"); } else { resp->state = DNS_DISPATCHSTATE_NONE; } @@ -1906,13 +1834,11 @@ tcp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { disp->state = DNS_DISPATCHSTATE_NONE; } - UNLOCK(&disp->lock); - for (resp = ISC_LIST_HEAD(resps); resp != NULL; resp = next) { next = ISC_LIST_NEXT(resp, rlink); ISC_LIST_UNLINK(resps, resp, rlink); - dispentry_log(resp, LVL(90), "connect callback: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "connect callback: %s", isc_result_totext(resp->result)); resp->connected(resp->result, NULL, resp->arg); dns_dispentry_detach(&resp); /* DISPENTRY005 */ @@ -1926,11 +1852,10 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { dns_dispentry_t *resp = (dns_dispentry_t *)arg; dns_dispatch_t *disp = resp->disp; - dispentry_log(resp, LVL(90), "connected: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "connected: %s", isc_result_totext(eresult)); - LOCK(&disp->lock); - + REQUIRE(disp->tid == isc_tid()); switch (resp->state) { case DNS_DISPATCHSTATE_CANCELED: eresult = ISC_R_CANCELED; @@ -1958,7 +1883,6 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { /* probably a port collision; try a different one */ result = setup_socket(disp, resp, &resp->peer, &localport); if (result == ISC_R_SUCCESS) { - UNLOCK(&disp->lock); udp_dispatch_connect(disp, resp); goto detach; } @@ -1970,9 +1894,8 @@ udp_connected(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) { break; } unlock: - UNLOCK(&disp->lock); - dispentry_log(resp, LVL(90), "connect callback: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "connect callback: %s", isc_result_totext(eresult)); resp->connected(eresult, NULL, resp->arg); @@ -1982,12 +1905,11 @@ detach: static void udp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); resp->state = DNS_DISPATCHSTATE_CONNECTING; resp->start = isc_loop_now(resp->loop); dns_dispentry_ref(resp); /* DISPENTRY004 */ ISC_LIST_APPEND(disp->pending, resp, plink); - UNLOCK(&disp->lock); isc_nm_udpconnect(disp->mgr->nm, &resp->local, &resp->peer, udp_connected, resp, resp->timeout); @@ -2008,7 +1930,7 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { result = dns_transport_get_tlsctx( resp->transport, &resp->peer, resp->tlsctx_cache, - resp->disp->mgr->mctx, &tlsctx, &sess_cache); + resp->mctx, &tlsctx, &sess_cache); if (result != ISC_R_SUCCESS) { return (result); @@ -2017,7 +1939,7 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { } /* Check whether the dispatch is already connecting or connected. */ - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); switch (disp->state) { case DNS_DISPATCHSTATE_NONE: /* First connection, continue with connecting */ @@ -2026,7 +1948,6 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { resp->start = isc_loop_now(resp->loop); dns_dispentry_ref(resp); /* DISPENTRY005 */ ISC_LIST_APPEND(disp->pending, resp, plink); - UNLOCK(&disp->lock); char localbuf[ISC_SOCKADDR_FORMATSIZE]; char peerbuf[ISC_SOCKADDR_FORMATSIZE]; @@ -2037,7 +1958,7 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { ISC_SOCKADDR_FORMATSIZE); dns_dispatch_ref(disp); /* DISPATCH003 */ - dispentry_log(resp, LVL(90), + dispentry_log(resp, ISC_LOG_DEBUG(90), "connecting from %s to %s, timeout %u", localbuf, peerbuf, resp->timeout); @@ -2052,7 +1973,6 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { resp->start = isc_loop_now(resp->loop); dns_dispentry_ref(resp); /* DISPENTRY005 */ ISC_LIST_APPEND(disp->pending, resp, plink); - UNLOCK(&disp->lock); break; case DNS_DISPATCHSTATE_CONNECTED: @@ -2061,7 +1981,8 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { /* Add the resp to the reading list */ ISC_LIST_APPEND(disp->active, resp, alink); - dispentry_log(resp, LVL(90), "already connected; attaching"); + dispentry_log(resp, ISC_LOG_DEBUG(90), + "already connected; attaching"); resp->reading = true; if (!disp->reading) { @@ -2069,9 +1990,8 @@ tcp_dispatch_connect(dns_dispatch_t *disp, dns_dispentry_t *resp) { tcp_startrecv(disp, resp); } - UNLOCK(&disp->lock); /* We are already connected; call the connected cb */ - dispentry_log(resp, LVL(90), "connect callback: %s", + dispentry_log(resp, ISC_LOG_DEBUG(90), "connect callback: %s", isc_result_totext(ISC_R_SUCCESS)); resp->connected(ISC_R_SUCCESS, NULL, resp->arg); break; @@ -2113,7 +2033,8 @@ send_done(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) { REQUIRE(VALID_DISPATCH(disp)); - dispentry_log(resp, LVL(90), "sent: %s", isc_result_totext(result)); + dispentry_log(resp, ISC_LOG_DEBUG(90), "sent: %s", + isc_result_totext(result)); resp->sent(result, NULL, resp->arg); @@ -2130,7 +2051,7 @@ tcp_dispatch_getnext(dns_dispatch_t *disp, dns_dispentry_t *resp, int32_t timeout) { REQUIRE(timeout <= INT16_MAX); - dispentry_log(resp, LVL(90), "continue reading"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "continue reading"); if (!resp->reading) { ISC_LIST_APPEND(disp->active, resp, alink); @@ -2162,7 +2083,7 @@ udp_dispatch_getnext(dns_dispentry_t *resp, int32_t timeout) { isc_nmhandle_settimeout(resp->handle, timeout); } - dispentry_log(resp, LVL(90), "continue reading"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "continue reading"); dns_dispentry_ref(resp); /* DISPENTRY003 */ isc_nm_read(resp->handle, udp_recv, resp); @@ -2176,9 +2097,9 @@ dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout) { dns_dispatch_t *disp = resp->disp; - dispentry_log(resp, LVL(90), "resume"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "resume"); - LOCK(&disp->lock); + REQUIRE(disp->tid == isc_tid()); switch (disp->socktype) { case isc_socktype_udp: { udp_dispatch_getnext(resp, timeout); @@ -2192,8 +2113,6 @@ dns_dispatch_resume(dns_dispentry_t *resp, uint16_t timeout) { default: UNREACHABLE(); } - - UNLOCK(&disp->lock); } void @@ -2204,7 +2123,7 @@ dns_dispatch_send(dns_dispentry_t *resp, isc_region_t *r) { dns_dispatch_t *disp = resp->disp; isc_nmhandle_t *sendhandle = NULL; - dispentry_log(resp, LVL(90), "sending"); + dispentry_log(resp, ISC_LOG_DEBUG(90), "sending"); switch (disp->socktype) { case isc_socktype_udp: isc_nmhandle_attach(resp->handle, &sendhandle); @@ -2253,31 +2172,25 @@ dns_dispentry_getlocaladdress(dns_dispentry_t *resp, isc_sockaddr_t *addrp) { dns_dispatch_t * dns_dispatchset_get(dns_dispatchset_t *dset) { - dns_dispatch_t *disp = NULL; + uint32_t tid = isc_tid(); /* check that dispatch set is configured */ if (dset == NULL || dset->ndisp == 0) { return (NULL); } - LOCK(&dset->lock); - disp = dset->dispatches[dset->cur]; - dset->cur++; - if (dset->cur == dset->ndisp) { - dset->cur = 0; - } - UNLOCK(&dset->lock); + INSIST(tid < dset->ndisp); - return (disp); + return (dset->dispatches[tid]); } isc_result_t dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, - dns_dispatchset_t **dsetp, int n) { + dns_dispatchset_t **dsetp, uint32_t ndisp) { isc_result_t result; dns_dispatchset_t *dset = NULL; dns_dispatchmgr_t *mgr = NULL; - int i, j; + size_t i; REQUIRE(VALID_DISPATCH(source)); REQUIRE(source->socktype == isc_socktype_udp); @@ -2286,63 +2199,51 @@ dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, mgr = source->mgr; dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t)); - *dset = (dns_dispatchset_t){ .ndisp = n }; - - isc_mutex_init(&dset->lock); - - dset->dispatches = isc_mem_cget(mctx, n, sizeof(dns_dispatch_t *)); + *dset = (dns_dispatchset_t){ .ndisp = ndisp }; isc_mem_attach(mctx, &dset->mctx); + dset->dispatches = isc_mem_cget(dset->mctx, ndisp, + sizeof(dns_dispatch_t *)); + dset->dispatches[0] = NULL; dns_dispatch_attach(source, &dset->dispatches[0]); /* DISPATCH004 */ - LOCK(&mgr->lock); - for (i = 1; i < n; i++) { - dset->dispatches[i] = NULL; - result = dispatch_createudp(mgr, &source->local, + for (i = 1; i < dset->ndisp; i++) { + result = dispatch_createudp(mgr, &source->local, i, &dset->dispatches[i]); if (result != ISC_R_SUCCESS) { goto fail; } } - UNLOCK(&mgr->lock); *dsetp = dset; return (ISC_R_SUCCESS); fail: - UNLOCK(&mgr->lock); - - for (j = 0; j < i; j++) { + for (size_t j = 0; j < i; j++) { dns_dispatch_detach(&(dset->dispatches[j])); /* DISPATCH004 */ } - isc_mem_cput(mctx, dset->dispatches, n, sizeof(dns_dispatch_t *)); - if (dset->mctx == mctx) { - isc_mem_detach(&dset->mctx); - } + isc_mem_cput(dset->mctx, dset->dispatches, ndisp, + sizeof(dns_dispatch_t *)); - isc_mutex_destroy(&dset->lock); - isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t)); + isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t)); return (result); } void dns_dispatchset_destroy(dns_dispatchset_t **dsetp) { - dns_dispatchset_t *dset = NULL; - int i; - REQUIRE(dsetp != NULL && *dsetp != NULL); - dset = *dsetp; + dns_dispatchset_t *dset = *dsetp; *dsetp = NULL; - for (i = 0; i < dset->ndisp; i++) { + + for (size_t i = 0; i < dset->ndisp; i++) { dns_dispatch_detach(&(dset->dispatches[i])); /* DISPATCH004 */ } isc_mem_cput(dset->mctx, dset->dispatches, dset->ndisp, sizeof(dns_dispatch_t *)); - isc_mutex_destroy(&dset->lock); isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t)); } diff --git a/lib/dns/include/dns/dispatch.h b/lib/dns/include/dns/dispatch.h index d5caf9af78..0e302f288e 100644 --- a/lib/dns/include/dns/dispatch.h +++ b/lib/dns/include/dns/dispatch.h @@ -69,9 +69,7 @@ ISC_LANG_BEGINDECLS struct dns_dispatchset { isc_mem_t *mctx; dns_dispatch_t **dispatches; - int ndisp; - int cur; - isc_mutex_t lock; + uint32_t ndisp; }; /* @@ -79,7 +77,8 @@ struct dns_dispatchset { #define DNS_DISPATCHOPT_FIXEDID 0x00000001U isc_result_t -dns_dispatchmgr_create(isc_mem_t *mctx, isc_nm_t *nm, dns_dispatchmgr_t **mgrp); +dns_dispatchmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, isc_nm_t *nm, + dns_dispatchmgr_t **mgrp); /*%< * Creates a new dispatchmgr object, and sets the available ports * to the default range (1024-65535). @@ -358,7 +357,7 @@ dns_dispatchset_get(dns_dispatchset_t *dset); isc_result_t dns_dispatchset_create(isc_mem_t *mctx, dns_dispatch_t *source, - dns_dispatchset_t **dsetp, int n); + dns_dispatchset_t **dsetp, uint32_t n); /*%< * Given a valid dispatch 'source', create a dispatch set containing * 'n' UDP dispatches, with the remainder filled out by clones of the diff --git a/lib/dns/include/dns/resolver.h b/lib/dns/include/dns/resolver.h index 0c6af9c816..01725a13ec 100644 --- a/lib/dns/include/dns/resolver.h +++ b/lib/dns/include/dns/resolver.h @@ -165,9 +165,8 @@ enum { #define DNS_QMIN_MAX_NO_DELEGATION 3 isc_result_t -dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, - unsigned int ndisp, isc_nm_t *nm, unsigned int options, - isc_tlsctx_cache_t *tlsctx_cache, +dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, isc_nm_t *nm, + unsigned int options, isc_tlsctx_cache_t *tlsctx_cache, dns_dispatch_t *dispatchv4, dns_dispatch_t *dispatchv6, dns_resolver_t **resp); @@ -183,17 +182,15 @@ dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, * *\li 'view' is a valid view. * - *\li 'ndisp' > 0. - * *\li 'nm' is a valid network manager. * *\li 'tlsctx_cache' != NULL. * *\li 'dispatchv4' is a dispatch with an IPv4 UDP socket, or is NULL. - * If not NULL, 'ndisp' clones of it will be created by the resolver. + * If not NULL, clones per loop of it will be created by the resolver. * *\li 'dispatchv6' is a dispatch with an IPv6 UDP socket, or is NULL. - * If not NULL, 'ndisp' clones of it will be created by the resolver. + * If not NULL, clones per loop of it will be created by the resolver. * *\li resp != NULL && *resp == NULL. * diff --git a/lib/dns/include/dns/view.h b/lib/dns/include/dns/view.h index 7e1babaa16..8904a5a485 100644 --- a/lib/dns/include/dns/view.h +++ b/lib/dns/include/dns/view.h @@ -365,8 +365,8 @@ dns_view_weakdetach(dns_view_t **targetp); isc_result_t dns_view_createresolver(dns_view_t *view, isc_loopmgr_t *loopmgr, - unsigned int ndisp, isc_nm_t *netmgr, - unsigned int options, isc_tlsctx_cache_t *tlsctx_cache, + isc_nm_t *netmgr, unsigned int options, + isc_tlsctx_cache_t *tlsctx_cache, dns_dispatch_t *dispatchv4, dns_dispatch_t *dispatchv6); /*%< * Create a resolver and address database for the view. diff --git a/lib/dns/request.c b/lib/dns/request.c index a5a7291570..46affed3f1 100644 --- a/lib/dns/request.c +++ b/lib/dns/request.c @@ -54,8 +54,8 @@ struct dns_requestmgr { atomic_bool shuttingdown; dns_dispatchmgr_t *dispatchmgr; - dns_dispatch_t *dispatchv4; - dns_dispatch_t *dispatchv6; + dns_dispatchset_t *dispatches4; + dns_dispatchset_t *dispatches6; dns_requestlist_t *requests; }; @@ -150,10 +150,14 @@ dns_requestmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, dns_dispatchmgr_attach(dispatchmgr, &requestmgr->dispatchmgr); if (dispatchv4 != NULL) { - dns_dispatch_attach(dispatchv4, &requestmgr->dispatchv4); + dns_dispatchset_create(requestmgr->mctx, dispatchv4, + &requestmgr->dispatches4, + isc_loopmgr_nloops(requestmgr->loopmgr)); } if (dispatchv6 != NULL) { - dns_dispatch_attach(dispatchv6, &requestmgr->dispatchv6); + dns_dispatchset_create(requestmgr->mctx, dispatchv6, + &requestmgr->dispatches6, + isc_loopmgr_nloops(requestmgr->loopmgr)); } isc_refcount_init(&requestmgr->references, 1); @@ -232,11 +236,11 @@ requestmgr_destroy(dns_requestmgr_t *requestmgr) { isc_mem_cput(requestmgr->mctx, requestmgr->requests, nloops, sizeof(requestmgr->requests[0])); - if (requestmgr->dispatchv4 != NULL) { - dns_dispatch_detach(&requestmgr->dispatchv4); + if (requestmgr->dispatches4 != NULL) { + dns_dispatchset_destroy(&requestmgr->dispatches4); } - if (requestmgr->dispatchv6 != NULL) { - dns_dispatch_detach(&requestmgr->dispatchv6); + if (requestmgr->dispatches6 != NULL) { + dns_dispatchset_destroy(&requestmgr->dispatches6); } if (requestmgr->dispatchmgr != NULL) { dns_dispatchmgr_detach(&requestmgr->dispatchmgr); @@ -360,11 +364,11 @@ udp_dispatch(dns_requestmgr_t *requestmgr, const isc_sockaddr_t *srcaddr, if (srcaddr == NULL) { switch (isc_sockaddr_pf(destaddr)) { case PF_INET: - disp = requestmgr->dispatchv4; + disp = dns_dispatchset_get(requestmgr->dispatches4); break; case PF_INET6: - disp = requestmgr->dispatchv6; + disp = dns_dispatchset_get(requestmgr->dispatches6); break; default: diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index c21513dda8..f65b7e4a92 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -9930,9 +9930,8 @@ spillattimer_countdown(void *arg) { } isc_result_t -dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, - unsigned int ndisp, isc_nm_t *nm, unsigned int options, - isc_tlsctx_cache_t *tlsctx_cache, +dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, isc_nm_t *nm, + unsigned int options, isc_tlsctx_cache_t *tlsctx_cache, dns_dispatch_t *dispatchv4, dns_dispatch_t *dispatchv6, dns_resolver_t **resp) { dns_resolver_t *res = NULL; @@ -9942,7 +9941,6 @@ dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, */ REQUIRE(DNS_VIEW_VALID(view)); - REQUIRE(ndisp > 0); REQUIRE(resp != NULL && *resp == NULL); REQUIRE(tlsctx_cache != NULL); REQUIRE(dispatchv4 != NULL || dispatchv6 != NULL); @@ -9989,12 +9987,12 @@ dns_resolver_create(dns_view_t *view, isc_loopmgr_t *loopmgr, if (dispatchv4 != NULL) { dns_dispatchset_create(res->mctx, dispatchv4, &res->dispatches4, - ndisp); + isc_loopmgr_nloops(res->loopmgr)); } if (dispatchv6 != NULL) { dns_dispatchset_create(res->mctx, dispatchv6, &res->dispatches6, - ndisp); + isc_loopmgr_nloops(res->loopmgr)); } isc_mutex_init(&res->lock); diff --git a/lib/dns/view.c b/lib/dns/view.c index 6855d02eda..e07ff434eb 100644 --- a/lib/dns/view.c +++ b/lib/dns/view.c @@ -589,8 +589,8 @@ dns_view_weakdetach(dns_view_t **viewp) { isc_result_t dns_view_createresolver(dns_view_t *view, isc_loopmgr_t *loopmgr, - unsigned int ndisp, isc_nm_t *netmgr, - unsigned int options, isc_tlsctx_cache_t *tlsctx_cache, + isc_nm_t *netmgr, unsigned int options, + isc_tlsctx_cache_t *tlsctx_cache, dns_dispatch_t *dispatchv4, dns_dispatch_t *dispatchv6) { isc_result_t result; @@ -601,7 +601,7 @@ dns_view_createresolver(dns_view_t *view, isc_loopmgr_t *loopmgr, REQUIRE(view->resolver == NULL); REQUIRE(view->dispatchmgr != NULL); - result = dns_resolver_create(view, loopmgr, ndisp, netmgr, options, + result = dns_resolver_create(view, loopmgr, netmgr, options, tlsctx_cache, dispatchv4, dispatchv6, &view->resolver); if (result != ISC_R_SUCCESS) { diff --git a/lib/isc/include/isc/sockaddr.h b/lib/isc/include/isc/sockaddr.h index 5d86d49895..0e6452e614 100644 --- a/lib/isc/include/isc/sockaddr.h +++ b/lib/isc/include/isc/sockaddr.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/lib/isc/include/isc/tid.h b/lib/isc/include/isc/tid.h index 20bafac595..e630c4f24f 100644 --- a/lib/isc/include/isc/tid.h +++ b/lib/isc/include/isc/tid.h @@ -28,11 +28,11 @@ isc_tid_count(void); * Returns the number of threads. */ -extern thread_local uint32_t tid_local; +extern thread_local uint32_t isc__tid_local; static inline uint32_t isc_tid(void) { - return (tid_local); + return (isc__tid_local); } /*%< * Returns the thread ID of the currently-running loop. diff --git a/lib/isc/tid.c b/lib/isc/tid.c index 397fac1c8f..d19a413554 100644 --- a/lib/isc/tid.c +++ b/lib/isc/tid.c @@ -23,7 +23,7 @@ /** * Private */ -thread_local uint32_t tid_local = ISC_TID_UNKNOWN; +thread_local uint32_t isc__tid_local = ISC_TID_UNKNOWN; /* * Zero is a better nonsense value in this case than ISC_TID_UNKNOWN; @@ -37,8 +37,8 @@ static uint32_t tid_count = 0; void isc__tid_init(uint32_t tid) { - REQUIRE(tid_local == ISC_TID_UNKNOWN || tid_local == tid); - tid_local = tid; + REQUIRE(isc__tid_local == ISC_TID_UNKNOWN || isc__tid_local == tid); + isc__tid_local = tid; } void diff --git a/lib/ns/include/ns/interfacemgr.h b/lib/ns/include/ns/interfacemgr.h index 651f4d429b..aea5b2dfac 100644 --- a/lib/ns/include/ns/interfacemgr.h +++ b/lib/ns/include/ns/interfacemgr.h @@ -65,9 +65,6 @@ #define NS_INTERFACEFLAG_ANYADDR 0x01U /*%< bound to "any" address */ #define NS_INTERFACEFLAG_LISTENING 0x02U /*%< listening */ -#define MAX_UDP_DISPATCH \ - 128 /*%< Maximum number of UDP dispatchers \ - * to start per interface */ /*% The nameserver interface structure */ struct ns_interface { unsigned int magic; /*%< Magic number. */ diff --git a/tests/dns/dispatch_test.c b/tests/dns/dispatch_test.c index 0780003bab..bce2565d82 100644 --- a/tests/dns/dispatch_test.c +++ b/tests/dns/dispatch_test.c @@ -245,7 +245,8 @@ ISC_LOOP_TEST_IMPL(dispatchset_create) { UNUSED(arg); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = make_dispatchset(1, &dset); @@ -261,15 +262,17 @@ ISC_LOOP_TEST_IMPL(dispatchset_create) { isc_loopmgr_shutdown(loopmgr); } -/* test dispatch set round-robin */ +/* test dispatch set per-loop dispatch */ ISC_LOOP_TEST_IMPL(dispatchset_get) { isc_result_t result; dns_dispatchset_t *dset = NULL; dns_dispatch_t *d1, *d2, *d3, *d4, *d5; + uint32_t tid_saved; UNUSED(arg); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = make_dispatchset(1, &dset); @@ -291,10 +294,21 @@ ISC_LOOP_TEST_IMPL(dispatchset_get) { result = make_dispatchset(4, &dset); assert_int_equal(result, ISC_R_SUCCESS); + /* + * Temporarily modify and then restore the current thread's + * ID value, in order to confirm that different threads get + * different dispatch sets but the same thread gets the same + * one. + */ + tid_saved = isc__tid_local; d1 = dns_dispatchset_get(dset); + isc__tid_local++; d2 = dns_dispatchset_get(dset); + isc__tid_local++; d3 = dns_dispatchset_get(dset); + isc__tid_local++; d4 = dns_dispatchset_get(dset); + isc__tid_local = tid_saved; d5 = dns_dispatchset_get(dset); assert_ptr_equal(d1, d5); @@ -451,7 +465,8 @@ ISC_LOOP_TEST_IMPL(dispatch_timeout_tcp_connect) { testdata.region.base = testdata.message; testdata.region.length = sizeof(testdata.message); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = dns_dispatch_createtcp(dispatchmgr, &tcp_connect_addr, @@ -496,7 +511,8 @@ ISC_LOOP_TEST_IMPL(dispatch_timeout_tcp_response) { isc_loop_teardown(isc_loop_main(loopmgr), stop_listening, sock); /* Client */ - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = dns_dispatch_createtcp(dispatchmgr, &tcp_connect_addr, @@ -530,7 +546,8 @@ ISC_LOOP_TEST_IMPL(dispatch_tcp_response) { testdata.region.base = testdata.message; testdata.region.length = sizeof(testdata.message); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = dns_dispatch_createtcp(dispatchmgr, &tcp_connect_addr, @@ -567,7 +584,8 @@ ISC_LOOP_TEST_IMPL(dispatch_tls_response) { testdata.region.base = testdata.message; testdata.region.length = sizeof(testdata.message); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = dns_dispatch_createtcp(dispatchmgr, &tls_connect_addr, @@ -594,7 +612,8 @@ ISC_LOOP_TEST_IMPL(dispatch_timeout_udp_response) { uint16_t id; /* Server */ - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = isc_nm_listenudp(netmgr, ISC_NM_LISTEN_ONE, &udp_server_addr, @@ -636,7 +655,8 @@ ISC_LOOP_TEST_IMPL(dispatch_getnext) { testdata.region.base = testdata.message; testdata.region.length = sizeof(testdata.message); - result = dns_dispatchmgr_create(mctx, connect_nm, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, connect_nm, + &dispatchmgr); assert_int_equal(result, ISC_R_SUCCESS); result = dns_dispatch_createudp(dispatchmgr, &udp_connect_addr, diff --git a/tests/dns/resolver_test.c b/tests/dns/resolver_test.c index 09106aadd6..3f53f9f351 100644 --- a/tests/dns/resolver_test.c +++ b/tests/dns/resolver_test.c @@ -76,7 +76,7 @@ mkres(dns_resolver_t **resolverp) { isc_result_t result; isc_tlsctx_cache_create(mctx, &tlsctx_cache); - result = dns_resolver_create(view, loopmgr, 1, netmgr, 0, tlsctx_cache, + result = dns_resolver_create(view, loopmgr, netmgr, 0, tlsctx_cache, dispatch, NULL, resolverp); assert_int_equal(result, ISC_R_SUCCESS); } diff --git a/tests/libtest/dns.c b/tests/libtest/dns.c index d50c012caf..73fb630807 100644 --- a/tests/libtest/dns.c +++ b/tests/libtest/dns.c @@ -68,7 +68,8 @@ dns_test_makeview(const char *name, bool with_dispatchmgr, bool with_cache, dns_dispatchmgr_t *dispatchmgr = NULL; if (with_dispatchmgr) { - result = dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, netmgr, + &dispatchmgr); if (result != ISC_R_SUCCESS) { return (result); } diff --git a/tests/libtest/ns.c b/tests/libtest/ns.c index 19b2f67699..4c49474ccc 100644 --- a/tests/libtest/ns.c +++ b/tests/libtest/ns.c @@ -85,7 +85,7 @@ setup_server(void **state) { ns_server_create(mctx, matchview, &sctx); - result = dns_dispatchmgr_create(mctx, netmgr, &dispatchmgr); + result = dns_dispatchmgr_create(mctx, loopmgr, netmgr, &dispatchmgr); if (result != ISC_R_SUCCESS) { goto cleanup; }