From 43b4cebc714c9d16666a7b858813e364bc14486a Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Fri, 24 Oct 2025 17:01:14 -0700 Subject: [PATCH 1/2] Remove maybe_cancel_validators() function When shutting down an fctx, validators can just be canceled without checking whether there are pending finds. (cherry picked from commit e62895e999e03e1e64d62c5da942b17fd15b8c75) --- lib/dns/resolver.c | 45 ++++++++++++++------------------------------- 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 1c1f5a31d0..4aa1554f11 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -681,8 +681,6 @@ ncache_adderesult(dns_message_t *message, dns_db_t *cache, dns_dbnode_t *node, static void validated(void *arg); static void -maybe_cancel_validators(fetchctx_t *fctx); -static void add_bad(fetchctx_t *fctx, dns_message_t *rmessage, dns_adbaddrinfo_t *addrinfo, isc_result_t reason, badnstype_t badtype); static isc_result_t @@ -1747,11 +1745,14 @@ fctx__done(fetchctx_t *fctx, isc_result_t result, const char *func, fctx_stoptimer(fctx); /* - * Cancel all pending validators. Note that this must be done - * without the fctx lock held, since that could cause - * deadlock. + * Cancel all pending validators. */ - maybe_cancel_validators(fctx); + dns_validator_t *validator = NULL; + for (validator = ISC_LIST_HEAD(fctx->validators); validator != NULL; + validator = ISC_LIST_NEXT(validator, link)) + { + dns_validator_cancel(validator); + } if (fctx->nsfetch != NULL) { dns_resolver_cancelfetch(fctx->nsfetch); @@ -5068,30 +5069,6 @@ clone_results(fetchctx_t *fctx) { #define CHASE(r) (((r)->attributes & DNS_RDATASETATTR_CHASE) != 0) #define CHECKNAMES(r) (((r)->attributes & DNS_RDATASETATTR_CHECKNAMES) != 0) -/* - * Cancel validators associated with '*fctx' if it is ready to be - * destroyed (i.e., no queries waiting for it and no pending ADB finds). - * Caller must hold fctx bucket lock. - * - * Requires: - * '*fctx' is shutting down. - */ -static void -maybe_cancel_validators(fetchctx_t *fctx) { - if (atomic_load_acquire(&fctx->pending) != 0 || - atomic_load_acquire(&fctx->nqueries) != 0) - { - return; - } - - REQUIRE(SHUTTINGDOWN(fctx)); - for (dns_validator_t *validator = ISC_LIST_HEAD(fctx->validators); - validator != NULL; validator = ISC_LIST_NEXT(validator, link)) - { - dns_validator_cancel(validator); - } -} - /* * typemap with just RRSIG(46) and NSEC(47) bits set. * @@ -5515,7 +5492,13 @@ validated(void *arg) { */ dns_db_detachnode(fctx->cache, &node); if (SHUTTINGDOWN(fctx)) { - maybe_cancel_validators(fctx); + dns_validator_t *validator = NULL; + for (validator = ISC_LIST_HEAD(fctx->validators); + validator != NULL; + validator = ISC_LIST_NEXT(validator, link)) + { + dns_validator_cancel(validator); + } } UNLOCK(&fctx->lock); goto cleanup_fetchctx; From a8af46a39c689aa43ea47a77c9edd342d8b13a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 23 Oct 2025 13:11:45 +0200 Subject: [PATCH 2/2] Reduce the number of outgoing queries The dns_resolver mode of operation is to resolve all the domains as it iterates the DNS tree to fill up the cache as quickly as possible. This commit reduces the number of outgoing queries by reducing the number of remote fetches started for the nameserver addresses resolution via dns_adb_createfind() to a smaller number per depth of the recursion since the delegation point (3 2 1 0) - where 0 means only create fetch on demand if we don't have any addresses yet. (cherry picked from commit 1b90d2ffdb331dea82786c9c67d42da1879b98d8) --- bin/tests/system/resolver/tests.sh | 4 +- lib/dns/resolver.c | 253 +++++++++++++++++++---------- 2 files changed, 165 insertions(+), 92 deletions(-) diff --git a/bin/tests/system/resolver/tests.sh b/bin/tests/system/resolver/tests.sh index 83dedaee9a..46a184a0a7 100755 --- a/bin/tests/system/resolver/tests.sh +++ b/bin/tests/system/resolver/tests.sh @@ -238,9 +238,9 @@ for nscount in 1 2 3 4 5 6 7 8 9 10; do test "${sourcerecs}" -eq "${nscount}" || ret=1 test "${sourcerecs}" -eq "${nscount}" || echo_i "NS count incorrect for target${nscount}.sourcens" - # Expected queries = 2 * number of NS records, up to a maximum of 10. + # Expected queries = 2 * number of NS records allowed at the first level, up to a maximum of 6. expected=$((nscount * 2)) - if [ "$expected" -gt 10 ]; then expected=10; fi + if [ "$expected" -gt 6 ]; then expected=6; fi # Count the number of logged fetches nextpart ns5/named.run >/dev/null dig_with_opts @10.53.0.5 target${nscount}.sourcens A >dig.ns5.out.${nscount}.${n} || ret=1 diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 4aa1554f11..ad48b24d02 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -228,13 +229,6 @@ #define DEFAULT_MAX_QUERIES 50 #endif /* ifndef DEFAULT_MAX_QUERIES */ -/* - * After NS_FAIL_LIMIT attempts to fetch a name server address, - * if the number of addresses in the NS RRset exceeds NS_RR_LIMIT, - * stop trying to fetch, in order to avoid wasting resources. - */ -#define NS_FAIL_LIMIT 4 -#define NS_RR_LIMIT 5 /* * IP address lookups are performed for at most NS_PROCESSING_LIMIT NS RRs in * any NS RRset encountered, to avoid excessive resource use while processing @@ -242,11 +236,6 @@ */ #define NS_PROCESSING_LIMIT 20 -STATIC_ASSERT(NS_PROCESSING_LIMIT > NS_RR_LIMIT, - "The maximum number of NS RRs processed for each " - "delegation (NS_PROCESSING_LIMIT) must be larger than the large " - "delegation threshold (NS_RR_LIMIT)."); - /* Hash table for zone counters */ #ifndef RES_DOMAIN_HASH_BITS #define RES_DOMAIN_HASH_BITS 12 @@ -425,9 +414,10 @@ struct fetchctx { dns_name_t *fwdname; /*% - * The number of events we're waiting for. + * Used to track started ADB finds with event. */ - atomic_uint_fast32_t pending; + size_t pending_running; + dns_adbfindlist_t pending_finds; /*% * The number of times we've "restarted" the current @@ -1741,6 +1731,19 @@ fctx__done(fetchctx_t *fctx, isc_result_t result, const char *func, fctx->qmin_warning = ISC_R_SUCCESS; + /* + * Cancel all pending ADB finds if we have not been successful + * or we are shutting down. + */ + if (result != ISC_R_SUCCESS) { + dns_adbfind_t *find = NULL; + for (find = ISC_LIST_HEAD(fctx->pending_finds); find != NULL; + find = ISC_LIST_NEXT(find, publink)) + { + dns_adb_cancelfind(find); + } + } + fctx_cancelqueries(fctx, no_response, age_untried); fctx_stoptimer(fctx); @@ -2890,13 +2893,38 @@ detach: resquery_detach(&query); } +static isc_result_t +fctx_finddone_fail(fetchctx_t *fctx) { + fctx->findfail++; + + /* + * There are still running ADB finds and these can be more successful. + */ + if (!ISC_LIST_EMPTY(fctx->pending_finds)) { + return DNS_R_WAIT; + } + + FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT); + + /* + * There's something on the alternate list. Try that. + */ + if (!ISC_LIST_EMPTY(fctx->res->alternates)) { + return DNS_R_CONTINUE; + } + + /* + * We've got nothing else to wait for and don't know the answer. + * There's nothing to do but fail the fctx. + */ + return ISC_R_FAILURE; +} + static void fctx_finddone(void *arg) { dns_adbfind_t *find = (dns_adbfind_t *)arg; fetchctx_t *fctx = (fetchctx_t *)find->cbarg; - bool want_try = false; - bool want_done = false; - uint_fast32_t pending; + isc_result_t result = ISC_R_SUCCESS; REQUIRE(VALID_FCTX(fctx)); @@ -2905,8 +2933,15 @@ fctx_finddone(void *arg) { REQUIRE(fctx->tid == isc_tid()); LOCK(&fctx->lock); - pending = atomic_fetch_sub_release(&fctx->pending, 1); - INSIST(pending > 0); + if (ISC_LINK_LINKED(find, publink)) { + /* + * If we canceled the find directly in findname(), + * it won't be linked here as dns_adb_cancelfind() + * is not idempotent. + */ + fctx->pending_running--; + ISC_LIST_UNLINK(fctx->pending_finds, find, publink); + } if (ADDRWAIT(fctx)) { /* @@ -2915,22 +2950,9 @@ fctx_finddone(void *arg) { INSIST(!SHUTTINGDOWN(fctx)); if (dns_adb_findstatus(find) == DNS_ADB_MOREADDRESSES) { FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT); - want_try = true; + result = DNS_R_CONTINUE; } else { - fctx->findfail++; - if (atomic_load_acquire(&fctx->pending) == 0) { - FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT); - if (!ISC_LIST_EMPTY(fctx->res->alternates)) { - want_try = true; - } else { - /* - * We've got nothing else to wait for - * and don't know the answer. There's - * nothing to do but fail the fctx. - */ - want_done = true; - } - } + result = fctx_finddone_fail(fctx); } } @@ -2938,13 +2960,18 @@ fctx_finddone(void *arg) { dns_adb_destroyfind(&find); - if (want_done) { - FCTXTRACE("fetch failed in finddone(); return " - "ISC_R_FAILURE"); - - fctx_done_unref(fctx, ISC_R_FAILURE); - } else if (want_try) { + switch (result) { + case ISC_R_SUCCESS: + case DNS_R_WAIT: + break; + case DNS_R_CONTINUE: fctx_try(fctx, true); + break; + default: + FCTXTRACE2("fetch failed in finddone()", + isc_result_totext(result)); + fctx_done_unref(fctx, result); + break; } fetchctx_detach(&fctx); @@ -3247,7 +3274,7 @@ waiting_for(dns_adbfind_t *find, dns_rdatatype_t type) { static void findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, unsigned int options, unsigned int flags, isc_stdtime_t now, - bool *overquota, bool *need_alternate, unsigned int *no_addresses) { + bool *overquota, bool *need_alternate, bool *have_address) { dns_adbaddrinfo_t *ai = NULL; dns_adbfind_t *find = NULL; dns_resolver_t *res = fctx->res; @@ -3331,6 +3358,7 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, } else { ISC_LIST_APPEND(fctx->finds, find, publink); } + SET_IF_NOT_NULL(have_address, true); return; } @@ -3349,7 +3377,6 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, "loop detected resolving '%s'", fctx->info); if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) { - atomic_fetch_add_relaxed(&fctx->pending, 1); dns_adb_cancelfind(find); } else { dns_adb_destroyfind(&find); @@ -3363,7 +3390,8 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, * we'll get an event later when the find has what it needs. */ if ((find->options & DNS_ADBFIND_WANTEVENT) != 0) { - atomic_fetch_add_relaxed(&fctx->pending, 1); + fctx->pending_running++; + ISC_LIST_APPEND(fctx->pending_finds, find, publink); /* * Bootstrap. @@ -3376,9 +3404,6 @@ findname(fetchctx_t *fctx, const dns_name_t *name, in_port_t port, { *need_alternate = true; } - if (no_addresses != NULL) { - (*no_addresses)++; - } return; } @@ -3420,7 +3445,6 @@ isstrictsubdomain(const dns_name_t *name1, const dns_name_t *name2) { static isc_result_t fctx_getaddresses(fetchctx_t *fctx) { - dns_rdata_t rdata = DNS_RDATA_INIT; isc_result_t result; dns_resolver_t *res; isc_stdtime_t now; @@ -3431,8 +3455,9 @@ fctx_getaddresses(fetchctx_t *fctx) { dns_rdata_ns_t ns; bool need_alternate = false; bool all_spilled = false; - unsigned int no_addresses = 0; + bool have_address = false; unsigned int ns_processed = 0; + size_t fetches_allowed = 0; FCTXTRACE5("getaddresses", "fctx->depth=", fctx->depth); @@ -3604,47 +3629,91 @@ normal_nses: INSIST(ISC_LIST_EMPTY(fctx->finds)); INSIST(ISC_LIST_EMPTY(fctx->altfinds)); - for (result = dns_rdataset_first(&fctx->nameservers); - result == ISC_R_SUCCESS; - result = dns_rdataset_next(&fctx->nameservers)) - { - bool overquota = false; - unsigned int static_stub = 0; + switch (fctx->depth) { + case 0: + fetches_allowed = 3; + break; + case 1: + fetches_allowed = 2; + break; + default: + fetches_allowed = 1; + break; + } + + for (;;) { + for (result = dns_rdataset_first(&fctx->nameservers); + result == ISC_R_SUCCESS; + result = dns_rdataset_next(&fctx->nameservers)) + { + dns_rdata_t rdata = DNS_RDATA_INIT; + bool overquota = false; + unsigned int static_stub = 0; + unsigned int no_fetch = 0; + + dns_rdataset_current(&fctx->nameservers, &rdata); + /* + * Extract the name from the NS record. + */ + result = dns_rdata_tostruct(&rdata, &ns, NULL); + if (result != ISC_R_SUCCESS) { + continue; + } + + if (STATICSTUB(&fctx->nameservers) && + dns_name_equal(&ns.name, fctx->domain)) + { + static_stub = DNS_ADBFIND_STATICSTUB; + } + + /* + * Make sure we only launch a limited number of + * outgoing fetches. + */ + if (fctx->pending_running >= fetches_allowed) { + no_fetch = DNS_ADBFIND_NOFETCH; + } + + findname(fctx, &ns.name, 0, + stdoptions | static_stub | no_fetch, 0, now, + &overquota, &need_alternate, &have_address); + + if (!overquota) { + all_spilled = false; + } + + dns_rdata_reset(&rdata); + dns_rdata_freestruct(&ns); + + if (++ns_processed >= NS_PROCESSING_LIMIT) { + result = ISC_R_NOMORE; + break; + } + } - dns_rdataset_current(&fctx->nameservers, &rdata); /* - * Extract the name from the NS record. + * Don't start alternate fetch if we just started one above. */ - result = dns_rdata_tostruct(&rdata, &ns, NULL); - if (result != ISC_R_SUCCESS) { - continue; - } - - if (STATICSTUB(&fctx->nameservers) && - dns_name_equal(&ns.name, fctx->domain)) - { - static_stub = DNS_ADBFIND_STATICSTUB; - } - - if (no_addresses > NS_FAIL_LIMIT && - dns_rdataset_count(&fctx->nameservers) > NS_RR_LIMIT) - { + /* + * Don't start alternate fetch if we just started one above. + */ + if (fctx->pending_running > 0) { stdoptions |= DNS_ADBFIND_NOFETCH; - } - findname(fctx, &ns.name, 0, stdoptions | static_stub, 0, now, - &overquota, &need_alternate, &no_addresses); - - if (!overquota) { - all_spilled = false; - } - - dns_rdata_reset(&rdata); - dns_rdata_freestruct(&ns); - - if (++ns_processed >= NS_PROCESSING_LIMIT) { result = ISC_R_NOMORE; + } else if (have_address || fetches_allowed != 0) { + result = ISC_R_NOMORE; + } + + if (result != ISC_R_SUCCESS) { break; } + + /* + * We have no addresses and we haven't allowed any + * fetches to be started. Allow one extra fetch and try + * again. + */ + fetches_allowed = 1; } if (result != ISC_R_NOMORE) { return result; @@ -3704,7 +3773,7 @@ out: /* * We've got no addresses. */ - if (atomic_load_acquire(&fctx->pending) > 0) { + if (fctx->pending_running > 0) { /* * We're fetching the addresses, but don't have * any yet. Tell the caller to wait for an @@ -3981,11 +4050,6 @@ fctx_try(fetchctx_t *fctx, bool retrying) { dns_adbaddrinfo_t *addrinfo = NULL; dns_resolver_t *res = NULL; - FCTXTRACE5("try", "fctx->qc=", isc_counter_used(fctx->qc)); - if (fctx->gqc != NULL) { - FCTXTRACE5("try", "fctx->gqc=", isc_counter_used(fctx->gqc)); - } - REQUIRE(!ADDRWAIT(fctx)); REQUIRE(fctx->tid == isc_tid()); @@ -4120,6 +4184,10 @@ fctx_try(fetchctx_t *fctx, bool retrying) { } result = isc_counter_increment(fctx->qc); +#if WANT_QUERYTRACE + FCTXTRACE5("query", "max-recursion-queries, querycount=", + isc_counter_used(fctx->qc)); +#endif if (result != ISC_R_SUCCESS) { isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER, DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3), @@ -4131,6 +4199,10 @@ fctx_try(fetchctx_t *fctx, bool retrying) { if (fctx->gqc != NULL) { result = isc_counter_increment(fctx->gqc); +#if WANT_QUERYTRACE + FCTXTRACE5("query", "max-query-count, querycount=", + isc_counter_used(fctx->gqc)); +#endif if (result != ISC_R_SUCCESS) { isc_log_write(dns_lctx, DNS_LOGCATEGORY_RESOLVER, DNS_LOGMODULE_RESOLVER, ISC_LOG_DEBUG(3), @@ -4295,7 +4367,6 @@ resume_qmin(void *arg) { goto cleanup; } fcount_decr(fctx); - dns_name_copy(fname, fctx->domain); result = fcount_incr(fctx, true); @@ -4341,9 +4412,10 @@ fctx_destroy(fetchctx_t *fctx) { REQUIRE(ISC_LIST_EMPTY(fctx->queries)); REQUIRE(ISC_LIST_EMPTY(fctx->finds)); REQUIRE(ISC_LIST_EMPTY(fctx->altfinds)); - REQUIRE(atomic_load_acquire(&fctx->pending) == 0); + REQUIRE(ISC_LIST_EMPTY(fctx->pending_finds)); REQUIRE(ISC_LIST_EMPTY(fctx->validators)); REQUIRE(fctx->state != fetchstate_active); + REQUIRE(fctx->timer == NULL); FCTXTRACE("destroy"); @@ -4634,6 +4706,7 @@ fctx_create(dns_resolver_t *res, isc_loop_t *loop, const dns_name_t *name, ISC_LIST_INIT(fctx->bad); ISC_LIST_INIT(fctx->edns); ISC_LIST_INIT(fctx->validators); + ISC_LIST_INIT(fctx->pending_finds); atomic_init(&fctx->attributes, 0);