From 1aa9cd3484b47e855e0f3203624a45714863e7b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 11 Sep 2025 10:53:47 +0200 Subject: [PATCH 1/2] Split the fctx_done() into success and failure variants The split will allow us to call fctx__done() with fctx->lock acquired when it is called with ISC_R_SUCESS to prevent data races when finishing the fetch context. --- lib/dns/resolver.c | 65 ++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 4005d2ce71..ef848ef384 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -677,16 +677,28 @@ static void findnoqname(fetchctx_t *fctx, dns_message_t *message, dns_name_t *name, dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset); -#define fctx_done_detach(fctxp, result) \ +#define fctx_failure_detach(fctxp, result) \ + REQUIRE(result != ISC_R_SUCCESS); \ if (fctx__done(*fctxp, result, __func__, __FILE__, __LINE__)) { \ fetchctx_detach(fctxp); \ } -#define fctx_done_unref(fctx, result) \ +#define fctx_failure_unref(fctx, result) \ + REQUIRE(result != ISC_R_SUCCESS); \ if (fctx__done(fctx, result, __func__, __FILE__, __LINE__)) { \ fetchctx_unref(fctx); \ } +#define fctx_success_detach(fctxp) \ + if (fctx__done(*fctxp, ISC_R_SUCCESS, __func__, __FILE__, __LINE__)) { \ + fetchctx_detach(fctxp); \ + } + +#define fctx_success_unref(fctx) \ + if (fctx__done(fctx, ISC_R_SUCCESS, __func__, __FILE__, __LINE__)) { \ + fetchctx_unref(fctx); \ + } + #if DNS_RESOLVER_TRACE #define fetchctx_ref(ptr) fetchctx__ref(ptr, __func__, __FILE__, __LINE__) #define fetchctx_unref(ptr) fetchctx__unref(ptr, __func__, __FILE__, __LINE__) @@ -1778,7 +1790,7 @@ resquery_senddone(isc_result_t eresult, isc_region_t *region, void *arg) { "due to unexpected result; responding", eresult); fctx_cancelquery(©, NULL, false, false); - fctx_done_detach(&fctx, eresult); + fctx_failure_detach(&fctx, eresult); break; } @@ -2782,7 +2794,7 @@ resquery_connected(isc_result_t eresult, isc_region_t *region, void *arg) { "responding"); fctx_cancelquery(©, NULL, false, false); - fctx_done_detach(&fctx, result); + fctx_failure_detach(&fctx, result); break; } @@ -2804,7 +2816,7 @@ resquery_connected(isc_result_t eresult, isc_region_t *region, void *arg) { case ISC_R_SHUTTINGDOWN: FCTXTRACE3("shutdown in resquery_connected()", eresult); fctx_cancelquery(©, NULL, true, false); - fctx_done_detach(&fctx, eresult); + fctx_failure_detach(&fctx, eresult); break; case ISC_R_HOSTDOWN: @@ -2836,7 +2848,7 @@ resquery_connected(isc_result_t eresult, isc_region_t *region, void *arg) { eresult); fctx_cancelquery(©, NULL, false, false); - fctx_done_detach(&fctx, eresult); + fctx_failure_detach(&fctx, eresult); break; } @@ -2896,7 +2908,7 @@ fctx_finddone(void *arg) { FCTXTRACE("fetch failed in finddone(); return " "ISC_R_FAILURE"); - fctx_done_unref(fctx, ISC_R_FAILURE); + fctx_failure_unref(fctx, ISC_R_FAILURE); } else if (want_try) { fctx_try(fctx, true); } @@ -4079,7 +4091,7 @@ fctx_try(fetchctx_t *fctx, bool retrying) { done: if (result != ISC_R_SUCCESS) { - fctx_done_detach(&fctx, result); + fctx_failure_detach(&fctx, result); } } @@ -4358,7 +4370,7 @@ cleanup: } if (result != ISC_R_SUCCESS) { /* An error occurred, tear down whole fctx */ - fctx_done_unref(fctx, fixup_result ? ISC_R_SUCCESS : result); + fctx_failure_unref(fctx, fixup_result ? ISC_R_SUCCESS : result); } fetchctx_detach(&fctx); } @@ -4436,7 +4448,7 @@ fctx_expired(void *arg) { dns_ede_add(&fctx->edectx, DNS_EDE_NOREACHABLEAUTH, NULL); - fctx_done_detach(&fctx, DNS_R_SERVFAIL); + fctx_failure_detach(&fctx, DNS_R_SERVFAIL); } static void @@ -4445,7 +4457,7 @@ fctx_shutdown(void *arg) { REQUIRE(VALID_FCTX(fctx)); - fctx_done_unref(fctx, ISC_R_SHUTTINGDOWN); + fctx_failure_unref(fctx, ISC_R_SHUTTINGDOWN); fetchctx_detach(&fctx); } @@ -5602,8 +5614,10 @@ cleanup_unlocked: dns_validator_send(nextval); } - if (done) { - fctx_done_unref(fctx, result); + if (done && result == ISC_R_SUCCESS) { + fctx_success_unref(fctx); + } else if (done) { + fctx_failure_unref(fctx, result); } /* @@ -6835,7 +6849,7 @@ cleanup: if (result != ISC_R_SUCCESS) { /* An error occurred, tear down whole fctx */ - fctx_done_unref(fctx, result); + fctx_failure_unref(fctx, result); } fetchctx_detach(&fctx); @@ -9002,7 +9016,7 @@ rctx_nextserver(respctx_t *rctx, dns_message_t *message, dcname = dns_fixedname_initname(&founddc); if (result != ISC_R_SUCCESS) { - fctx_done_detach(&rctx->fctx, DNS_R_SERVFAIL); + fctx_failure_detach(&rctx->fctx, DNS_R_SERVFAIL); return; } if (dns_rdatatype_atparent(fctx->type)) { @@ -9019,7 +9033,7 @@ rctx_nextserver(respctx_t *rctx, dns_message_t *message, findoptions, true, true, &fctx->nameservers, NULL); if (result != ISC_R_SUCCESS) { FCTXTRACE("couldn't find a zonecut"); - fctx_done_detach(&rctx->fctx, DNS_R_SERVFAIL); + fctx_failure_detach(&rctx->fctx, DNS_R_SERVFAIL); return; } if (!dns_name_issubdomain(fname, fctx->domain)) { @@ -9028,7 +9042,7 @@ rctx_nextserver(respctx_t *rctx, dns_message_t *message, * QDOMAIN. */ FCTXTRACE("nameservers now above QDOMAIN"); - fctx_done_detach(&rctx->fctx, DNS_R_SERVFAIL); + fctx_failure_detach(&rctx->fctx, DNS_R_SERVFAIL); return; } @@ -9039,7 +9053,7 @@ rctx_nextserver(respctx_t *rctx, dns_message_t *message, result = fcount_incr(fctx, true); if (result != ISC_R_SUCCESS) { - fctx_done_detach(&rctx->fctx, DNS_R_SERVFAIL); + fctx_failure_detach(&rctx->fctx, DNS_R_SERVFAIL); return; } fctx->ns_ttl = fctx->nameservers.ttl; @@ -9072,7 +9086,7 @@ rctx_resend(respctx_t *rctx, dns_adbaddrinfo_t *addrinfo) { inc_stats(fctx->res, dns_resstatscounter_retry); result = fctx_query(fctx, addrinfo, rctx->retryopts); if (result != ISC_R_SUCCESS) { - fctx_done_detach(&rctx->fctx, result); + fctx_failure_detach(&rctx->fctx, result); } } @@ -9126,7 +9140,7 @@ rctx_chaseds(respctx_t *rctx, dns_message_t *message, if (result == DNS_R_DUPLICATE) { result = DNS_R_SERVFAIL; } - fctx_done_detach(&rctx->fctx, result); + fctx_failure_detach(&rctx->fctx, result); fetchctx_detach(&fctx); return; } @@ -9164,7 +9178,7 @@ rctx_done(respctx_t *rctx, isc_result_t result) { { fctx_cancelquery(&query, rctx->finish, rctx->no_response, false); - fctx_done_detach(&rctx->fctx, DNS_R_SERVFAIL); + fctx_failure_detach(&rctx->fctx, DNS_R_SERVFAIL); goto detach; } #endif /* ifdef ENABLE_AFL */ @@ -9205,11 +9219,12 @@ rctx_done(respctx_t *rctx, isc_result_t result) { */ FCTXTRACE("wait for validator"); fctx_cancelqueries(fctx, true, false); + } else if (result == ISC_R_SUCCESS) { + /* We are done. */ + fctx_success_detach(&rctx->fctx); } else { - /* - * We're done. - */ - fctx_done_detach(&rctx->fctx, result); + /* Done with an error. */ + fctx_failure_detach(&rctx->fctx, result); } detach: From 09762bdc44657eba11c51c4550f0bb1144740a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Thu, 11 Sep 2025 10:53:47 +0200 Subject: [PATCH 2/2] Fix datarace between unlocking fctx lock and shuttingdown fctx There was a data race where new fetch response could be added to the fetch context after we unlock the fetch context and before we shut it down. This could cause assertion failure when fctx__done() was called with ISC_R_SUCCESS because there was originally no fetch response, but new fetch response without associated dataset was added before we had a chance to shutdown the fetch context. This manifested in the validated() callback, where cache_rrset() now returns ISC_R_SUCCESS instead of DNS_R_UNCHANGED when cache was not changed. However the data race was wrong on a general level. When the fctx__done() is called with ISC_R_SUCCESS as result is expects the fctx->lock to be already acquired to prevent these data races. --- lib/dns/resolver.c | 51 +++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index ef848ef384..fe0c36fd08 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -1664,7 +1664,9 @@ fctx__done(fetchctx_t *fctx, isc_result_t result, const char *func, UNUSED(func); #endif - LOCK(&fctx->lock); + if (result != ISC_R_SUCCESS) { + LOCK(&fctx->lock); + } /* We need to do this under the lock for intra-thread synchronization */ if (fctx->state == fetchstate_done) { UNLOCK(&fctx->lock); @@ -2899,11 +2901,8 @@ fctx_finddone(void *arg) { } } } - UNLOCK(&fctx->lock); - dns_adb_destroyfind(&find); - if (want_done) { FCTXTRACE("fetch failed in finddone(); return " "ISC_R_FAILURE"); @@ -2913,6 +2912,8 @@ fctx_finddone(void *arg) { fctx_try(fctx, true); } + dns_adb_destroyfind(&find); + fetchctx_detach(&fctx); } @@ -4108,7 +4109,6 @@ resume_qmin(void *arg) { dns_rdataset_t sigrdataset; dns_db_t *db = NULL; dns_dbnode_t *node = NULL; - bool fixup_result = false; REQUIRE(VALID_FCTX(fctx)); @@ -4265,14 +4265,16 @@ resume_qmin(void *arg) { dns_db_attachnode(node, &resp->node); } dns_name_copy(fname, resp->foundname); + clone_results(fctx); if (result == DNS_R_CNAME && dns_rdataset_isassociated(&rdataset) && fctx->type == dns_rdatatype_cname) { - fixup_result = true; + fctx_success_unref(fctx); + result = ISC_R_SUCCESS; + } else { + UNLOCK(&fctx->lock); } - clone_results(fctx); - UNLOCK(&fctx->lock); goto cleanup; } UNLOCK(&fctx->lock); @@ -4370,7 +4372,7 @@ cleanup: } if (result != ISC_R_SUCCESS) { /* An error occurred, tear down whole fctx */ - fctx_failure_unref(fctx, fixup_result ? ISC_R_SUCCESS : result); + fctx_failure_unref(fctx, result); } fetchctx_detach(&fctx); } @@ -5603,9 +5605,19 @@ answer_response: done = true; cleanup: - UNLOCK(&fctx->lock); -cleanup_unlocked: + if (!done || result != ISC_R_SUCCESS) { + UNLOCK(&fctx->lock); + } + if (done) { + if (result == ISC_R_SUCCESS) { + fctx_success_unref(fctx); + } else { + fctx_failure_unref(fctx, result); + } + } + +cleanup_unlocked: if (node != NULL) { dns_db_detachnode(&node); } @@ -5614,12 +5626,6 @@ cleanup_unlocked: dns_validator_send(nextval); } - if (done && result == ISC_R_SUCCESS) { - fctx_success_unref(fctx); - } else if (done) { - fctx_failure_unref(fctx, result); - } - /* * val->name points to name on a message on one of the * queries on the fetch context so the name has to be @@ -5628,7 +5634,6 @@ cleanup_unlocked: dns_validator_shutdown(val); dns_validator_detach(&val); fetchctx_detach(&fctx); - INSIST(node == NULL); } static void @@ -9204,15 +9209,18 @@ rctx_done(respctx_t *rctx, isc_result_t result) { rctx->next_server = false; rctx->resend = false; } - UNLOCK(&fctx->lock); if (rctx->next_server) { + UNLOCK(&fctx->lock); rctx_nextserver(rctx, message, addrinfo, result); } else if (rctx->resend) { + UNLOCK(&fctx->lock); rctx_resend(rctx, addrinfo); } else if (result == DNS_R_CHASEDSSERVERS) { + UNLOCK(&fctx->lock); rctx_chaseds(rctx, message, addrinfo, result); } else if (result == ISC_R_SUCCESS && !HAVE_ANSWER(fctx)) { + UNLOCK(&fctx->lock); /* * All has gone well so far, but we are waiting for the DNSSEC * validator to validate the answer. @@ -9220,10 +9228,11 @@ rctx_done(respctx_t *rctx, isc_result_t result) { FCTXTRACE("wait for validator"); fctx_cancelqueries(fctx, true, false); } else if (result == ISC_R_SUCCESS) { - /* We are done. */ + /* Ended successfully. */ fctx_success_detach(&rctx->fctx); } else { - /* Done with an error. */ + UNLOCK(&fctx->lock); + /* Ended with failure. */ fctx_failure_detach(&rctx->fctx, result); }