From 33d219bfe1bd50b29f49a02cdbc356f7a35a6915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Fri, 20 Mar 2026 14:29:57 +0100 Subject: [PATCH 1/2] Skip cache flush on NTA expiry dns_view_flushnode() was called in the delete_expired() async callback, which runs after the query that detected the NTA expiry. This created a race: the query would proceed with stale cached data from the NTA period before the flush had a chance to run, resulting in transient SERVFAIL with EDE 22 (No Reachable Authority). In the older branches, remove the dns_view_flushnode() call (and the system test check that relied on it) instead of reordering it: a proper fix there is too complicated, and this was not a critical bug. (cherry picked from commit da8e1c956a9ae9134b2df511f0eba8efcceb77c7) --- bin/tests/system/nta/tests_nta.py | 7 ------- lib/dns/nta.c | 15 --------------- 2 files changed, 22 deletions(-) diff --git a/bin/tests/system/nta/tests_nta.py b/bin/tests/system/nta/tests_nta.py index 828c04822b..ece8db6729 100644 --- a/bin/tests/system/nta/tests_nta.py +++ b/bin/tests/system/nta/tests_nta.py @@ -147,13 +147,6 @@ def test_nta_behavior(servers): isctest.check.noerror(res) isctest.check.noadflag(res) - # Expiry should also trigger a cache flush, so even if a.secure.example A - # was cached when its NTA was active, cached data should not be returned. - m = isctest.query.create("a.secure.example", "A") - res = isctest.query.tcp(m, "10.53.0.4") - isctest.check.noerror(res) - isctest.check.adflag(res) - # bogus.example was set to expire in 20s, so at t=13 # it should still be NTA'd, but badds.example used the default # lifetime of 12s, so it should revert to SERVFAIL now. 
diff --git a/lib/dns/nta.c b/lib/dns/nta.c index b3b50b4ac8..ec5b2c80e2 100644 --- a/lib/dns/nta.c +++ b/lib/dns/nta.c @@ -375,7 +375,6 @@ delete_expired(void *arg) { isc_result_t result; dns_qp_t *qp = NULL; void *pval = NULL; - dns_view_t *view = NULL; REQUIRE(VALID_NTATABLE(ntatable)); @@ -391,16 +390,6 @@ delete_expired(void *arg) { DNS_LOGMODULE_NTA, ISC_LOG_INFO, "deleting expired NTA at %s", nb); - /* - * Delay the flushing to avoid lock-order-inversion, as - * dns_view_flushnode()->dns_adb_flushnames() locks 'adbname', - * and it can cause a problem e.g. in dns_ntatable_covered() in - * another thread called by the resolver (also involving 'fctx' - * lock), or in dns_ntatable_shutdown() (also involving 'view' - * lock). - */ - dns_view_weakattach(ntatable->view, &view); - dns_qp_deletename(qp, &nta->name, NULL, NULL); dns__nta_shutdown(nta); dns__nta_unref(nta); @@ -408,10 +397,6 @@ delete_expired(void *arg) { dns_qp_compact(qp, DNS_QPGC_MAYBE); dns_qpmulti_commit(ntatable->table, &qp); RWUNLOCK(&ntatable->rwlock, isc_rwlocktype_write); - if (view != NULL) { - dns_view_flushnode(view, &nta->name, true); - dns_view_weakdetach(&view); - } dns__nta_detach(&nta); dns_ntatable_detach(&ntatable); } From d3965a91b6c7c5d0f1ba86339e237a923eef5553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Fri, 20 Mar 2026 23:56:02 +0100 Subject: [PATCH 2/2] Replace existing NTA instead of reusing it in dns_ntatable_add() When an NTA already exists for a name, the old code retrieved and reused the existing NTA object, then reset its timer via settimer(). This is incorrect because isc_timer_start() and isc_timer_stop() require the timer to be manipulated from its owning loop (enforced by REQUIRE(timer->loop == isc_loop()) in lib/isc/timer.c), and the caller may be running on a different loop than the one that created the original NTA. 
Instead, delete the old NTA (shutting down its timer on the correct loop) and insert a fresh one that is owned by the current loop. --- lib/dns/nta.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/lib/dns/nta.c b/lib/dns/nta.c index ec5b2c80e2..83e9c38e01 100644 --- a/lib/dns/nta.c +++ b/lib/dns/nta.c @@ -300,8 +300,8 @@ dns_ntatable_add(dns_ntatable_t *ntatable, const dns_name_t *name, bool force, isc_stdtime_t now, uint32_t lifetime) { isc_result_t result = ISC_R_SUCCESS; dns__nta_t *nta = NULL; + dns__nta_t *old_nta = NULL; dns_qp_t *qp = NULL; - void *pval = NULL; REQUIRE(VALID_NTATABLE(ntatable)); @@ -317,17 +317,15 @@ dns_ntatable_add(dns_ntatable_t *ntatable, const dns_name_t *name, bool force, result = dns_qp_insert(qp, nta, 0); switch (result) { case ISC_R_EXISTS: - result = dns_qp_getname(qp, &nta->name, &pval, NULL); - if (result == ISC_R_SUCCESS) { - /* - * an NTA already existed: throw away the - * new one and update the old one. - */ - dns__nta_detach(&nta); /* for nta_create */ - nta = pval; - break; - } - /* update the NTA's timer as if it were new */ + result = dns_qp_deletename(qp, name, (void *)&old_nta, NULL); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + + dns__nta_shutdown(old_nta); + dns__nta_detach(&old_nta); + + result = dns_qp_insert(qp, nta, 0); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + FALLTHROUGH; case ISC_R_SUCCESS: nta->expiry = now + lifetime; @@ -381,9 +379,7 @@ delete_expired(void *arg) { RWLOCK(&ntatable->rwlock, isc_rwlocktype_write); dns_qpmulti_write(ntatable->table, &qp); result = dns_qp_getname(qp, &nta->name, &pval, NULL); - if (result == ISC_R_SUCCESS && - ((dns__nta_t *)pval)->expiry == nta->expiry && !nta->shuttingdown) - { + if (result == ISC_R_SUCCESS && pval == nta && !nta->shuttingdown) { char nb[DNS_NAME_FORMATSIZE]; dns_name_format(&nta->name, nb, sizeof(nb)); isc_log_write(dns_lctx, DNS_LOGCATEGORY_DNSSEC,