From 33d219bfe1bd50b29f49a02cdbc356f7a35a6915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Fri, 20 Mar 2026 14:29:57 +0100 Subject: [PATCH] SKIP cache flush ordering on NTA expiry dns_view_flushnode() was called in the delete_expired() async callback, which runs after the query that detected the NTA expiry. This created a race: the query would proceed with stale cached data from the NTA period before the flush had a chance to run, resulting in transient SERVFAIL with EDE 22 (No Reachable Authority). In the older branches, simply remove the dns_view_flushnode() call (and the system test check that relied on the flush), as the solutions for these branches are too complicated and this was not a critical bug. (cherry picked from commit da8e1c956a9ae9134b2df511f0eba8efcceb77c7) --- bin/tests/system/nta/tests_nta.py | 7 ------- lib/dns/nta.c | 15 --------------- 2 files changed, 22 deletions(-) diff --git a/bin/tests/system/nta/tests_nta.py b/bin/tests/system/nta/tests_nta.py index 828c04822b..ece8db6729 100644 --- a/bin/tests/system/nta/tests_nta.py +++ b/bin/tests/system/nta/tests_nta.py @@ -147,13 +147,6 @@ def test_nta_behavior(servers): isctest.check.noerror(res) isctest.check.noadflag(res) - # Expiry should also trigger a cache flush, so even if a.secure.example A - # was cached when its NTA was active, cached data should not be returned. - m = isctest.query.create("a.secure.example", "A") - res = isctest.query.tcp(m, "10.53.0.4") - isctest.check.noerror(res) - isctest.check.adflag(res) - # bogus.example was set to expire in 20s, so at t=13 # it should still be NTA'd, but badds.example used the default # lifetime of 12s, so it should revert to SERVFAIL now. 
diff --git a/lib/dns/nta.c b/lib/dns/nta.c index b3b50b4ac8..ec5b2c80e2 100644 --- a/lib/dns/nta.c +++ b/lib/dns/nta.c @@ -375,7 +375,6 @@ delete_expired(void *arg) { isc_result_t result; dns_qp_t *qp = NULL; void *pval = NULL; - dns_view_t *view = NULL; REQUIRE(VALID_NTATABLE(ntatable)); @@ -391,16 +390,6 @@ delete_expired(void *arg) { DNS_LOGMODULE_NTA, ISC_LOG_INFO, "deleting expired NTA at %s", nb); - /* - * Delay the flushing to avoid lock-order-inversion, as - * dns_view_flushnode()->dns_adb_flushnames() locks 'adbname', - * and it can cause a problem e.g. in dns_ntatable_covered() in - * another thread called by the resolver (also involving 'fctx' - * lock), or in dns_ntatable_shutdown() (also involving 'view' - * lock). - */ - dns_view_weakattach(ntatable->view, &view); - dns_qp_deletename(qp, &nta->name, NULL, NULL); dns__nta_shutdown(nta); dns__nta_unref(nta); @@ -408,10 +397,6 @@ delete_expired(void *arg) { dns_qp_compact(qp, DNS_QPGC_MAYBE); dns_qpmulti_commit(ntatable->table, &qp); RWUNLOCK(&ntatable->rwlock, isc_rwlocktype_write); - if (view != NULL) { - dns_view_flushnode(view, &nta->name, true); - dns_view_weakdetach(&view); - } dns__nta_detach(&nta); dns_ntatable_detach(&ntatable); }