From d7dbfbb011fe60f3ee63dfeb7df88e80b03f477e Mon Sep 17 00:00:00 2001 From: Colin Vidal Date: Fri, 26 Sep 2025 11:12:53 +0200 Subject: [PATCH 1/2] apply_configuration: leave exclusive mode after viewlist cleanup When a re-configuration fails, `apply_configuration` flows jump to a cleanup label and, at some point, leave the exclusive mode and cleanup the viewlist. It looks fine as the viewlist is at this point only locally known (if this is a configuration failure, this is the new view list, if this is a success, this is the old list which has been swapped out from the production list during the exclusive mode). However, the view and zone initialization code enqueues job callbacks, for instance from `dns_zone_setsigninginterval` (but there are others cases) which will be called for the new views and zones after the exclusive mode is over. Depending where the configuration fails, those views and zones can be half-configured, for instance a view might have an unfrozen resolver. Hence, leaving the exclusive mode before cleaning up those views ans zones will immediately called the previously enqueued callbacks and lead to this reconfiguration-failure crash stack: ``` isc_assertion_failed dns_resolver_createfetch do_keyfetch isc__async_cb ... uv_run loop_thread thread_body thread_run start_thread ... ``` To avoid the problem, the views are now cleaned up before leaving the exclusive mode (which also clean up the zones and enqueued callbacks). As context, the bug was introduced by !10910 which moved the creation (not configuration) of the view outsides of the exclusive mode. This is a safe move (as at this point, the newly view are only known locally by `apply_configuration`) but the re-order was wrong regarding the point where the exclusive mode was ended (before the change, the exclusive mode as always ended before the new view are detached). --- bin/named/server.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bin/named/server.c b/bin/named/server.c index f3dffbb625..c34fd28a8b 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -8142,7 +8142,7 @@ apply_configuration(cfg_parser_t *configparser, cfg_obj_t *config, uint32_t max; uint64_t initial, idle, keepalive, advertised, primaries; bool loadbalancesockets; - bool exclusive = true; + bool exclusive = false; dns_aclenv_t *env = ns_interfacemgr_getaclenv(named_g_server->interfacemgr); cfg_aclconfctx_t *tmpaclctx, *aclctx = NULL; @@ -8199,6 +8199,7 @@ apply_configuration(cfg_parser_t *configparser, cfg_obj_t *config, isc_tlsctx_cache_create(isc_g_mctx, &tlsctx_client_cache); /* Ensure exclusive access to configuration data. */ + exclusive = true; isc_loopmgr_pause(); /* @@ -9300,10 +9301,6 @@ cleanup_bindkeys_parser: cfg_parser_destroy(&bindkeys_parser); } - if (exclusive) { - isc_loopmgr_resume(); - } - /* * Detach the TLS client context (whether the one created at the * begining of this function, or the previous running one) @@ -9311,6 +9308,9 @@ cleanup_bindkeys_parser: isc_tlsctx_cache_detach(&tlsctx_client_cache); cleanup_viewlist: + isc_log_write(NAMED_LOGCATEGORY_GENERAL, NAMED_LOGMODULE_SERVER, + ISC_LOG_DEBUG(3), "apply_configuration: detaching views"); + ISC_LIST_FOREACH(viewlist, view, link) { ISC_LIST_UNLINK(viewlist, view, link); if (result == ISC_R_SUCCESS && strcmp(view->name, "_bind") != 0) @@ -9321,6 +9321,10 @@ cleanup_viewlist: dns_view_detach(&view); } + if (exclusive) { + isc_loopmgr_resume(); + } + cleanup_kasplist: ISC_LIST_FOREACH(kasplist, kasp, link) { ISC_LIST_UNLINK(kasplist, kasp, link); From 47dd27d87bc217ca1a62cedfa8b245e6db7c0000 Mon Sep 17 00:00:00 2001 From: Colin Vidal Date: Fri, 26 Sep 2025 14:54:42 +0200 Subject: [PATCH 2/2] test views are detached before leaving exclusive mode Adds a log-based test ensuring that when a reconfiguration fails inside the view configuration, the newly created view are always detached before the exclusive mode is ended. --- .../system/configloading/ns1/named.conf.j2 | 18 +++++++---- .../configloading/tests_configloading.py | 30 +++++++++++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/bin/tests/system/configloading/ns1/named.conf.j2 b/bin/tests/system/configloading/ns1/named.conf.j2 index 6c85c1df5e..ca1c1630a1 100644 --- a/bin/tests/system/configloading/ns1/named.conf.j2 +++ b/bin/tests/system/configloading/ns1/named.conf.j2 @@ -11,7 +11,7 @@ * information regarding copyright ownership. */ -// NS2 +{% set wrongoption = wrongoption | default(False) %} options { port @PORT@; @@ -30,9 +30,15 @@ controls { inet 10.53.0.1 port @CONTROLPORT@ allow { any; } keys { rndc_key; }; }; - -zone "example." { - type primary; - file "example.db"; +view "default" { + zone "example." { + type primary; + file "example.db"; + }; + /* + * failure to reconfig must occurs during configuration of the views + */ + {% if wrongoption %} + forwarders port 9999999 { 127.0.0.1; }; + {% endif %} }; - diff --git a/bin/tests/system/configloading/tests_configloading.py b/bin/tests/system/configloading/tests_configloading.py index 92a1907741..1b74e31b33 100644 --- a/bin/tests/system/configloading/tests_configloading.py +++ b/bin/tests/system/configloading/tests_configloading.py @@ -9,6 +9,11 @@ # See the COPYRIGHT file distributed with this work for additional # information regarding copyright ownership. +import re + + +import isctest + def test_configloading_log(ns1): """ @@ -39,3 +44,28 @@ def test_configloading_log(ns1): with ns1.watch_log_from_here() as watcher: ns1.rndc("reload") watcher.wait_for_sequence(log_sequence) + + +def test_reload_fails_log(ns1, templates): + """ + This test ensures that when a reconfig fails during view configuration (or + after), views/zones (which are newly created view/zones which won't be used + and local of apply_configuration) are detached (and freed) before the + exclusive mode is released + """ + + log_sequence = [ + "apply_configuration", + "loop exclusive mode: starting", + "apply_configuration: configure_views", + re.compile(r".*port '9999999' out of range"), + "apply_configuration: detaching views", + "loop exclusive mode: ending", + ] + + with ns1.watch_log_from_here() as watcher: + templates.render("ns1/named.conf", {"wrongoption": True}) + try: + ns1.rndc("reload") + except isctest.rndc.RNDCException: + watcher.wait_for_sequence(log_sequence)