From be6cc53ec27e4549bf166d2161bacce688151e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Tue, 23 Jun 2020 13:02:21 +0200 Subject: [PATCH 1/3] Don't continue opening a new rndc connection if we are shutting down Due to lack of synchronization, whenever named was being requested to stop using rndc, controlconf.c module could be trying to access an already released pointer through named_g_server->interfacemgr in a separate thread. The race could only be triggered if named was being shutdown and more rndc connections were ocurring at the same time. This fix correctly checks if the server is shutting down before opening a new rndc connection. --- bin/named/controlconf.c | 66 +++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/bin/named/controlconf.c b/bin/named/controlconf.c index a3fda486d3..bab50ae8e0 100644 --- a/bin/named/controlconf.c +++ b/bin/named/controlconf.c @@ -226,6 +226,7 @@ shutdown_listener(controllistener_t *listener) { if (listener->listening) { isc_socket_cancel(listener->sock, listener->task, ISC_SOCKCANCEL_ACCEPT); + return; } maybe_free_listener(listener); @@ -636,6 +637,8 @@ control_newconn(isc_task_t *task, isc_event_t *event) { UNUSED(task); + REQUIRE(listener->listening); + listener->listening = false; if (nevent->result != ISC_R_SUCCESS) { @@ -647,6 +650,14 @@ control_newconn(isc_task_t *task, isc_event_t *event) { } sock = nevent->newsocket; + + /* Is the server shutting down? */ + if (listener->controls->shuttingdown) { + isc_socket_detach(&sock); + shutdown_listener(listener); + goto cleanup; + } + isc_socket_setname(sock, "control", NULL); (void)isc_socket_getpeername(sock, &peeraddr); if (listener->type == isc_sockettype_tcp && @@ -1121,36 +1132,33 @@ add_listener(named_controls_t *cp, controllistener_t **listenerp, listener = isc_mem_get(mctx, sizeof(*listener)); - if (result == ISC_R_SUCCESS) { - listener->mctx = NULL; - isc_mem_attach(mctx, &listener->mctx); - listener->controls = cp; - listener->task = cp->server->task; - listener->address = *addr; - listener->sock = NULL; - listener->listening = false; - listener->exiting = false; - listener->acl = NULL; - listener->type = type; - listener->perm = 0; - listener->owner = 0; - listener->group = 0; - listener->readonly = false; - ISC_LINK_INIT(listener, link); - ISC_LIST_INIT(listener->keys); - ISC_LIST_INIT(listener->connections); + listener->mctx = NULL; + isc_mem_attach(mctx, &listener->mctx); + listener->controls = cp; + listener->task = cp->server->task; + listener->address = *addr; + listener->sock = NULL; + listener->listening = false; + listener->exiting = false; + listener->acl = NULL; + listener->type = type; + listener->perm = 0; + listener->owner = 0; + listener->group = 0; + listener->readonly = false; + ISC_LINK_INIT(listener, link); + ISC_LIST_INIT(listener->keys); + ISC_LIST_INIT(listener->connections); - /* - * Make the acl. - */ - if (control != NULL && type == isc_sockettype_tcp) { - allow = cfg_tuple_get(control, "allow"); - result = cfg_acl_fromconfig(allow, config, named_g_lctx, - aclconfctx, mctx, 0, - &new_acl); - } else { - result = dns_acl_any(mctx, &new_acl); - } + /* + * Make the acl. + */ + if (control != NULL && type == isc_sockettype_tcp) { + allow = cfg_tuple_get(control, "allow"); + result = cfg_acl_fromconfig(allow, config, named_g_lctx, + aclconfctx, mctx, 0, &new_acl); + } else { + result = dns_acl_any(mctx, &new_acl); } if ((result == ISC_R_SUCCESS) && (control != NULL)) { From 042e509753062e8e2d829304f9f57f4e67011cb8 Mon Sep 17 00:00:00 2001 From: Diego Fronza Date: Mon, 25 May 2020 15:03:32 -0300 Subject: [PATCH 2/3] Added test for the fix This test ensures that named will correctly shutdown when receiving multiple control connections after processing of either "rncd stop" or "kill -SIGTERM" commands. Before the fix, named was crashing due to a race condition happening between two threads, one running shutdown logic in named/server.c and other handling control logic in controlconf.c. This test tries to reproduce the above scenario by issuing multiple queries to a target named instance, issuing either rndc stop or kill -SIGTERM command to the same named instance, then starting multiple rndc status connections to ensure it is not crashing anymore. --- bin/tests/system/Makefile.am | 5 +- bin/tests/system/conf.sh.common | 1 + bin/tests/system/shutdown/clean.sh | 18 ++ bin/tests/system/shutdown/conftest.py | 58 ++++++ bin/tests/system/shutdown/ns1/named.conf.in | 29 +++ bin/tests/system/shutdown/ns1/root.db | 23 +++ bin/tests/system/shutdown/ns2/named.conf.in | 27 +++ bin/tests/system/shutdown/ns2/test.db | 7 + .../system/shutdown/resolver/named.conf.in | 26 +++ bin/tests/system/shutdown/resolver/root.db | 19 ++ bin/tests/system/shutdown/setup.sh | 21 ++ bin/tests/system/shutdown/tests-shutdown.py | 193 ++++++++++++++++++ util/copyrights | 4 + 13 files changed, 429 insertions(+), 2 deletions(-) create mode 100644 bin/tests/system/shutdown/clean.sh create mode 100644 bin/tests/system/shutdown/conftest.py create mode 100644 bin/tests/system/shutdown/ns1/named.conf.in create mode 100644 bin/tests/system/shutdown/ns1/root.db create mode 100644 bin/tests/system/shutdown/ns2/named.conf.in create mode 100644 bin/tests/system/shutdown/ns2/test.db create mode 100644 bin/tests/system/shutdown/resolver/named.conf.in create mode 100644 bin/tests/system/shutdown/resolver/root.db create mode 100644 bin/tests/system/shutdown/setup.sh create mode 100755 bin/tests/system/shutdown/tests-shutdown.py diff --git a/bin/tests/system/Makefile.am b/bin/tests/system/Makefile.am index 3abe7a4738..10e38dc279 100644 --- a/bin/tests/system/Makefile.am +++ b/bin/tests/system/Makefile.am @@ -76,7 +76,7 @@ TESTS += \ rpzrecurse endif HAVE_PERLMOD_NET_DNS -TESTS += \ +TESTS += \ acl \ additional \ addzone \ @@ -107,7 +107,7 @@ TESTS += \ geoip2 \ glue \ idna \ - include-multiplecfg \ + include-multiplecfg \ inline \ integrity \ kasp \ @@ -134,6 +134,7 @@ TESTS += \ rrsetorder \ rsabigexponent \ runtime \ + shutdown \ sfcache \ smartsign \ sortlist \ diff --git a/bin/tests/system/conf.sh.common b/bin/tests/system/conf.sh.common index b2fafd453c..91c277e452 100644 --- a/bin/tests/system/conf.sh.common +++ b/bin/tests/system/conf.sh.common @@ -114,6 +114,7 @@ rrsetorder rsabigexponent runtime sfcache +shutdown smartsign sortlist spf diff --git a/bin/tests/system/shutdown/clean.sh b/bin/tests/system/shutdown/clean.sh new file mode 100644 index 0000000000..f977c772e8 --- /dev/null +++ b/bin/tests/system/shutdown/clean.sh @@ -0,0 +1,18 @@ +# Copyright (C) Internet Systems Consortium, Inc. ("ISC") +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# See the COPYRIGHT file distributed with this work for additional +# information regarding copyright ownership. + +rm -f ns*/*.jnl +rm -f ns*/named.conf +rm -f ns*/named.lock +rm -f ns*/named.memstats +rm -f ns*/named.run +rm -f ns*/rpz*.txt +rm -f resolver/named.conf +rm -rf __pycache__ +rm -f *.status diff --git a/bin/tests/system/shutdown/conftest.py b/bin/tests/system/shutdown/conftest.py new file mode 100644 index 0000000000..166d95f4ee --- /dev/null +++ b/bin/tests/system/shutdown/conftest.py @@ -0,0 +1,58 @@ +############################################################################ +# Copyright (C) Internet Systems Consortium, Inc. ("ISC") +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# See the COPYRIGHT file distributed with this work for additional +# information regarding copyright ownership. +############################################################################ + +import os +import pytest + + +def pytest_configure(config): + config.addinivalue_line( + "markers", "dnspython: mark tests that need dnspython to function" + ) + + +def pytest_collection_modifyitems(config, items): + # pylint: disable=unused-argument,unused-import,too-many-branches + # pylint: disable=import-outside-toplevel + + # Test for dnspython module + skip_dnspython = pytest.mark.skip( + reason="need dnspython module to run") + try: + import dns.query # noqa: F401 + except ModuleNotFoundError: + for item in items: + if "dnspython" in item.keywords: + item.add_marker(skip_dnspython) + + +@pytest.fixture +def named_port(request): + # pylint: disable=unused-argument + port = os.getenv("PORT") + if port is None: + port = 5301 + else: + port = int(port) + + return port + + +@pytest.fixture +def control_port(request): + # pylint: disable=unused-argument + port = os.getenv("CONTROLPORT") + if port is None: + port = 5301 + else: + port = int(port) + + return port diff --git a/bin/tests/system/shutdown/ns1/named.conf.in b/bin/tests/system/shutdown/ns1/named.conf.in new file mode 100644 index 0000000000..b0fbe616ce --- /dev/null +++ b/bin/tests/system/shutdown/ns1/named.conf.in @@ -0,0 +1,29 @@ +key rndc_key { + secret "1234abcd8765"; + algorithm hmac-sha256; +}; + +controls { + inet 10.53.0.1 port @CONTROLPORT@ allow { any; } keys { rndc_key; }; +}; + +options { + query-source address 10.53.0.1; + notify-source 10.53.0.1; + transfer-source 10.53.0.1; + port @PORT@; + listen-on { 10.53.0.1; }; + pid-file "named.pid"; + notify no; + dnssec-validation no; + allow-query { any; }; + recursion yes; + allow-recursion { any; }; +}; + +# Delegate .test domain to 10.53.0.2 +zone "." { + type master; + file "root.db"; + allow-transfer { none; }; +}; diff --git a/bin/tests/system/shutdown/ns1/root.db b/bin/tests/system/shutdown/ns1/root.db new file mode 100644 index 0000000000..2533685351 --- /dev/null +++ b/bin/tests/system/shutdown/ns1/root.db @@ -0,0 +1,23 @@ +; Copyright (C) Internet Systems Consortium, Inc. ("ISC") +; +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. +; +; See the COPYRIGHT file distributed with this work for additional +; information regarding copyright ownership. + +$TTL 300 +@ IN SOA a.root. root.test. ( + 2000042100 ; serial + 600 ; refresh + 600 ; retry + 1200 ; expire + 600 ; minimum + ) + +. IN NS a.root. +a.root. IN A 10.53.0.1 + +test IN NS ns1.test +ns1.test IN A 10.53.0.2 diff --git a/bin/tests/system/shutdown/ns2/named.conf.in b/bin/tests/system/shutdown/ns2/named.conf.in new file mode 100644 index 0000000000..13aeb30a41 --- /dev/null +++ b/bin/tests/system/shutdown/ns2/named.conf.in @@ -0,0 +1,27 @@ +key rndc_key { + secret "1234abcd8765"; + algorithm hmac-sha256; +}; + +controls { + inet 10.53.0.1 port @CONTROLPORT@ allow { any; } keys { rndc_key; }; +}; + +options { + query-source address 10.53.0.2; + notify-source 10.53.0.2; + transfer-source 10.53.0.2; + port @PORT@; + listen-on { 10.53.0.2; }; + pid-file "named.pid"; + notify no; + dnssec-validation no; + allow-query { any; }; +}; + +# 10.53.0.2 is authoritative for .test domain +zone "test" { + type master; + file "test.db"; + allow-transfer { none; }; +}; diff --git a/bin/tests/system/shutdown/ns2/test.db b/bin/tests/system/shutdown/ns2/test.db new file mode 100644 index 0000000000..f9c3b9f1f8 --- /dev/null +++ b/bin/tests/system/shutdown/ns2/test.db @@ -0,0 +1,7 @@ +$TTL 300 + +@ IN SOA ns1 root.test. 2020040101 4h 1h 1w 60 +@ IN NS ns1 +ns1 IN A 10.53.0.2 +@ IN A 10.53.0.2 +www IN A 10.53.0.2 diff --git a/bin/tests/system/shutdown/resolver/named.conf.in b/bin/tests/system/shutdown/resolver/named.conf.in new file mode 100644 index 0000000000..8ad14f408b --- /dev/null +++ b/bin/tests/system/shutdown/resolver/named.conf.in @@ -0,0 +1,26 @@ +key rndc_key { + secret "1234abcd8765"; + algorithm hmac-sha256; +}; + +controls { + inet 10.53.0.3 port @CONTROLPORT@ allow { any; } keys { rndc_key; }; +}; + +options { + query-source address 10.53.0.3; + notify-source 10.53.0.3; + transfer-source 10.53.0.3; + port @PORT@; + listen-on { 10.53.0.3; }; + pid-file "named.pid"; + notify no; + dnssec-validation no; + allow-query { any; }; + allow-recursion { any; }; +}; + +zone "." { + type hint; + file "root.db"; +}; diff --git a/bin/tests/system/shutdown/resolver/root.db b/bin/tests/system/shutdown/resolver/root.db new file mode 100644 index 0000000000..c50a9079ee --- /dev/null +++ b/bin/tests/system/shutdown/resolver/root.db @@ -0,0 +1,19 @@ +; Copyright (C) Internet Systems Consortium, Inc. ("ISC") +; +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. +; +; See the COPYRIGHT file distributed with this work for additional +; information regarding copyright ownership. + +$TTL 300 +. IN SOA a.root. root.root. ( + 2000042100 ; serial + 600 ; refresh + 600 ; retry + 1200 ; expire + 600 ; minimum + ) +. IN NS a.root. +a.root. IN A 10.53.0.1 diff --git a/bin/tests/system/shutdown/setup.sh b/bin/tests/system/shutdown/setup.sh new file mode 100644 index 0000000000..a5a30d57df --- /dev/null +++ b/bin/tests/system/shutdown/setup.sh @@ -0,0 +1,21 @@ +#! /bin/sh +# +# Copyright (C) Internet Systems Consortium, Inc. ("ISC") +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# See the COPYRIGHT file distributed with this work for additional +# information regarding copyright ownership. + +# touch dnsrps-off to not test with DNSRPS + +set -e + +SYSTEMTESTTOP=.. +. $SYSTEMTESTTOP/conf.sh + +copy_setports ns1/named.conf.in ns1/named.conf +copy_setports ns2/named.conf.in ns2/named.conf +copy_setports resolver/named.conf.in resolver/named.conf diff --git a/bin/tests/system/shutdown/tests-shutdown.py b/bin/tests/system/shutdown/tests-shutdown.py new file mode 100755 index 0000000000..69eefd11ce --- /dev/null +++ b/bin/tests/system/shutdown/tests-shutdown.py @@ -0,0 +1,193 @@ +#!/usr/bin/python3 +############################################################################ +# Copyright (C) Internet Systems Consortium, Inc. ("ISC") +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# See the COPYRIGHT file distributed with this work for additional +# information regarding copyright ownership. +############################################################################ + +from concurrent.futures import ThreadPoolExecutor, as_completed +import os +import random +import subprocess +from string import ascii_lowercase as letters +import time + +import dns.resolver +import pytest + + +def do_work(named_proc, resolver, rndc_cmd, kill_method, n_workers, n_queries): + """Creates a number of A queries to run in parallel + in order simulate a slightly more realistic test scenario. + + The main idea of this function is to create and send a bunch + of A queries to a target named instance and during this process + a request for shutting down named will be issued. + + In the process of shutting down named, a couple control connections + are created (by launching rndc) to ensure that the crash was fixed. + + if kill_method=="rndc" named will be asked to shutdown by + means of rndc stop. + if kill_method=="sigterm" named will be killed by SIGTERM on + POSIX systems or by TerminateProcess() on Windows systems. + + :param named_proc: named process instance + :type named_proc: subprocess.Popen + + :param resolver: target resolver + :type resolver: dns.resolver.Resolver + + :param rndc_cmd: rndc command with default arguments + :type rndc_cmd: list of strings, e.g. ["rndc", "-p", "23750"] + + :kill_method: "rndc" or "sigterm" + :type kill_method: str + + :param n_workers: Number of worker threads to create + :type n_workers: int + + :param n_queries: Total number of queries to send + :type n_queries: int + """ +# pylint: disable-msg=too-many-arguments +# pylint: disable-msg=too-many-locals + + # helper function, args must be a list or tuple with arguments to rndc. + def launch_rndc(args): + return subprocess.call(rndc_cmd + args, timeout=10) + + # We're going to execute queries in parallel by means of a thread pool. + # dnspython functions block, so we need to circunvent that. + executor = ThreadPoolExecutor(n_workers + 1) + + # Helper dict, where keys=Future objects and values are tags used + # to process results later. + futures = {} + + # 50% of work will be A queries. + # 1 work will be rndc stop. + # Remaining work will be rndc status (so we test parallel control + # connections that were crashing named). + shutdown = True + for i in range(n_queries): + if i < (n_queries // 2): + # Half work will be standard A queries. + # Among those we split 50% queries relname='www', + # 50% queries relname=random characters + if random.randrange(2) == 1: + tag = "good" + relname = "www" + else: + tag = "bad" + length = random.randint(4, 10) + relname = "".join(letters[ + random.randrange(len(letters))] for i in range(length)) + + qname = relname + ".test" + futures[executor.submit(resolver.query, qname, 'A')] = tag + elif shutdown: # We attempt to stop named in the middle + shutdown = False + if kill_method == "rndc": + futures[executor.submit(launch_rndc, ['stop'])] = 'stop' + else: + futures[executor.submit(named_proc.terminate)] = 'kill' + + else: + # We attempt to send couple rndc commands while named is + # being shutdown + futures[executor.submit(launch_rndc, ['status'])] = 'status' + + ret_code = -1 + for future in as_completed(futures): + try: + result = future.result() + # If tag is "stop", result is an instance of + # subprocess.CompletedProcess, then we check returncode + # attribute to know if rncd stop command finished successfully. + # + # if tag is "kill" then the main function will check if + # named process exited gracefully after SIGTERM signal. + if futures[future] == "stop": + ret_code = result + + except (dns.resolver.NXDOMAIN, dns.exception.Timeout): + pass + + if kill_method == "rndc": + assert ret_code == 0 + + executor.shutdown() + + +@pytest.mark.dnspython +def test_named_shutdown(named_port, control_port): + # pylint: disable-msg=too-many-locals + cfg_dir = os.path.join(os.getcwd(), "resolver") + assert os.path.isdir(cfg_dir) + + cfg_file = os.path.join(cfg_dir, "named.conf") + assert os.path.isfile(cfg_file) + + named = os.getenv("NAMED") + assert named is not None + + rndc = os.getenv("RNDC") + assert rndc is not None + + systest_dir = os.getenv("SYSTEMTESTTOP") + assert systest_dir is not None + + # rndc configuration resides in $SYSTEMTESTTOP/common/rndc.conf + rndc_cfg = os.path.join(systest_dir, "common", "rndc.conf") + assert os.path.isfile(rndc_cfg) + + # rndc command with default arguments. + rndc_cmd = [rndc, "-c", rndc_cfg, "-p", str(control_port), + "-s", "10.53.0.3"] + + # Helper function, launch named without blocking. + def launch_named(): + proc = subprocess.Popen([named, "-c", cfg_file, "-f"], cwd=cfg_dir) + # Ensure named is running + assert proc.poll() is None + + return proc + + # We create a resolver instance that will be used to send queries. + resolver = dns.resolver.Resolver() + resolver.nameservers = ['10.53.0.3'] + resolver.port = named_port + + # We test named shutting down using two methods: + # Method 1: using rndc ctop + # Method 2: killing with SIGTERM + # In both methods named should exit gracefully. + for kill_method in ("rndc", "sigterm"): + named_proc = launch_named() + time.sleep(2) + + do_work(named_proc, resolver, rndc_cmd, + kill_method, n_workers=12, n_queries=16) + + # Wait named to exit for a maximum of MAX_TIMEOUT seconds. + MAX_TIMEOUT = 10 + is_dead = False + for _ in range(MAX_TIMEOUT): + if named_proc.poll() is not None: + is_dead = True + break + time.sleep(1) + + if not is_dead: + named_proc.kill() + + assert is_dead + # Ensures that named exited gracefully. + # If it crashed (abort()) exitcode will be non zero. + assert named_proc.returncode == 0 diff --git a/util/copyrights b/util/copyrights index 238e6da92d..e049c42f45 100644 --- a/util/copyrights +++ b/util/copyrights @@ -784,6 +784,10 @@ ./bin/tests/system/sfcache/ns5/sign.sh SH 2018,2019,2020 ./bin/tests/system/sfcache/setup.sh SH 2014,2016,2017,2018,2019,2020 ./bin/tests/system/sfcache/tests.sh SH 2014,2016,2017,2018,2019,2020 +./bin/tests/system/shutdown/clean.sh SH 2020 +./bin/tests/system/shutdown/conftest.py PYTHON 2020 +./bin/tests/system/shutdown/setup.sh SH 2020 +./bin/tests/system/shutdown/tests-shutdown.py PYTHON-BIN 2020 ./bin/tests/system/smartsign/clean.sh SH 2010,2012,2014,2016,2018,2019,2020 ./bin/tests/system/smartsign/tests.sh SH 2010,2011,2012,2014,2016,2017,2018,2019,2020 ./bin/tests/system/sortlist/clean.sh SH 2000,2001,2004,2007,2009,2012,2014,2015,2016,2018,2019,2020 From 605209402ff56e91a49be10dcf2d8782a680e7be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Tue, 23 Jun 2020 13:30:09 +0200 Subject: [PATCH 3/3] Add CHANGES and release not for #1747 --- CHANGES | 4 ++++ doc/notes/notes-current.rst | 3 +++ 2 files changed, 7 insertions(+) diff --git a/CHANGES b/CHANGES index ec4a709c58..3311ac6650 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +5453. [bug] `named` would crash on shutdown when new `rndc` + connection is received at the same time as + shutting down. [GL #1747] + 5452. [bug] The "blackhole" ACL was accidentally disabled with respect to client queries. [GL #1936] diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index f5c274b16f..1e129ffef1 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -62,3 +62,6 @@ Bug Fixes client queries. Blocked IP addresses were not used for upstream queries but queries from those addresses could still be answered. [GL #1936] + +- ``named`` would crash on shutdown when new ``rndc`` connection is received at + the same time as shutting down. [GL #1747]