From 71ce8b0a51bc028ed46541d274965e7e4b5fc832 Mon Sep 17 00:00:00 2001 From: Tony Finch Date: Fri, 1 Apr 2022 13:46:39 +0100 Subject: [PATCH] Ensure that dns_request_createvia() has a retry limit There are a couple of problems with dns_request_createvia(): a UDP retry count of zero means unlimited retries (it should mean no retries), and the overall request timeout is not enforced. The combination of these bugs means that requests can be retried forever. This change alters calls to dns_request_createvia() to avoid the infinite retry bug by providing an explicit retry count. Previously, the calls specified infinite retries and relied on the limit implied by the overall request timeout and the UDP timeout (which did not work because the overall timeout is not enforced). The `udpretries` argument is also changed to be the number of retries; previously, zero was interpreted as infinity because of an underflow to UINT_MAX, which appeared to be a mistake. And `mdig` is updated to match the change in retry accounting. The bug could be triggered by zone maintenance queries, including NOTIFY messages, DS parental checks, refresh SOA queries and stub zone nameserver lookups. It could also occur with `nsupdate -r 0`. (But `mdig` had its own code to avoid the bug.) --- CHANGES | 3 +++ bin/tests/system/notify/ns3/named.conf.in | 2 ++ bin/tests/system/notify/tests.sh | 4 ++++ bin/tools/mdig.c | 7 +++---- doc/notes/notes-current.rst | 9 +++++++++ lib/dns/request.c | 5 +++-- lib/dns/zone.c | 10 +++++----- 7 files changed, 29 insertions(+), 11 deletions(-) diff --git a/CHANGES b/CHANGES index c6fb175f7e..8600ca7df8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +5855. [bug] Ensure that zone maintenance queries have a retry limit. + [GL #3242] + 5854. [func] Implement reference counting for TLS contexts and allow reloading of TLS certificates on reconfiguration without destroying the underlying TCP listener sockets diff --git a/bin/tests/system/notify/ns3/named.conf.in b/bin/tests/system/notify/ns3/named.conf.in index e364e6087e..406418e8f3 100644 --- a/bin/tests/system/notify/ns3/named.conf.in +++ b/bin/tests/system/notify/ns3/named.conf.in @@ -32,4 +32,6 @@ zone "example" { type secondary; primaries { 10.53.0.2; }; file "example.bk"; + # non-responsive notify recipient (no reply, no ICMP errors) + also-notify { 10.53.10.53; }; }; diff --git a/bin/tests/system/notify/tests.sh b/bin/tests/system/notify/tests.sh index 9fd7b816f0..52d2f81a5b 100644 --- a/bin/tests/system/notify/tests.sh +++ b/bin/tests/system/notify/tests.sh @@ -211,5 +211,9 @@ grep "sending notify to 10.53.0.5#[0-9]* : TSIG (c)" ns5/named.run > /dev/null | test_end +test_start "checking notify retries expire within 45 seconds ($n)" +wait_for_log 45 'retries exceeded' ns3/named.run || ret=1 +test_end + echo_i "exit status: $status" [ $status -eq 0 ] || exit 1 diff --git a/bin/tools/mdig.c b/bin/tools/mdig.c index aec588aef4..e2adf43235 100644 --- a/bin/tools/mdig.c +++ b/bin/tools/mdig.c @@ -1458,7 +1458,6 @@ plus_option(char *option, struct query *query, bool global) { result = parse_uint(&query->udpretries, value, MAXTRIES - 1, "udpretries"); CHECK("parse_uint(udpretries)", result); - query->udpretries++; break; default: goto invalid_option; @@ -1579,8 +1578,8 @@ plus_option(char *option, struct query *query, bool global) { result = parse_uint(&query->udpretries, value, MAXTRIES, "udpretries"); CHECK("parse_uint(udpretries)", result); - if (query->udpretries == 0) { - query->udpretries = 1; + if (query->udpretries > 0) { + query->udpretries -= 1; } break; case 't': @@ -1947,7 +1946,7 @@ parse_args(bool is_batchfile, int argc, char **argv) { default_query.ecs_addr = NULL; default_query.timeout = 0; default_query.udptimeout = 0; - default_query.udpretries = 3; + default_query.udpretries = 2; ISC_LINK_INIT(&default_query, link); } diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index 080008ad87..ae49ca4cec 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -156,3 +156,12 @@ Bug Fixes - Handling of the TCP write timeouts has been improved to track timeout for each TCP write separately leading to faster connection tear down in case the other party is not reading the data. :gl:`#3200` + +- Zone maintenance DNS queries would retry forever while the + destination server was unreachable. These queries include outgoing + NOTIFY messages, refresh SOA queries, parental DS checks, and stub + zone NS queries. For example, if a zone has any nameservers with + IPv6 addresses and a secondary server without IPv6 connectivity, the + IPv4-only server would keep trying to send a growing amount of + NOTIFY traffic over IPv6. This futile traffic was not logged. + :gl:`#3242` diff --git a/lib/dns/request.c b/lib/dns/request.c index c170e6ee24..5a70ad6d89 100644 --- a/lib/dns/request.c +++ b/lib/dns/request.c @@ -535,7 +535,7 @@ dns_request_createraw(dns_requestmgr_t *requestmgr, isc_buffer_t *msgbuf, tcp = true; request->timeout = timeout * 1000; } else { - if (udptimeout == 0 && udpretries != 0) { + if (udptimeout == 0) { udptimeout = timeout / (udpretries + 1); } if (udptimeout == 0) { @@ -1080,7 +1080,8 @@ req_response(isc_result_t result, isc_region_t *region, void *arg) { if (result == ISC_R_TIMEDOUT) { LOCK(&request->requestmgr->locks[request->hash]); - if (--request->udpcount != 0) { + if (request->udpcount != 0) { + request->udpcount -= 1; dns_dispatch_resume(request->dispentry, request->timeout); if (!DNS_REQUEST_SENDING(request)) { diff --git a/lib/dns/zone.c b/lib/dns/zone.c index 76c2554b1a..2ce42fceca 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -12696,7 +12696,7 @@ notify_send_toaddr(isc_task_t *task, isc_event_t *event) { } result = dns_request_createvia( notify->zone->view->requestmgr, message, &src, ¬ify->dst, - dscp, options, key, timeout * 3, timeout, 0, notify->zone->task, + dscp, options, key, timeout * 3, timeout, 2, notify->zone->task, notify_done, notify, ¬ify->request); if (result == ISC_R_SUCCESS) { if (isc_sockaddr_pf(¬ify->dst) == AF_INET) { @@ -13425,7 +13425,7 @@ stub_request_nameserver_address(struct stub_cb_args *args, bool ipv4, result = dns_request_createvia( zone->view->requestmgr, message, &zone->sourceaddr, &zone->primaryaddr, args->dscp, DNS_REQUESTOPT_TCP, - args->tsig_key, args->timeout * 3, args->timeout, 0, zone->task, + args->tsig_key, args->timeout * 3, args->timeout, 2, zone->task, stub_glue_response_cb, request, &request->request); if (result != ISC_R_SUCCESS) { @@ -14673,7 +14673,7 @@ again: } result = dns_request_createvia( zone->view->requestmgr, message, &zone->sourceaddr, - &zone->primaryaddr, dscp, options, key, timeout * 3, timeout, 0, + &zone->primaryaddr, dscp, options, key, timeout * 3, timeout, 2, zone->task, refresh_callback, zone, &zone->request); if (result != ISC_R_SUCCESS) { zone_idetach(&dummy); @@ -14960,7 +14960,7 @@ ns_query(dns_zone_t *zone, dns_rdataset_t *soardataset, dns_stub_t *stub) { result = dns_request_createvia( zone->view->requestmgr, message, &zone->sourceaddr, &zone->primaryaddr, dscp, DNS_REQUESTOPT_TCP, key, timeout * 3, - timeout, 0, zone->task, stub_callback, cb_args, &zone->request); + timeout, 2, zone->task, stub_callback, cb_args, &zone->request); if (result != ISC_R_SUCCESS) { zone_debuglog(zone, me, 1, "dns_request_createvia() failed: %s", isc_result_totext(result)); @@ -21302,7 +21302,7 @@ checkds_send_toaddr(isc_task_t *task, isc_event_t *event) { options |= DNS_REQUESTOPT_TCP; result = dns_request_createvia( checkds->zone->view->requestmgr, message, &src, &checkds->dst, - dscp, options, key, timeout * 3, timeout, 0, + dscp, options, key, timeout * 3, timeout, 2, checkds->zone->task, checkds_done, checkds, &checkds->request); if (result != ISC_R_SUCCESS) { dns_zone_log(