From 240caa32b9cab90a38ab863fd64e6becf5d1393c Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Thu, 25 May 2023 23:53:50 -0700 Subject: [PATCH 1/3] Stale answer lookups could loop when over recursion quota When a query was aborted because of the recursion quota being exceeded, but triggered a stale answer response and a stale data refresh query, it could cause named to loop back where we are iterating and following a delegation. Having no good answer in cache, we would fall back to using serve-stale again, use the stale data, try to refresh the RRset, and loop back again, without ever terminating until crashing due to stack overflow. This happens because in the functions 'query_notfound()' and 'query_delegation_recurse()', we check whether we can fall back to serving stale data. We shouldn't do so if we are already refreshing an RRset due to having prioritized stale data in cache. In other words, we need to add an extra check to 'query_usestale()' to disallow serving stale data if we are currently refreshing a stale RRset. As an additional mitigation to prevent looping, we now use the result code ISC_R_ALREADYRUNNING rather than ISC_R_FAILURE when a recursion loop is encountered, and we check for that condition in 'query_usestale()' as well. --- lib/ns/query.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/lib/ns/query.c b/lib/ns/query.c index 1fab263a35..9fa8ee0f9a 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -6533,7 +6533,7 @@ ns_query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qname, if (recparam_match(&client->query.recparam, qtype, qname, qdomain)) { ns_client_log(client, NS_LOGCATEGORY_CLIENT, NS_LOGMODULE_QUERY, ISC_LOG_INFO, "recursion loop detected"); - return (ISC_R_FAILURE); + return (ISC_R_ALREADYRUNNING); } recparam_update(&client->query.recparam, qtype, qname, qdomain); @@ -7650,10 +7650,21 @@ query_usestale(query_ctx_t *qctx, isc_result_t result) { return (false); } - if (result == DNS_R_DUPLICATE || result == DNS_R_DROP) { + if (qctx->refresh_rrset) { + /* + * This is a refreshing query, we have already prioritized + * stale data, so don't enable serve-stale again. + */ + return (false); + } + + if (result == DNS_R_DUPLICATE || result == DNS_R_DROP || + result == ISC_R_ALREADYRUNNING) + { /* * Don't enable serve-stale if the result signals a duplicate - * query or query that is being dropped. + * query or a query that is being dropped or can't proceed + * because of a recursion loop. */ return (false); } From ff5bacf17c2451e9d48c78a5ef96ec0c376ff33d Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Thu, 1 Jun 2023 10:03:48 +0200 Subject: [PATCH 2/3] Fix serve-stale hang at shutdown The 'refresh_rrset' variable is used to determine if we can detach from the client. This can cause a hang on shutdown. To fix this, move setting of the 'nodetach' variable up to where 'refresh_rrset' is set (in query_lookup(), and thus not in ns_query_done()), and set it to false when actually refreshing the RRset, so that when this lookup is completed, the client will be detached. --- lib/ns/query.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/ns/query.c b/lib/ns/query.c index 9fa8ee0f9a..64e9278307 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -5827,6 +5827,7 @@ query_refresh_rrset(query_ctx_t *orig_qctx) { qctx.client->query.dboptions &= ~(DNS_DBFIND_STALETIMEOUT | DNS_DBFIND_STALEOK | DNS_DBFIND_STALEENABLED); + qctx.client->nodetach = false; /* * We'll need some resources... @@ -6100,7 +6101,14 @@ query_lookup(query_ctx_t *qctx) { "%s stale answer used, an attempt to " "refresh the RRset will still be made", namebuf); + qctx->refresh_rrset = STALE(qctx->rdataset); + /* + * If we are refreshing the RRSet, we must not + * detach from the client in query_send(). + */ + qctx->client->nodetach = qctx->refresh_rrset; + if (stale_found) { ns_client_extendederror( qctx->client, ede, @@ -11961,12 +11969,7 @@ ns_query_done(query_ctx_t *qctx) { /* * Client may have been detached after query_send(), so * we test and store the flag state here, for safety. - * If we are refreshing the RRSet, we must not detach from the client - * in the query_send(), so we need to override the flag. */ - if (qctx->refresh_rrset) { - qctx->client->nodetach = true; - } nodetach = qctx->client->nodetach; query_send(qctx->client); From 10ac503a94ee5384f68325d50dd90eaf03d41912 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Wed, 31 May 2023 12:47:31 -0700 Subject: [PATCH 3/3] CHANGES and release notes for [GL #4089] --- CHANGES | 7 +++++++ doc/notes/notes-current.rst | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/CHANGES b/CHANGES index b34478bf5c..9bcfe6270f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,10 @@ +6192. [security] A query that prioritizes stale data over lookup + triggers a fetch to refresh the stale data in cache. + If the fetch is aborted for exceeding the recursion + quota, it was possible for 'named' to enter an infinite + callback loop and crash due to stack overflow. This has + been fixed. (CVE-2023-2911) [GL #4089] + 6190. [security] Improve the overmem cleaning process to prevent the cache going over the configured limit. (CVE-2023-2828) [GL #4055] diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index 78c3c048e2..3c706ebe98 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -24,6 +24,12 @@ Security Fixes and Yuval Shavitt from Tel-Aviv University for bringing this vulnerability to our attention. :gl:`#4055` +- A query that prioritizes stale data over lookup triggers a fetch to refresh + the stale data in cache. If the fetch is aborted for exceeding the recursion + quota, it was possible for :iscman:`named` to enter an infinite callback + loop and crash due to stack overflow. This has been fixed. (CVE-2023-2911) + :gl:`#4089` + New Features ~~~~~~~~~~~~