diff --git a/CHANGES b/CHANGES index a4915f76e3..262133754b 100644 --- a/CHANGES +++ b/CHANGES @@ -14,6 +14,19 @@ process of the catalog zone was already running. [GL #4136] + --- 9.18.16 released --- + +6192. [security] A query that prioritizes stale data over lookup + triggers a fetch to refresh the stale data in cache. + If the fetch is aborted for exceeding the recursion + quota, it was possible for 'named' to enter an infinite + callback loop and crash due to stack overflow. This has + been fixed. (CVE-2023-2911) [GL #4089] + +6190. [security] Improve the overmem cleaning process to prevent the + cache going over the configured limit. (CVE-2023-2828) + [GL #4055] + 6188. [performance] Reduce memory consumption by allocating properly sized send buffers for stream-based transports. [GL #4038] diff --git a/bin/tests/system/hooks/tests_async_plugin.py b/bin/tests/system/hooks/tests_async_plugin.py index 48f9feefbd..2f42e27379 100644 --- a/bin/tests/system/hooks/tests_async_plugin.py +++ b/bin/tests/system/hooks/tests_async_plugin.py @@ -23,4 +23,5 @@ def test_async_hook(named_port): "A", ) ans = dns.query.udp(msg, "10.53.0.1", timeout=10, port=named_port) + # the test-async plugin changes the status of any positive answer to NOTIMP assert ans.rcode() == dns.rcode.NOTIMP diff --git a/doc/arm/notes.rst b/doc/arm/notes.rst index 3dc0196180..f8705ced6a 100644 --- a/doc/arm/notes.rst +++ b/doc/arm/notes.rst @@ -36,6 +36,7 @@ information about each release, and source code. .. include:: ../notes/notes-known-issues.rst .. include:: ../notes/notes-current.rst +.. include:: ../notes/notes-9.18.16.rst .. include:: ../notes/notes-9.18.15.rst .. include:: ../notes/notes-9.18.14.rst .. include:: ../notes/notes-9.18.13.rst diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst index 25a13fa70a..3de2effb72 100644 --- a/doc/arm/reference.rst +++ b/doc/arm/reference.rst @@ -3932,6 +3932,11 @@ system. 
default value of that option (90% of physical memory for each individual cache) may lead to memory exhaustion over time. + .. note:: + + :any:`max-cache-size` does not work reliably for the maximum + amount of memory of 100 MB or lower. + Upon startup and reconfiguration, caches with a limited size preallocate a small amount of memory (less than 1% of :any:`max-cache-size` for a given view). This preallocation serves as an diff --git a/doc/notes/notes-9.18.16.rst b/doc/notes/notes-9.18.16.rst new file mode 100644 index 0000000000..9ed090ca9c --- /dev/null +++ b/doc/notes/notes-9.18.16.rst @@ -0,0 +1,72 @@ +.. Copyright (C) Internet Systems Consortium, Inc. ("ISC") +.. +.. SPDX-License-Identifier: MPL-2.0 +.. +.. This Source Code Form is subject to the terms of the Mozilla Public +.. License, v. 2.0. If a copy of the MPL was not distributed with this +.. file, you can obtain one at https://mozilla.org/MPL/2.0/. +.. +.. See the COPYRIGHT file distributed with this work for additional +.. information regarding copyright ownership. + +Notes for BIND 9.18.16 +---------------------- + +Security Fixes +~~~~~~~~~~~~~~ + +- The overmem cleaning process has been improved, to prevent the cache from + significantly exceeding the configured :any:`max-cache-size` limit. + (CVE-2023-2828) + + ISC would like to thank Shoham Danino from Reichman University, Anat + Bremler-Barr from Tel-Aviv University, Yehuda Afek from Tel-Aviv University, + and Yuval Shavitt from Tel-Aviv University for bringing this vulnerability to + our attention. :gl:`#4055` + +- A query that prioritizes stale data over lookup triggers a fetch to refresh + the stale data in cache. If the fetch is aborted for exceeding the recursion + quota, it was possible for :iscman:`named` to enter an infinite callback + loop and crash due to stack overflow. This has been fixed. 
(CVE-2023-2911) + :gl:`#4089` + +New Features +~~~~~~~~~~~~ + +- The system test suite can now be executed with pytest (along with + pytest-xdist for parallel execution). :gl:`#3978` + +Removed Features +~~~~~~~~~~~~~~~~ + +- TKEY mode 2 (Diffie-Hellman Exchanged Keying) is now deprecated, and + will be removed in a future release. A warning will be logged when + the :any:`tkey-dhkey` option is used in ``named.conf``. :gl:`#3905` + +Bug Fixes +~~~~~~~~~ + +- BIND could get stuck on reconfiguration when a :any:`listen-on` + statement for HTTP is removed from the configuration. That has been + fixed. :gl:`#4071` + +- Previously, it was possible for a delegation from cache to be returned + to the client after the :any:`stale-answer-client-timeout` duration. + This has been fixed. :gl:`#3950` + +- BIND could allocate too big buffers when sending data via + stream-based DNS transports, leading to increased memory usage. + This has been fixed. :gl:`#4038` + +- When the :any:`stale-answer-enable` option was enabled and the + :any:`stale-answer-client-timeout` option was enabled and larger than + 0, :iscman:`named` previously allocated two slots from the + :any:`clients-per-query` limit for each client and failed to gradually + auto-tune its value, as configured. This has been fixed. :gl:`#4074` + +Known Issues +~~~~~~~~~~~~ + +- There are no new known issues with this release. See :ref:`above + <relnotes_known_issues>` for a list of all known issues affecting this + BIND 9 branch.
diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index bebef1006a..5d364667eb 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -561,7 +561,7 @@ static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked, expire_t reason); static void -overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, isc_stdtime_t now, +overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize, bool tree_locked); static void resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader); @@ -6795,6 +6795,16 @@ cleanup: static dns_dbmethods_t zone_methods; +static size_t +rdataset_size(rdatasetheader_t *header) { + if (!NONEXISTENT(header)) { + return (dns_rdataslab_size((unsigned char *)header, + sizeof(*header))); + } + + return (sizeof(*header)); +} + static isc_result_t addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options, @@ -6959,7 +6969,8 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, } if (cache_is_overmem) { - overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked); + overmem_purge(rbtdb, rbtnode->locknum, rdataset_size(newheader), + tree_locked); } NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, @@ -6978,11 +6989,18 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, } header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1); - if (header != NULL && - header->rdh_ttl + STALE_TTL(header, rbtdb) < - now - RBTDB_VIRTUAL) - { - expire_header(rbtdb, header, tree_locked, expire_ttl); + if (header != NULL) { + dns_ttl_t rdh_ttl = header->rdh_ttl; + + /* Only account for stale TTL if cache is not overmem */ + if (!cache_is_overmem) { + rdh_ttl += STALE_TTL(header, rbtdb); + } + + if (rdh_ttl < now - RBTDB_VIRTUAL) { + expire_header(rbtdb, header, tree_locked, + expire_ttl); + } } /* @@ -10122,52 +10140,58 @@ update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t 
now) { ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link); } +static size_t +expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, size_t purgesize, + bool tree_locked) { + rdatasetheader_t *header, *header_prev; + size_t purged = 0; + + for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]); + header != NULL && purged <= purgesize; header = header_prev) + { + header_prev = ISC_LIST_PREV(header, link); + /* + * Unlink the entry at this point to avoid checking it + * again even if it's currently used by someone else and + * cannot be purged at this moment. This entry won't be + * referenced any more (so unlinking is safe) since the + * TTL was reset to 0. + */ + ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, link); + size_t header_size = rdataset_size(header); + expire_header(rbtdb, header, tree_locked, expire_lru); + purged += header_size; + } + + return (purged); +} + /*% - * Purge some expired and/or stale (i.e. unused for some period) cache entries - * under an overmem condition. To recover from this condition quickly, up to - * 2 entries will be purged. This process is triggered while adding a new - * entry, and we specifically avoid purging entries in the same LRU bucket as - * the one to which the new entry will belong. Otherwise, we might purge - * entries of the same name of different RR types while adding RRsets from a - * single response (consider the case where we're adding A and AAAA glue records - * of the same NS name). + * Purge some stale (i.e. unused for some period - LRU based cleaning) cache + * entries under the overmem condition. To recover from this condition quickly, + * we cleanup entries up to the size of newly added rdata (passed as purgesize). + * + * This process is triggered while adding a new entry, and we specifically avoid + * purging entries in the same LRU bucket as the one to which the new entry will + * belong.
Otherwise, we might purge entries of the same name of different RR + * types while adding RRsets from a single response (consider the case where + * we're adding A and AAAA glue records of the same NS name). */ static void -overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, isc_stdtime_t now, +overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize, bool tree_locked) { - rdatasetheader_t *header, *header_prev; unsigned int locknum; - int purgecount = 2; + size_t purged = 0; for (locknum = (locknum_start + 1) % rbtdb->node_lock_count; - locknum != locknum_start && purgecount > 0; + locknum != locknum_start && purged <= purgesize; locknum = (locknum + 1) % rbtdb->node_lock_count) { NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); - header = isc_heap_element(rbtdb->heaps[locknum], 1); - if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) { - expire_header(rbtdb, header, tree_locked, expire_ttl); - purgecount--; - } - - for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]); - header != NULL && purgecount > 0; header = header_prev) - { - header_prev = ISC_LIST_PREV(header, link); - /* - * Unlink the entry at this point to avoid checking it - * again even if it's currently used someone else and - * cannot be purged at this moment. This entry won't be - * referenced any more (so unlinking is safe) since the - * TTL was reset to 0. 
- */ - ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, - link); - expire_header(rbtdb, header, tree_locked, expire_lru); - purgecount--; - } + purged += expire_lru_headers(rbtdb, locknum, purgesize - purged, + tree_locked); NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); diff --git a/lib/ns/query.c b/lib/ns/query.c index 1fab263a35..64e9278307 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -5827,6 +5827,7 @@ query_refresh_rrset(query_ctx_t *orig_qctx) { qctx.client->query.dboptions &= ~(DNS_DBFIND_STALETIMEOUT | DNS_DBFIND_STALEOK | DNS_DBFIND_STALEENABLED); + qctx.client->nodetach = false; /* * We'll need some resources... @@ -6100,7 +6101,14 @@ query_lookup(query_ctx_t *qctx) { "%s stale answer used, an attempt to " "refresh the RRset will still be made", namebuf); + qctx->refresh_rrset = STALE(qctx->rdataset); + /* + * If we are refreshing the RRSet, we must not + * detach from the client in query_send(). + */ + qctx->client->nodetach = qctx->refresh_rrset; + if (stale_found) { ns_client_extendederror( qctx->client, ede, @@ -6533,7 +6541,7 @@ ns_query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qname, if (recparam_match(&client->query.recparam, qtype, qname, qdomain)) { ns_client_log(client, NS_LOGCATEGORY_CLIENT, NS_LOGMODULE_QUERY, ISC_LOG_INFO, "recursion loop detected"); - return (ISC_R_FAILURE); + return (ISC_R_ALREADYRUNNING); } recparam_update(&client->query.recparam, qtype, qname, qdomain); @@ -7650,10 +7658,21 @@ query_usestale(query_ctx_t *qctx, isc_result_t result) { return (false); } - if (result == DNS_R_DUPLICATE || result == DNS_R_DROP) { + if (qctx->refresh_rrset) { + /* + * This is a refreshing query, we have already prioritized + * stale data, so don't enable serve-stale again. 
+ */ + return (false); + } + + if (result == DNS_R_DUPLICATE || result == DNS_R_DROP || + result == ISC_R_ALREADYRUNNING) + { /* * Don't enable serve-stale if the result signals a duplicate - * query or query that is being dropped. + * query or a query that is being dropped or can't proceed + * because of a recursion loop. */ return (false); } @@ -11950,12 +11969,7 @@ ns_query_done(query_ctx_t *qctx) { /* * Client may have been detached after query_send(), so * we test and store the flag state here, for safety. - * If we are refreshing the RRSet, we must not detach from the client - * in the query_send(), so we need to override the flag. */ - if (qctx->refresh_rrset) { - qctx->client->nodetach = true; - } nodetach = qctx->client->nodetach; query_send(qctx->client);