diff --git a/CHANGES b/CHANGES index 163ab0d0e4..73f87ac6e9 100644 --- a/CHANGES +++ b/CHANGES @@ -1,9 +1,6 @@ 6345. [bug] Added missing dns_rdataset_disassociate calls in validator.c:findnsec3proofs. [GL #4571] -6343. [bug] Fix case insensitive setting for isc_ht hashtable. - [GL #4568] - 6340. [test] Fix incorrectly reported errors when running tests with `make test` on platforms with older pytest. [GL #4560] @@ -39,6 +36,41 @@ This lead to failures when DNSKEYs where updated as the TTLs mismatched. [GL #4466] + --- 9.18.24 released --- + +6343. [bug] Fix case insensitive setting for isc_ht hashtable. + [GL #4568] + + --- 9.18.23 released --- + +6322. [security] Specific DNS answers could cause a denial-of-service + condition due to DNS validation taking a long time. + (CVE-2023-50387) [GL #4424] + +6321. [security] Change 6315 inadvertently introduced regressions that + could cause named to crash. [GL #4234] + +6320. [bug] Under some circumstances, the DoT code in client + mode could process more than one message at a time when + that was not expected. That has been fixed. [GL #4487] + + --- 9.18.22 released --- + +6319. [func] Limit isc_task_send() overhead for RBTDB tree pruning. + [GL #4383] + +6317. [security] Restore DNS64 state when handling a serve-stale timeout. + (CVE-2023-5679) [GL #4334] + +6316. [security] Specific queries could trigger an assertion check with + nxdomain-redirect enabled. (CVE-2023-5517) [GL #4281] + +6315. [security] Speed up parsing of DNS messages with many different + names. (CVE-2023-4408) [GL #4234] + +6314. [bug] Address race conditions in dns_tsigkey_find(). + [GL #4182] + 6312. [bug] Conversion from NSEC3 signed to NSEC signed could temporarily put the zone into a state where it was treated as unsigned until the NSEC chain was built. diff --git a/dangerfile.py b/dangerfile.py index 2c1e7de14c..d3d3a8f4c6 100644 --- a/dangerfile.py +++ b/dangerfile.py @@ -347,18 +347,18 @@ if changes_added_lines: # MR. 
release_notes_regex = re.compile(r"doc/(arm|notes)/notes-.*\.(rst|xml)") -release_notes_changed = list(filter(release_notes_regex.match, modified_files)) +release_notes_changed = list(filter(release_notes_regex.match, affected_files)) release_notes_label_set = "Release Notes" in mr_labels if not release_notes_changed: if release_notes_label_set: fail( "This merge request has the *Release Notes* label set. " - "Add a release note or unset the *Release Notes* label." + "Update release notes or unset the *Release Notes* label." ) elif "Customer" in mr_labels: warn( "This merge request has the *Customer* label set. " - "Add a release note unless the changes introduced are trivial." + "Update release notes unless the changes introduced are trivial." ) if release_notes_changed and not release_notes_label_set: fail( @@ -367,7 +367,9 @@ if release_notes_changed and not release_notes_label_set: ) if release_notes_changed: - notes_added_lines = added_lines(target_branch, release_notes_changed) + modified_or_new_files = danger.git.modified_files + danger.git.created_files + release_notes_added = list(filter(release_notes_regex.match, modified_or_new_files)) + notes_added_lines = added_lines(target_branch, release_notes_added) identifiers_found = filter(relnotes_issue_or_mr_id_regex.search, notes_added_lines) if notes_added_lines and not any(identifiers_found): warn("No valid issue/MR identifiers found in added release notes.") diff --git a/doc/arm/notes.rst b/doc/arm/notes.rst index 1caec1b103..578a5aada5 100644 --- a/doc/arm/notes.rst +++ b/doc/arm/notes.rst @@ -36,6 +36,9 @@ information about each release, and source code. .. include:: ../notes/notes-known-issues.rst .. include:: ../notes/notes-current.rst +.. include:: ../notes/notes-9.18.24.rst +.. include:: ../notes/notes-9.18.23.rst +.. include:: ../notes/notes-9.18.22.rst .. include:: ../notes/notes-9.18.21.rst .. include:: ../notes/notes-9.18.20.rst .. 
include:: ../notes/notes-9.18.19.rst diff --git a/doc/notes/notes-9.18.22.rst b/doc/notes/notes-9.18.22.rst new file mode 100644 index 0000000000..77f374c4cd --- /dev/null +++ b/doc/notes/notes-9.18.22.rst @@ -0,0 +1,19 @@ +.. Copyright (C) Internet Systems Consortium, Inc. ("ISC") +.. +.. SPDX-License-Identifier: MPL-2.0 +.. +.. This Source Code Form is subject to the terms of the Mozilla Public +.. License, v. 2.0. If a copy of the MPL was not distributed with this +.. file, you can obtain one at https://mozilla.org/MPL/2.0/. +.. +.. See the COPYRIGHT file distributed with this work for additional +.. information regarding copyright ownership. + +Notes for BIND 9.18.22 +---------------------- + +.. note:: + + The BIND 9.18.22 release was withdrawn after the discovery of a + regression in a security fix in it during pre-release testing. ISC + would like to acknowledge the assistance of Curtis Tuplin of SaskTel. diff --git a/doc/notes/notes-9.18.23.rst b/doc/notes/notes-9.18.23.rst new file mode 100644 index 0000000000..7f95b80131 --- /dev/null +++ b/doc/notes/notes-9.18.23.rst @@ -0,0 +1,20 @@ +.. Copyright (C) Internet Systems Consortium, Inc. ("ISC") +.. +.. SPDX-License-Identifier: MPL-2.0 +.. +.. This Source Code Form is subject to the terms of the Mozilla Public +.. License, v. 2.0. If a copy of the MPL was not distributed with this +.. file, you can obtain one at https://mozilla.org/MPL/2.0/. +.. +.. See the COPYRIGHT file distributed with this work for additional +.. information regarding copyright ownership. + +Notes for BIND 9.18.23 +---------------------- + +.. note:: + + The BIND 9.18.23 release was withdrawn after the discovery of a + regression in a security fix in it during pre-release testing. ISC + would like to acknowledge the assistance of Vinzenz Vogel and Daniel + Stirnimann of SWITCH. 
diff --git a/doc/notes/notes-9.18.24.rst b/doc/notes/notes-9.18.24.rst new file mode 100644 index 0000000000..3e3f1c2f83 --- /dev/null +++ b/doc/notes/notes-9.18.24.rst @@ -0,0 +1,65 @@ +.. Copyright (C) Internet Systems Consortium, Inc. ("ISC") +.. +.. SPDX-License-Identifier: MPL-2.0 +.. +.. This Source Code Form is subject to the terms of the Mozilla Public +.. License, v. 2.0. If a copy of the MPL was not distributed with this +.. file, you can obtain one at https://mozilla.org/MPL/2.0/. +.. +.. See the COPYRIGHT file distributed with this work for additional +.. information regarding copyright ownership. + +Notes for BIND 9.18.24 +---------------------- + +Security Fixes +~~~~~~~~~~~~~~ + +- Validating DNS messages containing a lot of DNSSEC signatures could + cause excessive CPU load, leading to a denial-of-service condition. + This has been fixed. :cve:`2023-50387` + + ISC would like to thank Elias Heftrig, Haya Schulmann, Niklas Vogel, + and Michael Waidner from the German National Research Center for + Applied Cybersecurity ATHENE for bringing this vulnerability to our + attention. :gl:`#4424` + +- Preparing an NSEC3 closest encloser proof could cause excessive CPU + load, leading to a denial-of-service condition. This has been fixed. + :cve:`2023-50868` :gl:`#4459` + +- Parsing DNS messages with many different names could cause excessive + CPU load. This has been fixed. :cve:`2023-4408` + + ISC would like to thank Shoham Danino from Reichman University, Anat + Bremler-Barr from Tel-Aviv University, Yehuda Afek from Tel-Aviv + University, and Yuval Shavitt from Tel-Aviv University for bringing + this vulnerability to our attention. :gl:`#4234` + +- Specific queries could cause :iscman:`named` to crash with an + assertion failure when :any:`nxdomain-redirect` was enabled. This has + been fixed. 
:cve:`2023-5517` :gl:`#4281` + +- A bad interaction between DNS64 and serve-stale could cause + :iscman:`named` to crash with an assertion failure, when both of these + features were enabled. This has been fixed. :cve:`2023-5679` + :gl:`#4334` + +- Under certain circumstances, the DNS-over-TLS client code incorrectly + attempted to process more than one DNS message at a time, which could + cause :iscman:`named` to crash with an assertion failure. This has + been fixed. :gl:`#4487` + +Bug Fixes +~~~~~~~~~ + +- The counters exported via the statistics channel were changed back to + 64-bit signed values; they were being inadvertently truncated to + unsigned 32-bit values since BIND 9.15.0. :gl:`#4467` + +Known Issues +~~~~~~~~~~~~ + +- There are no new known issues with this release. See :ref:`above + ` for a list of all known issues affecting this + BIND 9 branch. diff --git a/lib/dns/dst_api.c b/lib/dns/dst_api.c index 4ffda8b358..0658c691a8 100644 --- a/lib/dns/dst_api.c +++ b/lib/dns/dst_api.c @@ -164,7 +164,8 @@ computeid(dst_key_t *key); static isc_result_t frombuffer(const dns_name_t *name, unsigned int alg, unsigned int flags, unsigned int protocol, dns_rdataclass_t rdclass, - isc_buffer_t *source, isc_mem_t *mctx, dst_key_t **keyp); + isc_buffer_t *source, isc_mem_t *mctx, bool no_rdata, + dst_key_t **keyp); static isc_result_t algorithm_status(unsigned int alg); @@ -753,6 +754,13 @@ dst_key_todns(const dst_key_t *key, isc_buffer_t *target) { isc_result_t dst_key_fromdns(const dns_name_t *name, dns_rdataclass_t rdclass, isc_buffer_t *source, isc_mem_t *mctx, dst_key_t **keyp) { + return (dst_key_fromdns_ex(name, rdclass, source, mctx, false, keyp)); +} + +isc_result_t +dst_key_fromdns_ex(const dns_name_t *name, dns_rdataclass_t rdclass, + isc_buffer_t *source, isc_mem_t *mctx, bool no_rdata, + dst_key_t **keyp) { uint8_t alg, proto; uint32_t flags, extflags; dst_key_t *key = NULL; @@ -783,7 +791,7 @@ dst_key_fromdns(const dns_name_t *name, dns_rdataclass_t 
rdclass, } result = frombuffer(name, alg, flags, proto, rdclass, source, mctx, - &key); + no_rdata, &key); if (result != ISC_R_SUCCESS) { return (result); } @@ -804,7 +812,7 @@ dst_key_frombuffer(const dns_name_t *name, unsigned int alg, unsigned int flags, REQUIRE(dst_initialized); result = frombuffer(name, alg, flags, protocol, rdclass, source, mctx, - &key); + false, &key); if (result != ISC_R_SUCCESS) { return (result); } @@ -2351,7 +2359,8 @@ computeid(dst_key_t *key) { static isc_result_t frombuffer(const dns_name_t *name, unsigned int alg, unsigned int flags, unsigned int protocol, dns_rdataclass_t rdclass, - isc_buffer_t *source, isc_mem_t *mctx, dst_key_t **keyp) { + isc_buffer_t *source, isc_mem_t *mctx, bool no_rdata, + dst_key_t **keyp) { dst_key_t *key; isc_result_t ret; @@ -2376,10 +2385,12 @@ frombuffer(const dns_name_t *name, unsigned int alg, unsigned int flags, return (DST_R_UNSUPPORTEDALG); } - ret = key->func->fromdns(key, source); - if (ret != ISC_R_SUCCESS) { - dst_key_free(&key); - return (ret); + if (!no_rdata) { + ret = key->func->fromdns(key, source); + if (ret != ISC_R_SUCCESS) { + dst_key_free(&key); + return (ret); + } } } diff --git a/lib/dns/include/dns/message.h b/lib/dns/include/dns/message.h index 940c9b1748..f15884a183 100644 --- a/lib/dns/include/dns/message.h +++ b/lib/dns/include/dns/message.h @@ -856,44 +856,6 @@ dns_message_findtype(const dns_name_t *name, dns_rdatatype_t type, *\li #ISC_R_NOTFOUND -- the desired type does not exist. */ -isc_result_t -dns_message_find(const dns_name_t *name, dns_rdataclass_t rdclass, - dns_rdatatype_t type, dns_rdatatype_t covers, - dns_rdataset_t **rdataset); -/*%< - * Search the name for the specified rdclass and type. If it is found, - * *rdataset is filled in with a pointer to that rdataset. - * - * Requires: - *\li if '**rdataset' is non-NULL, *rdataset needs to be NULL. - * - *\li 'type' be a valid type, and NOT dns_rdatatype_any. 
- * - *\li If 'type' is dns_rdatatype_rrsig, 'covers' must be a valid type. - * Otherwise it should be 0. - * - * Returns: - *\li #ISC_R_SUCCESS -- all is well. - *\li #ISC_R_NOTFOUND -- the desired type does not exist. - */ - -void -dns_message_movename(dns_message_t *msg, dns_name_t *name, - dns_section_t fromsection, dns_section_t tosection); -/*%< - * Move a name from one section to another. - * - * Requires: - * - *\li 'msg' be valid. - * - *\li 'name' must be a name already in 'fromsection'. - * - *\li 'fromsection' must be a valid section. - * - *\li 'tosection' must be a valid section. - */ - void dns_message_addname(dns_message_t *msg, dns_name_t *name, dns_section_t section); diff --git a/lib/dns/include/dns/name.h b/lib/dns/include/dns/name.h index a758c4d948..199856aae7 100644 --- a/lib/dns/include/dns/name.h +++ b/lib/dns/include/dns/name.h @@ -68,6 +68,7 @@ #include #include +#include #include #include #include /* Required for storage size of dns_label_t. */ @@ -111,6 +112,7 @@ struct dns_name { isc_buffer_t *buffer; ISC_LINK(dns_name_t) link; ISC_LIST(dns_rdataset_t) list; + isc_ht_t *ht; }; #define DNS_NAME_MAGIC ISC_MAGIC('D', 'N', 'S', 'n') @@ -166,30 +168,24 @@ extern const dns_name_t *dns_wildcardname; * unsigned char offsets[] = { 0, 6 }; * dns_name_t value = DNS_NAME_INITABSOLUTE(data, offsets); */ -#define DNS_NAME_INITNONABSOLUTE(A, B) \ - { \ - DNS_NAME_MAGIC, A, (sizeof(A) - 1), sizeof(B), \ - DNS_NAMEATTR_READONLY, B, NULL, \ - { (void *)-1, (void *)-1 }, { \ - NULL, NULL \ - } \ +#define DNS_NAME_INITNONABSOLUTE(A, B) \ + { \ + DNS_NAME_MAGIC, A, (sizeof(A) - 1), sizeof(B), \ + DNS_NAMEATTR_READONLY, B, NULL, \ + { (void *)-1, (void *)-1 }, { NULL, NULL }, NULL \ } -#define DNS_NAME_INITABSOLUTE(A, B) \ - { \ - DNS_NAME_MAGIC, A, sizeof(A), sizeof(B), \ - DNS_NAMEATTR_READONLY | DNS_NAMEATTR_ABSOLUTE, B, \ - NULL, { (void *)-1, (void *)-1 }, { \ - NULL, NULL \ - } \ +#define DNS_NAME_INITABSOLUTE(A, B) \ + { \ + DNS_NAME_MAGIC, A, 
sizeof(A), sizeof(B), \ + DNS_NAMEATTR_READONLY | DNS_NAMEATTR_ABSOLUTE, B, \ + NULL, { (void *)-1, (void *)-1 }, { NULL, NULL }, NULL \ } -#define DNS_NAME_INITEMPTY \ - { \ - DNS_NAME_MAGIC, NULL, 0, 0, 0, NULL, NULL, \ - { (void *)-1, (void *)-1 }, { \ - NULL, NULL \ - } \ +#define DNS_NAME_INITEMPTY \ + { \ + DNS_NAME_MAGIC, NULL, 0, 0, 0, NULL, NULL, \ + { (void *)-1, (void *)-1 }, { NULL, NULL }, NULL \ } /*% @@ -1330,6 +1326,7 @@ ISC_LANG_ENDDECLS _n->buffer = NULL; \ ISC_LINK_INIT(_n, link); \ ISC_LIST_INIT(_n->list); \ + _n->ht = NULL; \ } while (0) #define DNS_NAME_RESET(n) \ diff --git a/lib/dns/include/dns/rbt.h b/lib/dns/include/dns/rbt.h index c144f83b49..40bf09ee77 100644 --- a/lib/dns/include/dns/rbt.h +++ b/lib/dns/include/dns/rbt.h @@ -124,6 +124,12 @@ struct dns_rbtnode { */ ISC_LINK(dns_rbtnode_t) deadlink; + /*% + * This linked list is used to store nodes from which tree pruning can + * be started. + */ + ISC_LINK(dns_rbtnode_t) prunelink; + /*@{*/ /*! * These values are used in the RBT DB implementation. 
The appropriate diff --git a/lib/dns/include/dns/validator.h b/lib/dns/include/dns/validator.h index 383dcb46e4..352a60a6a0 100644 --- a/lib/dns/include/dns/validator.h +++ b/lib/dns/include/dns/validator.h @@ -148,6 +148,7 @@ struct dns_validator { unsigned int depth; unsigned int authcount; unsigned int authfail; + bool failed; isc_stdtime_t start; }; diff --git a/lib/dns/include/dst/dst.h b/lib/dns/include/dst/dst.h index ca292b0ef0..f845e9bd2e 100644 --- a/lib/dns/include/dst/dst.h +++ b/lib/dns/include/dst/dst.h @@ -482,6 +482,10 @@ dst_key_tofile(const dst_key_t *key, int type, const char *directory); */ isc_result_t +dst_key_fromdns_ex(const dns_name_t *name, dns_rdataclass_t rdclass, + isc_buffer_t *source, isc_mem_t *mctx, bool no_rdata, + dst_key_t **keyp); +isc_result_t dst_key_fromdns(const dns_name_t *name, dns_rdataclass_t rdclass, isc_buffer_t *source, isc_mem_t *mctx, dst_key_t **keyp); /*%< diff --git a/lib/dns/message.c b/lib/dns/message.c index 761a8e1471..8654e92ec3 100644 --- a/lib/dns/message.c +++ b/lib/dns/message.c @@ -22,6 +22,8 @@ #include #include +#include +#include #include #include #include @@ -493,9 +495,11 @@ msgresetsigs(dns_message_t *msg, bool replying) { } else { dns_rdataset_disassociate(msg->tsig); isc_mempool_put(msg->rdspool, msg->tsig); + msg->tsig = NULL; if (msg->querytsig != NULL) { dns_rdataset_disassociate(msg->querytsig); isc_mempool_put(msg->rdspool, msg->querytsig); + msg->querytsig = NULL; } } dns_message_puttempname(msg, &msg->tsigname); @@ -790,6 +794,18 @@ dns_message_detach(dns_message_t **messagep) { } } +static isc_result_t +name_hash_add(isc_ht_t *ht, dns_name_t *name, dns_name_t **foundp) { + isc_result_t result = isc_ht_find(ht, name->ndata, name->length, + (void **)foundp); + if (result == ISC_R_SUCCESS) { + return (ISC_R_EXISTS); + } + result = isc_ht_add(ht, name->ndata, name->length, (void *)name); + INSIST(result == ISC_R_SUCCESS); + return (ISC_R_SUCCESS); +} + static isc_result_t findname(dns_name_t 
**foundname, const dns_name_t *target, dns_namelist_t *section) { @@ -809,29 +825,26 @@ findname(dns_name_t **foundname, const dns_name_t *target, return (ISC_R_NOTFOUND); } -isc_result_t -dns_message_find(const dns_name_t *name, dns_rdataclass_t rdclass, - dns_rdatatype_t type, dns_rdatatype_t covers, - dns_rdataset_t **rdataset) { - dns_rdataset_t *curr; +typedef struct __attribute__((__packed__)) rds_key { + dns_rdataclass_t rdclass; + dns_rdatatype_t type; + dns_rdatatype_t covers; +} rds_key_t; - REQUIRE(name != NULL); - REQUIRE(rdataset == NULL || *rdataset == NULL); - - for (curr = ISC_LIST_TAIL(name->list); curr != NULL; - curr = ISC_LIST_PREV(curr, link)) - { - if (curr->rdclass == rdclass && curr->type == type && - curr->covers == covers) - { - if (rdataset != NULL) { - *rdataset = curr; - } - return (ISC_R_SUCCESS); - } +static isc_result_t +rds_hash_add(isc_ht_t *ht, dns_rdataset_t *rds, dns_rdataset_t **foundp) { + rds_key_t key = { .rdclass = rds->rdclass, + .type = rds->type, + .covers = rds->covers }; + isc_result_t result = isc_ht_find(ht, (const unsigned char *)&key, + sizeof(key), (void **)foundp); + if (result == ISC_R_SUCCESS) { + return (ISC_R_EXISTS); } - - return (ISC_R_NOTFOUND); + result = isc_ht_add(ht, (const unsigned char *)&key, sizeof(key), + (void *)rds); + INSIST(result == ISC_R_SUCCESS); + return (ISC_R_SUCCESS); } isc_result_t @@ -958,6 +971,18 @@ getrdata(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, } \ } while (0) +static void +cleanup_name_hashmaps(dns_namelist_t *section) { + dns_name_t *name = NULL; + for (name = ISC_LIST_HEAD(*section); name != NULL; + name = ISC_LIST_NEXT(name, link)) + { + if (name->ht != NULL) { + isc_ht_destroy(&name->ht); + } + } +} + static isc_result_t getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, unsigned int options) { @@ -967,13 +992,19 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, dns_name_t *name2 = NULL; 
dns_rdataset_t *rdataset = NULL; dns_rdatalist_t *rdatalist = NULL; - isc_result_t result; + isc_result_t result = ISC_R_SUCCESS; dns_rdatatype_t rdtype; dns_rdataclass_t rdclass; dns_namelist_t *section = &msg->sections[DNS_SECTION_QUESTION]; bool best_effort = ((options & DNS_MESSAGEPARSE_BESTEFFORT) != 0); bool seen_problem = false; bool free_name = false; + bool free_ht = false; + isc_ht_t *name_map = NULL; + + if (msg->counts[DNS_SECTION_QUESTION] > 1) { + isc_ht_init(&name_map, msg->mctx, 1, ISC_HT_CASE_INSENSITIVE); + } for (count = 0; count < msg->counts[DNS_SECTION_QUESTION]; count++) { name = NULL; @@ -994,13 +1025,19 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, goto cleanup; } + /* If there is only one QNAME, skip the duplicity checks */ + if (name_map == NULL) { + result = ISC_R_SUCCESS; + goto skip_name_check; + } + /* * Run through the section, looking to see if this name * is already there. If it is found, put back the allocated * name since we no longer need it, and set our name pointer * to point to the name we found. */ - result = findname(&name2, name, section); + result = name_hash_add(name_map, name, &name2); /* * If it is the first name in the section, accept it. @@ -1012,19 +1049,25 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, * this should be legal or not. In either case we no longer * need this name pointer. */ - if (result != ISC_R_SUCCESS) { + skip_name_check: + switch (result) { + case ISC_R_SUCCESS: if (!ISC_LIST_EMPTY(*section)) { DO_ERROR(DNS_R_FORMERR); } ISC_LIST_APPEND(*section, name, link); - free_name = false; - } else { + break; + case ISC_R_EXISTS: dns_message_puttempname(msg, &name); name = name2; name2 = NULL; - free_name = false; + break; + default: + UNREACHABLE(); } + free_name = false; + /* * Get type and class. 
*/ @@ -1054,14 +1097,6 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, msg->tkey = 1; } - /* - * Can't ask the same question twice. - */ - result = dns_message_find(name, rdclass, rdtype, 0, NULL); - if (result == ISC_R_SUCCESS) { - DO_ERROR(DNS_R_FORMERR); - } - /* * Allocate a new rdatalist. */ @@ -1071,6 +1106,7 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, goto cleanup; } rdataset = isc_mempool_get(msg->rdspool); + dns_rdataset_init(rdataset); /* * Convert rdatalist to rdataset, and attach the latter to @@ -1078,8 +1114,6 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, */ rdatalist->type = rdtype; rdatalist->rdclass = rdclass; - - dns_rdataset_init(rdataset); result = dns_rdatalist_tordataset(rdatalist, rdataset); if (result != ISC_R_SUCCESS) { goto cleanup; @@ -1087,24 +1121,66 @@ getquestions(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, rdataset->attributes |= DNS_RDATASETATTR_QUESTION; + /* + * Skip the duplicity check for first rdataset + */ + if (ISC_LIST_EMPTY(name->list)) { + result = ISC_R_SUCCESS; + goto skip_rds_check; + } + + /* + * Can't ask the same question twice. 
+ */ + if (name->ht == NULL) { + isc_ht_init(&name->ht, msg->mctx, 1, + ISC_HT_CASE_SENSITIVE); + free_ht = true; + + INSIST(ISC_LIST_HEAD(name->list) == + ISC_LIST_TAIL(name->list)); + + dns_rdataset_t *old_rdataset = + ISC_LIST_HEAD(name->list); + + result = rds_hash_add(name->ht, old_rdataset, NULL); + + INSIST(result == ISC_R_SUCCESS); + } + result = rds_hash_add(name->ht, rdataset, NULL); + if (result == ISC_R_EXISTS) { + DO_ERROR(DNS_R_FORMERR); + } + + skip_rds_check: ISC_LIST_APPEND(name->list, rdataset, link); + rdataset = NULL; } if (seen_problem) { - return (DNS_R_RECOVERABLE); + result = DNS_R_RECOVERABLE; } - return (ISC_R_SUCCESS); cleanup: if (rdataset != NULL) { - INSIST(!dns_rdataset_isassociated(rdataset)); + if (dns_rdataset_isassociated(rdataset)) { + dns_rdataset_disassociate(rdataset); + } isc_mempool_put(msg->rdspool, rdataset); } if (free_name) { dns_message_puttempname(msg, &name); } + if (free_ht) { + cleanup_name_hashmaps(section); + } + + if (name_map != NULL) { + isc_ht_destroy(&name_map); + } + return (result); } @@ -1184,17 +1260,24 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, dns_name_t *name = NULL; dns_name_t *name2 = NULL; dns_rdataset_t *rdataset = NULL; + dns_rdataset_t *found_rdataset = NULL; dns_rdatalist_t *rdatalist = NULL; - isc_result_t result; + isc_result_t result = ISC_R_SUCCESS; dns_rdatatype_t rdtype, covers; dns_rdataclass_t rdclass; dns_rdata_t *rdata = NULL; dns_ttl_t ttl; dns_namelist_t *section = &msg->sections[sectionid]; - bool free_name = false, free_rdataset = false, seen_problem = false; + bool free_name = false, seen_problem = false; + bool free_ht = false; bool preserve_order = ((options & DNS_MESSAGEPARSE_PRESERVEORDER) != 0); bool best_effort = ((options & DNS_MESSAGEPARSE_BESTEFFORT) != 0); bool isedns, issigzero, istsig; + isc_ht_t *name_map = NULL; + + if (msg->counts[sectionid] > 1) { + isc_ht_init(&name_map, msg->mctx, 1, ISC_HT_CASE_INSENSITIVE); + } for (count = 
0; count < msg->counts[sectionid]; count++) { int recstart = source->current; @@ -1202,10 +1285,10 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, skip_name_search = false; skip_type_search = false; - free_rdataset = false; isedns = false; issigzero = false; istsig = false; + found_rdataset = NULL; name = NULL; result = dns_message_gettempname(msg, &name); @@ -1245,8 +1328,8 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, if (msg->rdclass_set == 0 && rdtype != dns_rdatatype_opt && /* class is UDP SIZE */ rdtype != dns_rdatatype_tsig && /* class is ANY */ - rdtype != dns_rdatatype_tkey) - { /* class is undefined */ + rdtype != dns_rdatatype_tkey) /* class is undefined */ + { msg->rdclass = rdclass; msg->rdclass_set = 1; } @@ -1353,10 +1436,6 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, * Then put the meta-class back into the finished rdata. */ rdata = newrdata(msg); - if (rdata == NULL) { - result = ISC_R_NOMEMORY; - goto cleanup; - } if (msg->opcode == dns_opcode_update && update(sectionid, rdclass)) { @@ -1445,34 +1524,62 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, free_name = false; } } else { + if (name_map == NULL) { + result = ISC_R_SUCCESS; + goto skip_name_check; + } + /* * Run through the section, looking to see if this name * is already there. If it is found, put back the * allocated name since we no longer need it, and set * our name pointer to point to the name we found. */ - result = findname(&name2, name, section); + result = name_hash_add(name_map, name, &name2); /* * If it is a new name, append to the section. 
*/ - if (result == ISC_R_SUCCESS) { + skip_name_check: + switch (result) { + case ISC_R_SUCCESS: + ISC_LIST_APPEND(*section, name, link); + break; + case ISC_R_EXISTS: dns_message_puttempname(msg, &name); name = name2; - } else { - ISC_LIST_APPEND(*section, name, link); + name2 = NULL; + break; + default: + UNREACHABLE(); } free_name = false; } + rdatalist = newrdatalist(msg); + rdatalist->type = rdtype; + rdatalist->covers = covers; + rdatalist->rdclass = rdclass; + rdatalist->ttl = ttl; + + dns_message_gettemprdataset(msg, &rdataset); + RUNTIME_CHECK(dns_rdatalist_tordataset(rdatalist, rdataset) == + ISC_R_SUCCESS); + dns_rdataset_setownercase(rdataset, name); + rdatalist = NULL; + /* * Search name for the particular type and class. * Skip this stage if in update mode or this is a meta-type. */ - if (preserve_order || msg->opcode == dns_opcode_update || - skip_type_search) + if (isedns || istsig || issigzero) { + /* Skip adding the rdataset to the tables */ + } else if (preserve_order || msg->opcode == dns_opcode_update || + skip_type_search) { - result = ISC_R_NOTFOUND; + result = ISC_R_SUCCESS; + + ISC_LIST_APPEND(name->list, rdataset, link); } else { /* * If this is a type that can only occur in @@ -1482,59 +1589,71 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, DO_ERROR(DNS_R_FORMERR); } - rdataset = NULL; - result = dns_message_find(name, rdclass, rdtype, covers, - &rdataset); - } + if (ISC_LIST_EMPTY(name->list)) { + result = ISC_R_SUCCESS; + goto skip_rds_check; + } + + if (name->ht == NULL) { + isc_ht_init(&name->ht, msg->mctx, 1, + ISC_HT_CASE_SENSITIVE); + free_ht = true; + + INSIST(ISC_LIST_HEAD(name->list) == + ISC_LIST_TAIL(name->list)); + + dns_rdataset_t *old_rdataset = + ISC_LIST_HEAD(name->list); + + result = rds_hash_add(name->ht, old_rdataset, + NULL); + + INSIST(result == ISC_R_SUCCESS); + } + found_rdataset = NULL; + result = rds_hash_add(name->ht, rdataset, + &found_rdataset); + + /* + * If we found an 
rdataset that matches, we need to + * append this rdata to that set. If we did not, we + * need to create a new rdatalist, store the important + * bits there, convert it to an rdataset, and link the + * latter to the name. Yuck. When appending, make + * certain that the type isn't a singleton type, such as + * SOA or CNAME. + * + * Note that this check will be bypassed when preserving + * order, the opcode is an update, or the type search is + * skipped. + */ + skip_rds_check: + switch (result) { + case ISC_R_EXISTS: + /* Free the rdataset we used as the key */ + dns_rdataset_disassociate(rdataset); + isc_mempool_put(msg->rdspool, rdataset); + result = ISC_R_SUCCESS; + rdataset = found_rdataset; + + if (!dns_rdatatype_issingleton(rdtype)) { + break; + } - /* - * If we found an rdataset that matches, we need to - * append this rdata to that set. If we did not, we need - * to create a new rdatalist, store the important bits there, - * convert it to an rdataset, and link the latter to the name. - * Yuck. When appending, make certain that the type isn't - * a singleton type, such as SOA or CNAME. - * - * Note that this check will be bypassed when preserving order, - * the opcode is an update, or the type search is skipped. 
- */ - if (result == ISC_R_SUCCESS) { - if (dns_rdatatype_issingleton(rdtype)) { - dns_rdata_t *first; dns_rdatalist_fromrdataset(rdataset, &rdatalist); - first = ISC_LIST_HEAD(rdatalist->rdata); + dns_rdata_t *first = + ISC_LIST_HEAD(rdatalist->rdata); INSIST(first != NULL); if (dns_rdata_compare(rdata, first) != 0) { DO_ERROR(DNS_R_FORMERR); } - } - } - - if (result == ISC_R_NOTFOUND) { - rdataset = isc_mempool_get(msg->rdspool); - free_rdataset = true; - - rdatalist = newrdatalist(msg); - if (rdatalist == NULL) { - result = ISC_R_NOMEMORY; - goto cleanup; - } - - rdatalist->type = rdtype; - rdatalist->covers = covers; - rdatalist->rdclass = rdclass; - rdatalist->ttl = ttl; - - dns_rdataset_init(rdataset); - RUNTIME_CHECK( - dns_rdatalist_tordataset(rdatalist, rdataset) == - ISC_R_SUCCESS); - dns_rdataset_setownercase(rdataset, name); - - if (!isedns && !istsig && !issigzero) { + break; + case ISC_R_SUCCESS: ISC_LIST_APPEND(name->list, rdataset, link); - free_rdataset = false; + break; + default: + UNREACHABLE(); } } @@ -1569,8 +1688,6 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, dns_rcode_t ercode; msg->opt = rdataset; - rdataset = NULL; - free_rdataset = false; ercode = (dns_rcode_t)((msg->opt->ttl & DNS_MESSAGE_EDNSRCODE_MASK) >> 20); @@ -1581,8 +1698,6 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, msg->sig0 = rdataset; msg->sig0name = name; msg->sigstart = recstart; - rdataset = NULL; - free_rdataset = false; free_name = false; } else if (istsig) { msg->tsig = rdataset; @@ -1592,22 +1707,17 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, * Windows doesn't like TSIG names to be compressed. 
*/ msg->tsigname->attributes |= DNS_NAMEATTR_NOCOMPRESS; - rdataset = NULL; - free_rdataset = false; free_name = false; } + rdataset = NULL; if (seen_problem) { if (free_name) { dns_message_puttempname(msg, &name); } - if (free_rdataset) { - isc_mempool_put(msg->rdspool, rdataset); - } - free_name = free_rdataset = false; + free_name = false; } INSIST(!free_name); - INSIST(!free_rdataset); } /* @@ -1625,16 +1735,24 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx, } if (seen_problem) { - return (DNS_R_RECOVERABLE); + result = DNS_R_RECOVERABLE; } - return (ISC_R_SUCCESS); cleanup: + if (rdataset != NULL && rdataset != found_rdataset) { + dns_rdataset_disassociate(rdataset); + isc_mempool_put(msg->rdspool, rdataset); + } if (free_name) { dns_message_puttempname(msg, &name); } - if (free_rdataset) { - isc_mempool_put(msg->rdspool, rdataset); + + if (free_ht) { + cleanup_name_hashmaps(section); + } + + if (name_map != NULL) { + isc_ht_destroy(&name_map); } return (result); @@ -2452,7 +2570,7 @@ dns_message_findname(dns_message_t *msg, dns_section_t section, const dns_name_t *target, dns_rdatatype_t type, dns_rdatatype_t covers, dns_name_t **name, dns_rdataset_t **rdataset) { - dns_name_t *foundname; + dns_name_t *foundname = NULL; isc_result_t result; /* @@ -2499,22 +2617,6 @@ dns_message_findname(dns_message_t *msg, dns_section_t section, return (result); } -void -dns_message_movename(dns_message_t *msg, dns_name_t *name, - dns_section_t fromsection, dns_section_t tosection) { - REQUIRE(msg != NULL); - REQUIRE(msg->from_to_wire == DNS_MESSAGE_INTENTRENDER); - REQUIRE(name != NULL); - REQUIRE(VALID_NAMED_SECTION(fromsection)); - REQUIRE(VALID_NAMED_SECTION(tosection)); - - /* - * Unlink the name from the old section - */ - ISC_LIST_UNLINK(msg->sections[fromsection], name, link); - ISC_LIST_APPEND(msg->sections[tosection], name, link); -} - void dns_message_addname(dns_message_t *msg, dns_name_t *name, dns_section_t section) { @@ -2591,6 
+2693,10 @@ dns_message_puttempname(dns_message_t *msg, dns_name_t **itemp) { REQUIRE(!ISC_LINK_LINKED(item, link)); REQUIRE(ISC_LIST_HEAD(item->list) == NULL); + if (item->ht != NULL) { + isc_ht_destroy(&item->ht); + } + /* * we need to check this in case dns_name_dup() was used. */ diff --git a/lib/dns/name.c b/lib/dns/name.c index 8a258a2a2a..90044ba51c 100644 --- a/lib/dns/name.c +++ b/lib/dns/name.c @@ -188,6 +188,7 @@ dns_name_invalidate(dns_name_t *name) { name->offsets = NULL; name->buffer = NULL; ISC_LINK_INIT(name, link); + INSIST(name->ht == NULL); } bool diff --git a/lib/dns/rbt.c b/lib/dns/rbt.c index 4d8c142fc2..29f19c895b 100644 --- a/lib/dns/rbt.c +++ b/lib/dns/rbt.c @@ -1576,6 +1576,7 @@ create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep) { HASHVAL(node) = 0; ISC_LINK_INIT(node, deadlink); + ISC_LINK_INIT(node, prunelink); LOCKNUM(node) = 0; WILD(node) = 0; diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 5eb553ddc1..b6089de6bb 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -494,6 +494,10 @@ struct dns_rbtdb { */ rbtnodelist_t *deadnodes; + /* List of nodes from which recursive tree pruning can be started from. + * Locked by tree_lock. */ + rbtnodelist_t prunenodes; + /* * Heaps. These are used for TTL based expiry in a cache, * or for zone resigning in a zone DB. hmctx is the memory @@ -1027,6 +1031,7 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { unsigned int i; isc_result_t result; char buf[DNS_NAME_FORMATSIZE]; + dns_rbtnode_t *node = NULL; dns_rbt_t **treep; isc_time_t start; @@ -1052,8 +1057,6 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { * the overhead of unlinking all nodes here should be negligible. 
*/ for (i = 0; i < rbtdb->node_lock_count; i++) { - dns_rbtnode_t *node; - node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); while (node != NULL) { ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink); @@ -1061,6 +1064,12 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { } } + node = ISC_LIST_HEAD(rbtdb->prunenodes); + while (node != NULL) { + ISC_LIST_UNLINK(rbtdb->prunenodes, node, prunelink); + node = ISC_LIST_HEAD(rbtdb->prunenodes); + } + if (event == NULL) { rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0; } @@ -1866,19 +1875,32 @@ is_leaf(dns_rbtnode_t *node) { node->left == NULL && node->right == NULL); } +/*% + * The tree lock must be held when this function is called as it reads and + * updates rbtdb->prunenodes. + */ static void send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, isc_rwlocktype_t locktype) { - isc_event_t *ev; - dns_db_t *db; + bool pruning_queued = (ISC_LIST_HEAD(rbtdb->prunenodes) != NULL); + + INSIST(locktype == isc_rwlocktype_write); - ev = isc_event_allocate(rbtdb->common.mctx, NULL, DNS_EVENT_RBTPRUNE, - prune_tree, node, sizeof(isc_event_t)); new_reference(rbtdb, node, locktype); - db = NULL; - attach((dns_db_t *)rbtdb, &db); - ev->ev_sender = db; - isc_task_send(rbtdb->task, &ev); + INSIST(!ISC_LINK_LINKED(node, prunelink)); + ISC_LIST_APPEND(rbtdb->prunenodes, node, prunelink); + + if (!pruning_queued) { + isc_event_t *ev = NULL; + dns_db_t *db = NULL; + + attach((dns_db_t *)rbtdb, &db); + + ev = isc_event_allocate(rbtdb->common.mctx, NULL, + DNS_EVENT_RBTPRUNE, prune_tree, db, + sizeof(isc_event_t)); + isc_task_send(rbtdb->task, &ev); + } } /*% @@ -2153,17 +2175,26 @@ restore_locks: } /* - * Prune the tree by recursively cleaning-up single leaves. In the worst - * case, the number of iteration is the number of tree levels, which is at - * most the maximum number of domain name labels, i.e, 127. 
In practice, this - * should be much smaller (only a few times), and even the worst case would be - * acceptable for a single event. + * Prune the tree by recursively cleaning up single leaves. Go through all + * nodes stored in the rbtdb->prunenodes list; for each of them, in the worst + * case, it will be necessary to traverse a number of tree levels equal to the + * maximum legal number of domain name labels (127); in practice, the number of + * tree levels to traverse will virtually always be much smaller (a few levels + * at most). While holding the tree lock throughout this entire operation is + * less than ideal, so is splitting the latter up by queueing a separate + * prune_tree() run for each node to start pruning from (as queueing requires + * allocating memory and can therefore potentially be exploited to exhaust + * available memory). Also note that actually freeing up the memory used by + * RBTDB nodes (which is what this function does) is essential to keeping cache + * memory use in check, so since the tree lock needs to be acquired anyway, + * freeing as many nodes as possible before the tree lock gets released is + * prudent. */ static void prune_tree(isc_task_t *task, isc_event_t *event) { - dns_rbtdb_t *rbtdb = event->ev_sender; - dns_rbtnode_t *node = event->ev_arg; - dns_rbtnode_t *parent; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)event->ev_arg; + dns_rbtnode_t *node = NULL; + dns_rbtnode_t *parent = NULL; unsigned int locknum; UNUSED(task); @@ -2171,44 +2202,60 @@ prune_tree(isc_task_t *task, isc_event_t *event) { isc_event_free(&event); RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); - locknum = node->locknum; - NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); - do { - parent = node->parent; - decrement_reference(rbtdb, node, 0, isc_rwlocktype_write, - isc_rwlocktype_write, true); - if (parent != NULL && parent->down == NULL) { - /* - * node was the only down child of the parent and has - * just been removed. 
We'll then need to examine the - * parent. Keep the lock if possible; otherwise, - * release the old lock and acquire one for the parent. - */ - if (parent->locknum != locknum) { - NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, - isc_rwlocktype_write); - locknum = parent->locknum; - NODE_LOCK(&rbtdb->node_locks[locknum].lock, - isc_rwlocktype_write); + while ((node = ISC_LIST_HEAD(rbtdb->prunenodes)) != NULL) { + locknum = node->locknum; + NODE_LOCK(&rbtdb->node_locks[locknum].lock, + isc_rwlocktype_write); + do { + if (ISC_LINK_LINKED(node, prunelink)) { + ISC_LIST_UNLINK(rbtdb->prunenodes, node, + prunelink); } - /* - * We need to gain a reference to the node before - * decrementing it in the next iteration. - */ - if (ISC_LINK_LINKED(parent, deadlink)) { - ISC_LIST_UNLINK(rbtdb->deadnodes[locknum], + parent = node->parent; + decrement_reference(rbtdb, node, 0, + isc_rwlocktype_write, + isc_rwlocktype_write, true); + + if (parent != NULL && parent->down == NULL) { + /* + * node was the only down child of the parent + * and has just been removed. We'll then need + * to examine the parent. Keep the lock if + * possible; otherwise, release the old lock and + * acquire one for the parent. + */ + if (parent->locknum != locknum) { + NODE_UNLOCK( + &rbtdb->node_locks[locknum].lock, + isc_rwlocktype_write); + locknum = parent->locknum; + NODE_LOCK( + &rbtdb->node_locks[locknum].lock, + isc_rwlocktype_write); + } + + /* + * We need to gain a reference to the node + * before decrementing it in the next iteration. 
+ */ + if (ISC_LINK_LINKED(parent, deadlink)) { + ISC_LIST_UNLINK( + rbtdb->deadnodes[locknum], parent, deadlink); + } + new_reference(rbtdb, parent, + isc_rwlocktype_write); + } else { + parent = NULL; } - new_reference(rbtdb, parent, isc_rwlocktype_write); - } else { - parent = NULL; - } - node = parent; - } while (node != NULL); - NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); + node = parent; + } while (node != NULL); + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, + isc_rwlocktype_write); + } RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); detach((dns_db_t **)&rbtdb); @@ -8376,6 +8423,8 @@ dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, ISC_LIST_INIT(rbtdb->deadnodes[i]); } + ISC_LIST_INIT(rbtdb->prunenodes); + rbtdb->active = rbtdb->node_lock_count; for (i = 0; i < (int)(rbtdb->node_lock_count); i++) { diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 4b3d1c0b40..60cac293cb 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -10408,8 +10408,8 @@ dns_resolver_create(dns_view_t *view, isc_taskmgr_t *taskmgr, * Since we have a pool of tasks we bind them to task * queues to spread the load evenly */ - result = isc_task_create_bound(taskmgr, 0, - &res->buckets[i].task, i); + result = isc_task_create_bound( + taskmgr, 0, &res->buckets[i].task, ISC_NM_TASK_SLOW(i)); if (result != ISC_R_SUCCESS) { ntasks = i; isc_mutex_destroy(&res->buckets[i].lock); diff --git a/lib/dns/tsig.c b/lib/dns/tsig.c index 857ec4cfcd..8f96008e31 100644 --- a/lib/dns/tsig.c +++ b/lib/dns/tsig.c @@ -1757,8 +1757,9 @@ isc_result_t dns_tsigkey_find(dns_tsigkey_t **tsigkey, const dns_name_t *name, const dns_name_t *algorithm, dns_tsig_keyring_t *ring) { dns_tsigkey_t *key; - isc_stdtime_t now; isc_result_t result; + isc_rwlocktype_t locktype = isc_rwlocktype_read; + isc_stdtime_t now; REQUIRE(tsigkey != NULL); REQUIRE(*tsigkey == NULL); @@ -1770,25 +1771,30 @@ dns_tsigkey_find(dns_tsigkey_t **tsigkey, const dns_name_t 
*name, RWUNLOCK(&ring->lock, isc_rwlocktype_write); isc_stdtime_get(&now); - RWLOCK(&ring->lock, isc_rwlocktype_read); + +again: + RWLOCK(&ring->lock, locktype); key = NULL; result = dns_rbt_findname(ring->keys, name, 0, NULL, (void *)&key); if (result == DNS_R_PARTIALMATCH || result == ISC_R_NOTFOUND) { - RWUNLOCK(&ring->lock, isc_rwlocktype_read); + RWUNLOCK(&ring->lock, locktype); return (ISC_R_NOTFOUND); } if (algorithm != NULL && !dns_name_equal(key->algorithm, algorithm)) { - RWUNLOCK(&ring->lock, isc_rwlocktype_read); + RWUNLOCK(&ring->lock, locktype); return (ISC_R_NOTFOUND); } if (key->inception != key->expire && isc_serial_lt(key->expire, now)) { /* * The key has expired. */ - RWUNLOCK(&ring->lock, isc_rwlocktype_read); - RWLOCK(&ring->lock, isc_rwlocktype_write); + if (locktype == isc_rwlocktype_read) { + RWUNLOCK(&ring->lock, locktype); + locktype = isc_rwlocktype_write; + goto again; + } remove_fromring(key); - RWUNLOCK(&ring->lock, isc_rwlocktype_write); + RWUNLOCK(&ring->lock, locktype); return (ISC_R_NOTFOUND); } #if 0 @@ -1803,7 +1809,7 @@ dns_tsigkey_find(dns_tsigkey_t **tsigkey, const dns_name_t *name, } #endif /* if 0 */ isc_refcount_increment(&key->refs); - RWUNLOCK(&ring->lock, isc_rwlocktype_read); + RWUNLOCK(&ring->lock, locktype); adjust_lru(key); *tsigkey = key; return (ISC_R_SUCCESS); diff --git a/lib/dns/validator.c b/lib/dns/validator.c index 5e5073e64e..a71e52f25f 100644 --- a/lib/dns/validator.c +++ b/lib/dns/validator.c @@ -1104,8 +1104,8 @@ create_validator(dns_validator_t *val, dns_name_t *name, dns_rdatatype_t type, * 'rdataset'. If found, build a dst_key_t for it and point val->key at * it. * - * If val->key is already non-NULL, locate it in the rdataset and then - * search past it for the *next* key that could have signed 'siginfo', then + * If val->key is already non-NULL, start searching from the next position in + * 'rdataset' to find the *next* key that could have signed 'siginfo', then * set val->key to that. 
* * Returns ISC_R_SUCCESS if a possible matching key has been found, @@ -1118,59 +1118,59 @@ select_signing_key(dns_validator_t *val, dns_rdataset_t *rdataset) { isc_buffer_t b; dns_rdata_t rdata = DNS_RDATA_INIT; dst_key_t *oldkey = val->key; - bool foundold; + bool no_rdata = false; if (oldkey == NULL) { - foundold = true; + result = dns_rdataset_first(rdataset); } else { - foundold = false; + dst_key_free(&oldkey); val->key = NULL; + result = dns_rdataset_next(rdataset); + } + if (result != ISC_R_SUCCESS) { + goto done; } - result = dns_rdataset_first(rdataset); - if (result != ISC_R_SUCCESS) { - goto failure; - } do { dns_rdataset_current(rdataset, &rdata); isc_buffer_init(&b, rdata.data, rdata.length); isc_buffer_add(&b, rdata.length); INSIST(val->key == NULL); - result = dst_key_fromdns(&siginfo->signer, rdata.rdclass, &b, - val->view->mctx, &val->key); + result = dst_key_fromdns_ex(&siginfo->signer, rdata.rdclass, &b, + val->view->mctx, no_rdata, + &val->key); if (result == ISC_R_SUCCESS) { if (siginfo->algorithm == (dns_secalg_t)dst_key_alg(val->key) && siginfo->keyid == (dns_keytag_t)dst_key_id(val->key) && + (dst_key_flags(val->key) & DNS_KEYFLAG_REVOKE) == + 0 && dst_key_iszonekey(val->key)) { - if (foundold) { - /* - * This is the key we're looking for. - */ - return (ISC_R_SUCCESS); - } else if (dst_key_compare(oldkey, val->key)) { - foundold = true; - dst_key_free(&oldkey); + if (no_rdata) { + /* Retry with full key */ + dns_rdata_reset(&rdata); + dst_key_free(&val->key); + no_rdata = false; + continue; } + /* This is the key we're looking for. 
*/ + goto done; } dst_key_free(&val->key); } dns_rdata_reset(&rdata); result = dns_rdataset_next(rdataset); + no_rdata = true; } while (result == ISC_R_SUCCESS); +done: if (result == ISC_R_NOMORE) { result = ISC_R_NOTFOUND; } -failure: - if (oldkey != NULL) { - dst_key_free(&oldkey); - } - return (result); } @@ -1589,20 +1589,9 @@ validate_answer(dns_validator_t *val, bool resume) { continue; } - do { - isc_result_t tresult; - vresult = verify(val, val->key, &rdata, - val->siginfo->keyid); - if (vresult == ISC_R_SUCCESS) { - break; - } - - tresult = select_signing_key(val, val->keyset); - if (tresult != ISC_R_SUCCESS) { - break; - } - } while (1); + vresult = verify(val, val->key, &rdata, val->siginfo->keyid); if (vresult != ISC_R_SUCCESS) { + val->failed = true; validator_log(val, ISC_LOG_DEBUG(3), "failed to verify rdataset"); } else { @@ -1639,9 +1628,13 @@ validate_answer(dns_validator_t *val, bool resume) { } else { validator_log(val, ISC_LOG_DEBUG(3), "verify failure: %s", - isc_result_totext(result)); + isc_result_totext(vresult)); resume = false; } + if (val->failed) { + result = ISC_R_NOMORE; + break; + } } if (result != ISC_R_NOMORE) { validator_log(val, ISC_LOG_DEBUG(3), diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index eff33f6acb..d42cfe9a20 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -750,6 +750,9 @@ isc_nm_verify_tls_peer_result_string(const isc_nmhandle_t *handle); * \li 'handle' is a valid netmgr handle object. 
*/ +#define ISC_NM_TASK_SLOW_OFFSET -2 +#define ISC_NM_TASK_SLOW(i) (ISC_NM_TASK_SLOW_OFFSET - 1 - i) + void isc_nm_task_enqueue(isc_nm_t *mgr, isc_task_t *task, int threadid); /*%< diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c index d7a33d5abe..2220edf364 100644 --- a/lib/isc/netmgr/http.c +++ b/lib/isc/netmgr/http.c @@ -2969,7 +2969,7 @@ isc__nm_http_set_max_streams(isc_nmsocket_t *listener, void isc_nm_http_set_endpoints(isc_nmsocket_t *listener, isc_nm_http_endpoints_t *eps) { - size_t nworkers; + size_t nlisteners; REQUIRE(VALID_NMSOCK(listener)); REQUIRE(listener->type == isc_nm_httplistener); @@ -2977,8 +2977,8 @@ isc_nm_http_set_endpoints(isc_nmsocket_t *listener, atomic_store(&eps->in_use, true); - nworkers = (size_t)listener->mgr->nworkers; - for (size_t i = 0; i < nworkers; i++) { + nlisteners = (size_t)listener->mgr->nlisteners; + for (size_t i = 0; i < nlisteners; i++) { isc__netievent__http_eps_t *ievent = isc__nm_get_netievent_httpendpoints(listener->mgr, listener, eps); @@ -3003,20 +3003,20 @@ isc__nm_async_httpendpoints(isc__networker_t *worker, isc__netievent_t *ev0) { static void http_init_listener_endpoints(isc_nmsocket_t *listener, isc_nm_http_endpoints_t *epset) { - size_t nworkers; + size_t nlisteners; REQUIRE(VALID_NMSOCK(listener)); REQUIRE(VALID_NM(listener->mgr)); REQUIRE(VALID_HTTP_ENDPOINTS(epset)); - nworkers = (size_t)listener->mgr->nworkers; - INSIST(nworkers > 0); + nlisteners = (size_t)listener->mgr->nlisteners; + INSIST(nlisteners > 0); listener->h2.listener_endpoints = isc_mem_get(listener->mgr->mctx, - sizeof(isc_nm_http_endpoints_t *) * nworkers); - listener->h2.n_listener_endpoints = nworkers; - for (size_t i = 0; i < nworkers; i++) { + sizeof(isc_nm_http_endpoints_t *) * nlisteners); + listener->h2.n_listener_endpoints = nlisteners; + for (size_t i = 0; i < nlisteners; i++) { listener->h2.listener_endpoints[i] = NULL; isc_nm_http_endpoints_attach( epset, &listener->h2.listener_endpoints[i]); diff --git 
a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 364a933128..6aca9ab92c 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -776,6 +776,7 @@ struct isc_nm { isc_refcount_t references; isc_mem_t *mctx; int nworkers; + int nlisteners; isc_mutex_t lock; isc_condition_t wkstatecond; isc_condition_t wkpausecond; diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index b19d468820..2310b4b904 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -189,12 +189,12 @@ isc__nm_force_tid(int tid) { } static void -isc__nm_threadpool_initialize(uint32_t workers) { +isc__nm_threadpool_initialize(uint32_t nworkers) { char buf[11]; int r = uv_os_getenv("UV_THREADPOOL_SIZE", buf, &(size_t){ sizeof(buf) }); if (r == UV_ENOENT) { - snprintf(buf, sizeof(buf), "%" PRIu32, workers); + snprintf(buf, sizeof(buf), "%" PRIu32, nworkers); uv_os_setenv("UV_THREADPOOL_SIZE", buf); } } @@ -212,11 +212,11 @@ isc__nm_threadpool_initialize(uint32_t workers) { #endif void -isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { +isc__netmgr_create(isc_mem_t *mctx, uint32_t nworkers, isc_nm_t **netmgrp) { isc_nm_t *mgr = NULL; char name[32]; - REQUIRE(workers > 0); + REQUIRE(nworkers > 0); #ifdef MAXIMAL_UV_VERSION if (uv_version() > MAXIMAL_UV_VERSION) { @@ -234,10 +234,13 @@ isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { uv_version_string(), UV_VERSION_STRING); } - isc__nm_threadpool_initialize(workers); + isc__nm_threadpool_initialize(nworkers); mgr = isc_mem_get(mctx, sizeof(*mgr)); - *mgr = (isc_nm_t){ .nworkers = workers }; + *mgr = (isc_nm_t){ + .nworkers = nworkers * 2, + .nlisteners = nworkers, + }; isc_mem_attach(mctx, &mgr->mctx); isc_mutex_init(&mgr->lock); @@ -272,11 +275,12 @@ isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { atomic_init(&mgr->keepalive, 30000); atomic_init(&mgr->advertised, 30000); - isc_barrier_init(&mgr->pausing, workers); - 
isc_barrier_init(&mgr->resuming, workers); + isc_barrier_init(&mgr->pausing, mgr->nworkers); + isc_barrier_init(&mgr->resuming, mgr->nworkers); - mgr->workers = isc_mem_get(mctx, workers * sizeof(isc__networker_t)); - for (size_t i = 0; i < workers; i++) { + mgr->workers = isc_mem_get(mctx, + mgr->nworkers * sizeof(isc__networker_t)); + for (int i = 0; i < mgr->nworkers; i++) { isc__networker_t *worker = &mgr->workers[i]; int r; @@ -310,7 +314,7 @@ isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { mgr->workers_running++; isc_thread_create(nm_thread, &mgr->workers[i], &worker->thread); - snprintf(name, sizeof(name), "isc-net-%04zu", i); + snprintf(name, sizeof(name), "isc-net-%04d", i); isc_thread_setname(worker->thread, name); } @@ -817,9 +821,15 @@ isc_nm_task_enqueue(isc_nm_t *nm, isc_task_t *task, int threadid) { isc__networker_t *worker = NULL; if (threadid == -1) { - tid = (int)isc_random_uniform(nm->nworkers); + tid = (int)isc_random_uniform(nm->nlisteners); + } else if (threadid == ISC_NM_TASK_SLOW_OFFSET) { + tid = nm->nlisteners + + (int)isc_random_uniform(nm->nworkers - nm->nlisteners); + } else if (threadid < ISC_NM_TASK_SLOW_OFFSET) { + tid = nm->nlisteners + (ISC_NM_TASK_SLOW(threadid) % + (nm->nworkers - nm->nlisteners)); } else { - tid = threadid % nm->nworkers; + tid = threadid % nm->nlisteners; } worker = &nm->workers[tid]; @@ -3778,7 +3788,7 @@ isc__nm_async_settlsctx(isc__networker_t *worker, isc__netievent_t *ev0) { static void set_tlsctx_workers(isc_nmsocket_t *listener, isc_tlsctx_t *tlsctx) { /* Update the TLS context reference for every worker thread. 
*/ - for (size_t i = 0; i < (size_t)listener->mgr->nworkers; i++) { + for (size_t i = 0; i < (size_t)listener->mgr->nlisteners; i++) { isc__netievent__tlsctx_t *ievent = isc__nm_get_netievent_settlsctx(listener->mgr, listener, tlsctx); diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 2a644fed3f..16b53cc579 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -341,7 +341,7 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, isc__nm_connectcb(sock, req, result, false); } else { isc__nmsocket_clearcb(sock); - sock->tid = isc_random_uniform(mgr->nworkers); + sock->tid = isc_random_uniform(mgr->nlisteners); isc__nm_connectcb(sock, req, result, true); } atomic_store(&sock->closed, true); @@ -362,7 +362,7 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, isc__nm_put_netievent_tcpconnect(mgr, ievent); } else { atomic_init(&sock->active, false); - sock->tid = isc_random_uniform(mgr->nworkers); + sock->tid = isc_random_uniform(mgr->nlisteners); isc__nm_enqueue_ievent(&mgr->workers[sock->tid], (isc__netievent_t *)ievent); } @@ -457,7 +457,7 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc__nmsocket_init(sock, mgr, isc_nm_tcplistener, iface); atomic_init(&sock->rchildren, 0); - sock->nchildren = mgr->nworkers; + sock->nchildren = mgr->nlisteners; children_size = sock->nchildren * sizeof(sock->children[0]); sock->children = isc_mem_get(mgr->mctx, children_size); memset(sock->children, 0, children_size); diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index eda6aa62ce..b2a0b1016d 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -324,7 +324,7 @@ isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, isc__nm_put_netievent_tcpdnsconnect(mgr, ievent); } else { atomic_init(&sock->active, false); - sock->tid = isc_random_uniform(mgr->nworkers); + sock->tid = isc_random_uniform(mgr->nlisteners); 
isc__nm_enqueue_ievent(&mgr->workers[sock->tid], (isc__netievent_t *)ievent); } @@ -422,7 +422,7 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface, isc__nmsocket_init(sock, mgr, isc_nm_tcpdnslistener, iface); atomic_init(&sock->rchildren, 0); - sock->nchildren = mgr->nworkers; + sock->nchildren = mgr->nlisteners; children_size = sock->nchildren * sizeof(sock->children[0]); sock->children = isc_mem_get(mgr->mctx, children_size); memset(sock->children, 0, children_size); @@ -808,6 +808,13 @@ isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock) { return (ISC_R_CANCELED); } + if (sock->client && !sock->recv_read) { + /* + * We are not reading data - stop here. + */ + return (ISC_R_CANCELED); + } + req = isc__nm_get_read_req(sock, NULL); REQUIRE(VALID_UVREQ(req)); diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c index d30e33fbfd..feeb1a8d7d 100644 --- a/lib/isc/netmgr/tlsdns.c +++ b/lib/isc/netmgr/tlsdns.c @@ -419,7 +419,7 @@ isc_nm_tlsdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, isc__nm_put_netievent_tlsdnsconnect(mgr, ievent); } else { atomic_init(&sock->active, false); - sock->tid = isc_random_uniform(mgr->nworkers); + sock->tid = isc_random_uniform(mgr->nlisteners); isc__nm_enqueue_ievent(&mgr->workers[sock->tid], (isc__netievent_t *)ievent); } @@ -532,7 +532,7 @@ isc_nm_listentlsdns(isc_nm_t *mgr, isc_sockaddr_t *iface, isc__nmsocket_init(sock, mgr, isc_nm_tlsdnslistener, iface); atomic_init(&sock->rchildren, 0); - sock->nchildren = mgr->nworkers; + sock->nchildren = mgr->nlisteners; children_size = sock->nchildren * sizeof(sock->children[0]); sock->children = isc_mem_get(mgr->mctx, children_size); memset(sock->children, 0, children_size); @@ -1016,6 +1016,13 @@ isc__nm_tlsdns_processbuffer(isc_nmsocket_t *sock) { return (ISC_R_CANCELED); } + if (sock->client && !sock->recv_read) { + /* + * We are not reading data - stop here. 
+ */ + return (ISC_R_CANCELED); + } + req = isc__nm_get_read_req(sock, NULL); REQUIRE(VALID_UVREQ(req)); diff --git a/lib/isc/netmgr/tlsstream.c b/lib/isc/netmgr/tlsstream.c index 7b490719bb..a3fc6d203c 100644 --- a/lib/isc/netmgr/tlsstream.c +++ b/lib/isc/netmgr/tlsstream.c @@ -1264,18 +1264,18 @@ isc__nm_tls_verify_tls_peer_result_string(const isc_nmhandle_t *handle) { static void tls_init_listener_tlsctx(isc_nmsocket_t *listener, isc_tlsctx_t *ctx) { - size_t nworkers; + size_t nlisteners; REQUIRE(VALID_NM(listener->mgr)); REQUIRE(ctx != NULL); - nworkers = (size_t)listener->mgr->nworkers; - INSIST(nworkers > 0); + nlisteners = (size_t)listener->mgr->nlisteners; + INSIST(nlisteners > 0); listener->tlsstream.listener_tls_ctx = isc_mem_get( - listener->mgr->mctx, sizeof(isc_tlsctx_t *) * nworkers); - listener->tlsstream.n_listener_tls_ctx = nworkers; - for (size_t i = 0; i < nworkers; i++) { + listener->mgr->mctx, sizeof(isc_tlsctx_t *) * nlisteners); + listener->tlsstream.n_listener_tls_ctx = nlisteners; + for (size_t i = 0; i < nlisteners; i++) { listener->tlsstream.listener_tls_ctx[i] = NULL; isc_tlsctx_attach(ctx, &listener->tlsstream.listener_tls_ctx[i]); diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 476c7992f6..661de96ac6 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -157,14 +157,14 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb, REQUIRE(VALID_NM(mgr)); /* - * We are creating mgr->nworkers duplicated sockets, one + * We are creating mgr->nlisteners duplicated sockets, one * socket for each worker thread. 
*/ sock = isc_mem_get(mgr->mctx, sizeof(isc_nmsocket_t)); isc__nmsocket_init(sock, mgr, isc_nm_udplistener, iface); atomic_init(&sock->rchildren, 0); - sock->nchildren = mgr->nworkers; + sock->nchildren = mgr->nlisteners; children_size = sock->nchildren * sizeof(sock->children[0]); sock->children = isc_mem_get(mgr->mctx, children_size); memset(sock->children, 0, children_size); @@ -1037,7 +1037,7 @@ isc_nm_udpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, isc__nm_put_netievent_udpconnect(mgr, event); } else { atomic_init(&sock->active, false); - sock->tid = isc_random_uniform(mgr->nworkers); + sock->tid = isc_random_uniform(mgr->nlisteners); isc__nm_enqueue_ievent(&mgr->workers[sock->tid], (isc__netievent_t *)event); } diff --git a/lib/ns/query.c b/lib/ns/query.c index c8d3fd84e9..98de1be7f7 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -467,10 +467,10 @@ static void query_addnxrrsetnsec(query_ctx_t *qctx); static isc_result_t -query_nxdomain(query_ctx_t *qctx, isc_result_t res); +query_nxdomain(query_ctx_t *qctx, isc_result_t result); static isc_result_t -query_redirect(query_ctx_t *qctx); +query_redirect(query_ctx_t *qctx, isc_result_t result); static isc_result_t query_ncache(query_ctx_t *qctx, isc_result_t result); @@ -6230,6 +6230,13 @@ query_lookup_stale(ns_client_t *client) { query_ctx_t qctx; qctx_init(client, NULL, client->query.qtype, &qctx); + if (DNS64(client)) { + qctx.qtype = qctx.type = dns_rdatatype_a; + qctx.dns64 = true; + } + if (DNS64EXCLUDE(client)) { + qctx.dns64_exclude = true; + } dns_db_attach(client->view->cachedb, &qctx.db); client->query.attributes &= ~NS_QUERYATTR_RECURSIONOK; client->query.dboptions |= DNS_DBFIND_STALETIMEOUT; @@ -7720,8 +7727,7 @@ query_usestale(query_ctx_t *qctx, isc_result_t result) { * result from the search. 
*/ static isc_result_t -query_gotanswer(query_ctx_t *qctx, isc_result_t res) { - isc_result_t result = res; +query_gotanswer(query_ctx_t *qctx, isc_result_t result) { char errmsg[256]; CCTRACE(ISC_LOG_DEBUG(3), "query_gotanswer"); @@ -7797,7 +7803,7 @@ root_key_sentinel: return (query_coveringnsec(qctx)); case DNS_R_NCACHENXDOMAIN: - result = query_redirect(qctx); + result = query_redirect(qctx, result); if (result != ISC_R_COMPLETE) { return (result); } @@ -9614,11 +9620,10 @@ query_addnxrrsetnsec(query_ctx_t *qctx) { * Handle NXDOMAIN and empty wildcard responses. */ static isc_result_t -query_nxdomain(query_ctx_t *qctx, isc_result_t res) { +query_nxdomain(query_ctx_t *qctx, isc_result_t result) { dns_section_t section; uint32_t ttl; - isc_result_t result = res; - bool empty_wild = (res == DNS_R_EMPTYWILD); + bool empty_wild = (result == DNS_R_EMPTYWILD); CCTRACE(ISC_LOG_DEBUG(3), "query_nxdomain"); @@ -9627,7 +9632,7 @@ query_nxdomain(query_ctx_t *qctx, isc_result_t res) { INSIST(qctx->is_zone || REDIRECT(qctx->client)); if (!empty_wild) { - result = query_redirect(qctx); + result = query_redirect(qctx, result); if (result != ISC_R_COMPLETE) { return (result); } @@ -9715,7 +9720,7 @@ cleanup: * redirecting, so query processing should continue past it. 
*/ static isc_result_t -query_redirect(query_ctx_t *qctx) { +query_redirect(query_ctx_t *qctx, isc_result_t saved_result) { isc_result_t result; CCTRACE(ISC_LOG_DEBUG(3), "query_redirect"); @@ -9756,7 +9761,7 @@ query_redirect(query_ctx_t *qctx) { SAVE(qctx->client->query.redirect.rdataset, qctx->rdataset); SAVE(qctx->client->query.redirect.sigrdataset, qctx->sigrdataset); - qctx->client->query.redirect.result = DNS_R_NCACHENXDOMAIN; + qctx->client->query.redirect.result = saved_result; dns_name_copy(qctx->fname, qctx->client->query.redirect.fname); qctx->client->query.redirect.authoritative = qctx->authoritative; @@ -10417,7 +10422,7 @@ query_coveringnsec(query_ctx_t *qctx) { * We now have the proof that we have an NXDOMAIN. Apply * NXDOMAIN redirection if configured. */ - result = query_redirect(qctx); + result = query_redirect(qctx, DNS_R_COVERINGNSEC); if (result != ISC_R_COMPLETE) { redirected = true; goto cleanup; diff --git a/tests/isc/netmgr_test.c b/tests/isc/netmgr_test.c index f75207a223..49e5e01722 100644 --- a/tests/isc/netmgr_test.c +++ b/tests/isc/netmgr_test.c @@ -2401,6 +2401,176 @@ ISC_RUN_TEST_IMPL(tlsdns_recv_one) { atomic_assert_int_eq(ssends, 0); } +static void +tlsdns_many_listen_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *cbarg) { + uint64_t magic = 0; + isc_nmhandle_t *sendhandle = NULL; + isc_buffer_t *send_data = (isc_buffer_t *)cbarg; + isc_region_t send_messages = { 0 }; + + assert_non_null(handle); + assert_non_null(send_data); + + F(); + + if (eresult != ISC_R_SUCCESS) { + goto unref; + } + + atomic_fetch_add(&sreads, 1); + + assert_true(region->length >= sizeof(magic)); + + memmove(&magic, region->base + sizeof(uint16_t), sizeof(magic)); /* NOTE(review): copy starts at offset sizeof(uint16_t) (presumably past a length prefix), yet the assert above only guarantees sizeof(magic) bytes - confirm */ + assert_true(magic == stop_magic || magic == send_magic); + + isc_nmhandle_attach(handle, &sendhandle); + isc_refcount_increment0(&active_ssends); + isc_nmhandle_setwritetimeout(sendhandle, T_IDLE); + /* send multiple DNS messages at once */ +
isc_buffer_usedregion(send_data, &send_messages); + isc_nm_send(sendhandle, &send_messages, listen_send_cb, cbarg); +unref: + isc_refcount_decrement(&active_sreads); + isc_nmhandle_detach(&handle); +} + +static isc_result_t +tlsdns_many_listen_accept_cb(isc_nmhandle_t *handle, isc_result_t eresult, + void *cbarg) { + isc_nmhandle_t *readhandle = NULL; + + UNUSED(cbarg); /* NOTE(review): cbarg is nevertheless passed to isc_nm_read() below */ + + F(); + + if (eresult != ISC_R_SUCCESS) { + return (eresult); + } + + atomic_fetch_add(&saccepts, 1); + + isc_refcount_increment0(&active_sreads); + isc_nmhandle_attach(handle, &readhandle); + isc_nm_read(handle, tlsdns_many_listen_read_cb, cbarg); + + return (ISC_R_SUCCESS); +} + +static void +tlsdns_many_connect_read_cb(isc_nmhandle_t *handle, isc_result_t eresult, + isc_region_t *region, void *cbarg) { + isc_nmhandle_t *sendhandle = NULL; + uint64_t magic = 0; + + UNUSED(cbarg); + + assert_non_null(handle); + + F(); + + if (eresult != ISC_R_SUCCESS) { + goto unref; + } + + assert_true(region->length >= sizeof(magic)); + + atomic_fetch_add(&creads, 1); + + memmove(&magic, region->base, sizeof(magic)); + + assert_true(magic == stop_magic || magic == send_magic); + + isc_refcount_increment0(&active_csends); + isc_nmhandle_attach(handle, &sendhandle); + isc_nmhandle_setwritetimeout(handle, T_IDLE); + /* + * At this point the read is completed, so reading should stop - + * but the sending code will attempt to cycle through the input. + * When not properly handled, this situation will cause + * excessive reads. + */ + isc_nm_send(sendhandle, &send_msg, connect_send_cb, NULL); + +unref: + isc_refcount_decrement(&active_creads); + isc_nmhandle_detach(&handle); +} + +/* + * A unit test *VERY* specific to #4487 - it would crash the unit test + * suite without the related fix due to excessive/unexpected reads. + * + * The intention behind the test is to (needlessly ;-)) prove that the + * author of the fix is not fantasising and excessive reads are + * possible in principle. 
Also, it proves that there is more than one + * way to do that. + * + * It is *not* reproducing the situation from the bug report 1:1, as + * it is impossible to understand what exactly was going on with this + * custom/proprietary server without having access to it (and even in + * that case the bug was hard to reproduce to the point where the + * reporters considered it to be fixed for a while). There are far too + * many things at play. + */ +ISC_RUN_TEST_IMPL(tlsdns_server_send_many_recv_one) { + isc_result_t result = ISC_R_SUCCESS; + isc_nmsocket_t *listen_sock = NULL; + uint8_t buf[512]; + isc_buffer_t server_send_buf = { 0 }; + + isc_buffer_init(&server_send_buf, buf, sizeof(buf)); + + /* + * Prepare a buffer with three "DNS" messages which we will send + * at once (our code does not normally do that). + */ + isc_buffer_putuint16(&server_send_buf, (uint16_t)send_msg.length); + isc_buffer_putmem(&server_send_buf, send_msg.base, send_msg.length); + isc_buffer_putuint16(&server_send_buf, (uint16_t)send_msg.length); + isc_buffer_putmem(&server_send_buf, send_msg.base, send_msg.length); + isc_buffer_putuint16(&server_send_buf, (uint16_t)send_msg.length); + isc_buffer_putmem(&server_send_buf, send_msg.base, send_msg.length); + + atomic_store(&nsends, 1); + + result = isc_nm_listentls( + listen_nm, &tcp_listen_addr, tlsdns_many_listen_accept_cb, + &server_send_buf, 0, 0, NULL, tcp_listen_tlsctx, &listen_sock); + assert_int_equal(result, ISC_R_SUCCESS); + + connect_readcb = tlsdns_many_connect_read_cb; + isc_refcount_increment0(&active_cconnects); + isc_nm_tlsdnsconnect(connect_nm, &tcp_connect_addr, &tcp_listen_addr, + connect_connect_cb, NULL, T_CONNECT, 0, + tcp_connect_tlsctx, tcp_tlsctx_client_sess_cache); + + WAIT_FOR_EQ(cconnects, 1); + WAIT_FOR_LE(nsends, 0); + WAIT_FOR_EQ(csends, 2); + WAIT_FOR_EQ(sreads, 1); + WAIT_FOR_EQ(ssends, 1); + WAIT_FOR_EQ(creads, 1); + + isc_nm_stoplistening(listen_sock); + isc_nmsocket_close(&listen_sock); +
assert_null(listen_sock); + isc__netmgr_shutdown(connect_nm); + + X(cconnects); + X(csends); + X(creads); + X(sreads); + X(ssends); + + atomic_assert_int_eq(cconnects, 1); + atomic_assert_int_eq(csends, 2); + atomic_assert_int_eq(creads, 1); + atomic_assert_int_eq(sreads, 1); + atomic_assert_int_eq(ssends, 1); +} + ISC_RUN_TEST_IMPL(tlsdns_recv_two) { isc_result_t result = ISC_R_SUCCESS; isc_nmsocket_t *listen_sock = NULL; @@ -2879,6 +3049,8 @@ ISC_TEST_ENTRY_CUSTOM(tls_half_recv_half_send_quota_sendback, setup_test, /* TLSDNS */ ISC_TEST_ENTRY_CUSTOM(tlsdns_recv_one, setup_test, teardown_test) +ISC_TEST_ENTRY_CUSTOM(tlsdns_server_send_many_recv_one, setup_test, + teardown_test) ISC_TEST_ENTRY_CUSTOM(tlsdns_recv_two, setup_test, teardown_test) ISC_TEST_ENTRY_CUSTOM(tlsdns_noop, setup_test, teardown_test) ISC_TEST_ENTRY_CUSTOM(tlsdns_noresponse, setup_test, teardown_test)