From 0683d76025c488826224fe74960295f82e55cfaf Mon Sep 17 00:00:00 2001 From: Alessio Podda Date: Thu, 18 Dec 2025 00:54:48 +0100 Subject: [PATCH 1/5] Extract heap deregistration This commit changes the deregistration of vecheaders from the heap to go through a private api instead of the dyndb public one. This is safe since vecheader is only used by qpzone. This is done in order to facilitate further refactoring. --- lib/dns/qpzone.c | 15 +++++++++++++++ lib/dns/rdatavec.c | 1 - 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/dns/qpzone.c b/lib/dns/qpzone.c index 2993d833c0..6c022c6a07 100644 --- a/lib/dns/qpzone.c +++ b/lib/dns/qpzone.c @@ -68,6 +68,9 @@ #define HEADERNODE(h) ((qpznode_t *)((h)->node)) +/* Forward declaration */ +static void deletedata(dns_dbnode_t *node, void *data); + #define QPDB_ATTR_LOADED 0x01 #define QPDB_ATTR_LOADING 0x02 @@ -832,6 +835,7 @@ clean_multiple_headers(dns_vectop_t *top) { if (header->serial == parent_serial || IGNORE(header)) { ISC_SLIST_PTR_REMOVE(p, header, next_header); + deletedata(header->node, header); dns_vecheader_destroy(&header); } else { parent_serial = header->serial; @@ -855,6 +859,7 @@ clean_multiple_versions(dns_vectop_t *top, uint32_t least_serial) { dns_vecheader_t *header = *p; if (header->serial < least_serial) { ISC_SLIST_PTR_REMOVE(p, header, next_header); + deletedata(header->node, header); dns_vecheader_destroy(&header); } else { multiple = true; @@ -1846,6 +1851,7 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, * alone. It will get cleaned up when * clean_zone_node() runs. */ + deletedata(newheader->node, newheader); dns_vecheader_destroy(&newheader); newheader = merged; dns_vecheader_reset(newheader, @@ -1870,6 +1876,7 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, header->typepair), "updating", qpdb->maxrrperset); } + deletedata(newheader->node, newheader); dns_vecheader_destroy(&newheader); return result; } @@ -1896,6 +1903,7 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, maybe_update_recordsandsize(false, version, header, nodename->length); + deletedata(header->node, header); dns_vecheader_destroy(&header); } else { if (RESIGN(newheader)) { @@ -1922,6 +1930,7 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, * If we're trying to delete the type, don't bother. */ if (!EXISTS(newheader)) { + deletedata(newheader->node, newheader); dns_vecheader_destroy(&newheader); return DNS_R_UNCHANGED; } @@ -1957,6 +1966,7 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, if (qpdb->maxtypepername > 0 && ntypes >= qpdb->maxtypepername) { + deletedata(newheader->node, newheader); dns_vecheader_destroy(&newheader); return DNS_R_TOOMANYRECORDS; } @@ -4920,6 +4930,7 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, &subresult); } if (result == ISC_R_SUCCESS) { + deletedata(newheader->node, newheader); dns_vecheader_destroy(&newheader); newheader = subresult; dns_vecheader_reset(newheader, (dns_dbnode_t *)node); @@ -4952,6 +4963,7 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, * This subtraction would remove all of the rdata; * add a nonexistent header instead. */ + deletedata(newheader->node, newheader); dns_vecheader_destroy(&newheader); newheader = dns_vecheader_new(db->mctx, (dns_dbnode_t *)node); @@ -4961,6 +4973,7 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, DNS_VECHEADERATTR_NONEXISTENT); newheader->serial = version->serial; } else { + deletedata(newheader->node, newheader); dns_vecheader_destroy(&newheader); goto unlock; } @@ -4981,6 +4994,7 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, * The rdataset doesn't exist, so we don't need to do anything * to satisfy the deletion request. */ + deletedata(newheader->node, newheader); dns_vecheader_destroy(&newheader); if ((options & DNS_DBSUB_EXACT) != 0) { result = DNS_R_NOTEXACT; @@ -5575,6 +5589,7 @@ static void destroy_qpznode(qpznode_t *node) { ISC_SLIST_FOREACH(top, node->next_type, next_type) { ISC_SLIST_FOREACH(header, top->headers, next_header) { + deletedata(header->node, header); dns_vecheader_destroy(&header); } diff --git a/lib/dns/rdatavec.c b/lib/dns/rdatavec.c index 62bc189b47..7ae9331c03 100644 --- a/lib/dns/rdatavec.c +++ b/lib/dns/rdatavec.c @@ -821,7 +821,6 @@ dns_vecheader_destroy(dns_vecheader_t **headerp) { *headerp = NULL; isc_mem_t *mctx = header->node->mctx; - dns_db_deletedata(header->node, header); if (EXISTS(header)) { size = dns_rdatavec_size(header); From 1aa0768151d1fcf8416f7b9029c4dacea90bafb5 Mon Sep 17 00:00:00 2001 From: Alessio Podda Date: Thu, 18 Dec 2025 00:54:24 +0100 Subject: [PATCH 2/5] Refactor setsigningtime Change setsigningtime to take the node of the header being changed. Done to facilitate further refactoring that will remove the header pointer from vecheader. --- bin/tests/system/dyndb/driver/db.c | 5 +++-- lib/dns/db.c | 7 ++++--- lib/dns/diff.c | 2 +- lib/dns/include/dns/db.h | 9 +++++---- lib/dns/qpzone.c | 14 ++++++++------ 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/bin/tests/system/dyndb/driver/db.c b/bin/tests/system/dyndb/driver/db.c index 0db774af88..57c2e5de72 100644 --- a/bin/tests/system/dyndb/driver/db.c +++ b/bin/tests/system/dyndb/driver/db.c @@ -276,12 +276,13 @@ findnsec3node(dns_db_t *db, const dns_name_t *name, bool create, } static isc_result_t -setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) { +setsigningtime(dns_db_t *db, dns_dbnode_t *node, dns_rdataset_t *rdataset, + isc_stdtime_t resign) { sampledb_t *sampledb = (sampledb_t *)db; REQUIRE(VALID_SAMPLEDB(sampledb)); - return dns_db_setsigningtime(sampledb->db, rdataset, resign); + return dns_db_setsigningtime(sampledb->db, node, rdataset, resign); } static isc_result_t diff --git a/lib/dns/db.c b/lib/dns/db.c index 2cbf50c2da..e86e1cba2b 100644 --- a/lib/dns/db.c +++ b/lib/dns/db.c @@ -875,10 +875,11 @@ dns_db_getsize(dns_db_t *db, dns_dbversion_t *version, uint64_t *records, } isc_result_t -dns_db_setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, - isc_stdtime_t resign) { +dns_db_setsigningtime(dns_db_t *db, dns_dbnode_t *node, + dns_rdataset_t *rdataset, isc_stdtime_t resign) { if (db->methods->setsigningtime != NULL) { - return (db->methods->setsigningtime)(db, rdataset, resign); + return (db->methods->setsigningtime)(db, node, rdataset, + resign); } return ISC_R_NOTIMPLEMENTED; } diff --git a/lib/dns/diff.c b/lib/dns/diff.c index 97ab6db7b2..4e9ea0c494 100644 --- a/lib/dns/diff.c +++ b/lib/dns/diff.c @@ -264,7 +264,7 @@ update_rdataset(dns_db_t *db, dns_dbversion_t *ver, dns_name_t *name, if (is_resign) { isc_stdtime_t resign; resign = dns_rdataset_minresign(&ardataset); - dns_db_setsigningtime(db, &ardataset, resign); + dns_db_setsigningtime(db, node, &ardataset, resign); } cleanup: diff --git a/lib/dns/include/dns/db.h b/lib/dns/include/dns/db.h index 72c735a39a..b73c3b12b4 100644 --- a/lib/dns/include/dns/db.h +++ b/lib/dns/include/dns/db.h @@ -139,8 +139,9 @@ typedef struct dns_db_methods { isc_result_t (*findnsec3node)(dns_db_t *db, const dns_name_t *name, bool create, dns_dbnode_t **nodep DNS__DB_FLARG); - isc_result_t (*setsigningtime)(dns_db_t *db, dns_rdataset_t *rdataset, - isc_stdtime_t resign); + isc_result_t (*setsigningtime)(dns_db_t *db, dns_dbnode_t *node, + dns_rdataset_t *rdataset, + isc_stdtime_t resign); isc_result_t (*getsigningtime)(dns_db_t *db, isc_stdtime_t *resign, dns_name_t *name, dns_typepair_t *typepair); @@ -1572,8 +1573,8 @@ dns__db_findnsec3node(dns_db_t *db, const dns_name_t *name, bool create, */ isc_result_t -dns_db_setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, - isc_stdtime_t resign); +dns_db_setsigningtime(dns_db_t *db, dns_dbnode_t *node, + dns_rdataset_t *rdataset, isc_stdtime_t resign); /*%< * Sets the re-signing time associated with 'rdataset' to 'resign'. * diff --git a/lib/dns/qpzone.c b/lib/dns/qpzone.c index 6c022c6a07..0dce5ffb69 100644 --- a/lib/dns/qpzone.c +++ b/lib/dns/qpzone.c @@ -2308,7 +2308,8 @@ getsize(dns_db_t *db, dns_dbversion_t *dbversion, uint64_t *records, } static isc_result_t -setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) { +setsigningtime(dns_db_t *db, dns_dbnode_t *node, dns_rdataset_t *rdataset, + isc_stdtime_t resign) { qpzonedb_t *qpdb = (qpzonedb_t *)db; dns_vecheader_t *header = NULL, oldheader; isc_rwlocktype_t nlocktype = isc_rwlocktype_none; @@ -2320,7 +2321,7 @@ setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) { header = dns_vecheader_getheader(rdataset); - nlock = qpzone_get_lock(HEADERNODE(header)); + nlock = qpzone_get_lock((qpznode_t *)node); NODE_WRLOCK(nlock, &nlocktype); oldheader = *header; @@ -2339,14 +2340,14 @@ setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) { INSIST(RESIGN(header)); LOCK(get_heap_lock(header)); if (resign == 0) { - isc_heap_delete(HEADERNODE(header)->heap->heap, + isc_heap_delete(((qpznode_t *)node)->heap->heap, header->heap_index); header->heap_index = 0; } else if (resign_sooner(header, &oldheader)) { - isc_heap_increased(HEADERNODE(header)->heap->heap, + isc_heap_increased(((qpznode_t *)node)->heap->heap, header->heap_index); } else if (resign_sooner(&oldheader, header)) { - isc_heap_decreased(HEADERNODE(header)->heap->heap, + isc_heap_decreased(((qpznode_t *)node)->heap->heap, header->heap_index); } UNLOCK(get_heap_lock(header)); @@ -5450,7 +5451,8 @@ qpzone_update_rdataset(qpzonedb_t *qpdb, qpz_version_t *version, dns_qp_t *qp, if (result == ISC_R_SUCCESS && is_resign) { isc_stdtime_t resign; resign = dns_rdataset_minresign(&ardataset); - dns_db_setsigningtime((dns_db_t *)qpdb, &ardataset, resign); + dns_db_setsigningtime((dns_db_t *)qpdb, (dns_dbnode_t *)node, + &ardataset, resign); } failure: From 3521900ecda1544bccfc9d6926e7163ca736ddf7 Mon Sep 17 00:00:00 2001 From: Alessio Podda Date: Thu, 18 Dec 2025 01:37:53 +0100 Subject: [PATCH 3/5] Add hashmap to qpz_heap This commit adds a level of indirection to the signing operations. Instead of being intrusive, the qpz_heap will keep track of which headers must be resigned through a hashmap. The intent is to make dns_vecheader_t entirely self-contained. In particular, the ownership structure between the heap and the headers is flipped. Before, the headers would "own" the heap, now the heap owns the header. --- lib/dns/include/dns/rdatavec.h | 45 +-- lib/dns/qpzone.c | 636 ++++++++++++++++++++------------- lib/dns/rdatavec.c | 91 ++--- tests/dns/qpzone_test.c | 8 +- tests/dns/vecheader_test.c | 264 ++++++++++++++ 5 files changed, 718 insertions(+), 326 deletions(-) diff --git a/lib/dns/include/dns/rdatavec.h b/lib/dns/include/dns/rdatavec.h index b92f676274..37d10b5a86 100644 --- a/lib/dns/include/dns/rdatavec.h +++ b/lib/dns/include/dns/rdatavec.h @@ -81,35 +81,31 @@ struct dns_vecheader { _Atomic(uint16_t) attributes; _Atomic(dns_trust_t) trust; + dns_typepair_t typepair; + + isc_refcount_t references; + /*% - * Locked by the heap lock. Can't be packed together with other fields - * since it is protected by a different lock. + * Memory context for this header. */ - unsigned int heap_index; + isc_mem_t *mctx; /*% * Locked by the owning node's lock. */ - uint32_t serial; - dns_ttl_t ttl; - dns_typepair_t typepair; + uint32_t serial; + dns_ttl_t ttl; /* * resigning (zone). */ - isc_stdtime_t resign; - uint16_t resign_lsb : 1; + int64_t resign; /*% * Link to the other versions of this rdataset. */ ISC_SLINK(dns_vecheader_t) next_header; - /*% - * The database node objects containing this rdataset, if any. - */ - dns_dbnode_t *node; - /*% * Cached glue records for an rdataset of type NS (zone only). */ @@ -240,24 +236,10 @@ dns_vecheader_setownercase(dns_vecheader_t *header, const dns_name_t *name); * \li 'name' is a valid name. */ -void -dns_vecheader_reset(dns_vecheader_t *h, dns_dbnode_t *node); -/*%< - * Reset an rdatavec header 'h' so it can be used to store data in - * database node 'node'. - */ - dns_vecheader_t * -dns_vecheader_new(isc_mem_t *mctx, dns_dbnode_t *node); +dns_vecheader_new(isc_mem_t *mctx); /*%< - * Allocate memory for an rdatavec header and initialize it for use - * in database node 'node'. - */ - -void -dns_vecheader_destroy(dns_vecheader_t **headerp); -/*%< - * Free all memory associated with '*headerp'. + * Allocate memory for an rdatavec header and initialize it. */ dns_vectop_t * @@ -272,3 +254,8 @@ dns_vectop_destroy(isc_mem_t *mctx, dns_vectop_t **topp); /*%< * Free all memory associated with '*vectopp'. */ + +/* + * Reference counting for dns_vecheader_t + */ +ISC_REFCOUNT_DECL(dns_vecheader); diff --git a/lib/dns/qpzone.c b/lib/dns/qpzone.c index 0dce5ffb69..2731d6b196 100644 --- a/lib/dns/qpzone.c +++ b/lib/dns/qpzone.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -66,11 +67,6 @@ #include "qpzone_p.h" #include "rdatavec_p.h" -#define HEADERNODE(h) ((qpznode_t *)((h)->node)) - -/* Forward declaration */ -static void deletedata(dns_dbnode_t *node, void *data); - #define QPDB_ATTR_LOADED 0x01 #define QPDB_ATTR_LOADING 0x02 @@ -121,6 +117,7 @@ typedef struct qpz_changed { typedef ISC_LIST(qpz_changed_t) qpz_changedlist_t; typedef struct qpz_resigned { + qpznode_t *node; dns_vecheader_t *header; ISC_LINK(struct qpz_resigned) link; } qpz_resigned_t; @@ -166,14 +163,19 @@ typedef struct qpz_heap { /* Locks the data in this struct */ isc_mutex_t lock; isc_heap_t *heap; + isc_hashmap_t *hashmap; } qpz_heap_t; +typedef struct qpz_resign { + qpznode_t *node; + dns_vecheader_t *header; + unsigned int heap_index; +} qpz_resign_t; + ISC_REFCOUNT_STATIC_DECL(qpz_heap); struct qpznode { DBNODE_FIELDS; - - qpz_heap_t *heap; /* * 'erefs' counts external references held by a caller: for * example, it could be incremented by dns_db_findnode(), @@ -293,6 +295,79 @@ ISC_REFCOUNT_STATIC_TRACE_DECL(qpznode); ISC_REFCOUNT_STATIC_DECL(qpznode); #endif +/* Forward declarations for functions used in constructors */ +static void +qpznode_acquire(qpznode_t *node DNS__DB_FLARG); +static bool +qpznode_release(qpznode_t *node DNS__DB_FLARG); +static void +qpznode_erefs_increment(qpznode_t *node DNS__DB_FLARG); + +/*% + * Constructor and destructor for qpz_changed_t + */ +static qpz_changed_t * +qpz_changed_new(isc_mem_t *mctx, qpznode_t *node DNS__DB_FLARG) { + qpz_changed_t *changed = isc_mem_get(mctx, sizeof(qpz_changed_t)); + *changed = (qpz_changed_t){ + .node = node, + .dirty = false, + .link = ISC_LINK_INITIALIZER, + }; + qpznode_acquire(node DNS__DB_FLARG_PASS); + return changed; +} + +/*% + * Constructor and destructor for qpz_resigned_t + */ +static qpz_resigned_t * +qpz_resigned_new(isc_mem_t *mctx, qpznode_t *node, dns_vecheader_t *header) { + qpz_resigned_t *resigned = isc_mem_get(mctx, sizeof(qpz_resigned_t)); + *resigned = (qpz_resigned_t){ + .node = node, + .header = header, + .link = ISC_LINK_INITIALIZER, + }; + qpznode_ref(node); + dns_vecheader_ref(header); + return resigned; +} + +static void +qpz_resigned_destroy(isc_mem_t *mctx, qpz_resigned_t **resignedp) { + qpz_resigned_t *resigned = *resignedp; + *resignedp = NULL; + qpznode_unref(resigned->node); + dns_vecheader_unref(resigned->header); + isc_mem_put(mctx, resigned, sizeof(qpz_resigned_t)); +} + +/*% + * Constructor and destructor for qpz_resign_t + */ +static qpz_resign_t * +qpz_resign_new(isc_mem_t *mctx, qpznode_t *node, dns_vecheader_t *header) { + qpz_resign_t *elem = isc_mem_get(mctx, sizeof(qpz_resign_t)); + *elem = (qpz_resign_t){ + .node = node, + .header = header, + .heap_index = 0, + }; + qpznode_ref(node); + dns_vecheader_ref(header); + return elem; +} + +static void +qpz_resign_destroy(isc_mem_t *mctx, qpz_resign_t **elemp) { + qpz_resign_t *elem = *elemp; + *elemp = NULL; + qpznode_unref(elem->node); + dns_vecheader_unref(elem->header); + isc_mem_put(mctx, elem, sizeof(qpz_resign_t)); +} + /* QP trie methods */ static void qp_attach(void *uctx, void *pval, uint32_t ival); @@ -416,31 +491,6 @@ qpzone_get_locknum(void) { return isc_random_uniform(ARRAY_SIZE(qpzone_buckets_g)); } -/*% - * Return which RRset should be resigned sooner. If the RRsets have the - * same signing time, prefer the other RRset over the SOA RRset. - */ -static bool -resign_sooner(void *v1, void *v2) { - dns_vecheader_t *h1 = v1; - dns_vecheader_t *h2 = v2; - - return h1->resign < h2->resign || - (h1->resign == h2->resign && h1->resign_lsb < h2->resign_lsb) || - (h1->resign == h2->resign && h1->resign_lsb == h2->resign_lsb && - h2->typepair == DNS_SIGTYPEPAIR(dns_rdatatype_soa)); -} - -/*% - * This function sets the heap index into the header. - */ -static void -set_index(void *what, unsigned int idx) { - dns_vecheader_t *h = what; - - h->heap_index = idx; -} - static void free_glue(isc_mem_t *mctx, dns_glue_t *glue) { while (glue != NULL) { @@ -584,6 +634,128 @@ qpdb_destroy(dns_db_t *arg) { qpzone_destroy(qpdb); } +/*% + * Compare resign times with SOA priority logic. + * Returns true if lhs should be resigned sooner than rhs. + */ +static bool +resign_sooner_values(int64_t lhs_resign, int64_t rhs_resign, + dns_typepair_t rhs_typepair) { + return lhs_resign < rhs_resign || + (lhs_resign == rhs_resign && + rhs_typepair == DNS_SIGTYPEPAIR(dns_rdatatype_soa)); +} + +/*% + * Return which RRset should be resigned sooner. If the RRsets have the + * same signing time, prefer the other RRset over the SOA RRset. + */ +static bool +resign_sooner(void *v1, void *v2) { + qpz_resign_t *elem1 = v1; + qpz_resign_t *elem2 = v2; + + return resign_sooner_values(elem1->header->resign, + elem2->header->resign, + elem2->header->typepair); +} + +/*% + * This function sets the heap index into the qpz_resign_t. + */ +static void +set_index(void *what, unsigned int idx) { + qpz_resign_t *elem = what; + + elem->heap_index = idx; +} + +/*% + * Hashmap matching function for qpz_resign_t entries. + * Matches based on header and node pointers. + */ +static bool +resign_match(void *elem_ptr, const void *key) { + qpz_resign_t *elem = elem_ptr; + const qpz_resign_t *search_elem = key; + + return elem->header == search_elem->header && + elem->node == search_elem->node; +} + +/*% + * Generate hash value for a heap element based on header pointer. + */ +static uint32_t +resign_hash(dns_vecheader_t *header) { + uintptr_t headerptr = (uintptr_t)header; + return isc_hash32(&headerptr, sizeof(headerptr), true); +} + +/*% + * Find an element in the heap/hashmap by header and node. + * Returns ISC_R_SUCCESS if found, ISC_R_NOTFOUND if not found. + * If found, *found_elem will point to the element. + * Assumes heap lock is already held. + */ +static isc_result_t +resign_lookup(qpz_heap_t *heap, dns_vecheader_t *header, qpznode_t *node, + qpz_resign_t **found_elem) { + qpz_resign_t search_elem = { + .header = header, + .node = node, + }; + uint32_t hashval = resign_hash(header); + + return isc_hashmap_find(heap->hashmap, hashval, resign_match, + &search_elem, (void **)found_elem); +} + +/*% + * Add an element to the heap/hashmap. + * Assumes heap lock is already held. + */ +static void +resign_register(qpz_heap_t *heap, qpznode_t *node, dns_vecheader_t *header) { + qpz_resign_t *elem = qpz_resign_new(heap->mctx, node, header); + uint32_t hashval = resign_hash(header); + + /* Verify invariant: element should not already be in hashmap */ + isc_result_t result = isc_hashmap_add(heap->hashmap, hashval, + resign_match, elem, elem, NULL); + INSIST(result == ISC_R_SUCCESS); + + isc_heap_insert(heap->heap, elem); +} + +/*% + * Remove an element from the heap/hashmap. + * Assumes heap lock is already held. + */ +static void +resign_unregister(qpz_heap_t *heap, qpznode_t *node, dns_vecheader_t *header) { + if (header == NULL) { + return; + } + + qpz_resign_t *found_elem = NULL; + uint32_t hashval = resign_hash(header); + qpz_resign_t search_elem = { + .header = header, + .node = node, + }; + isc_result_t result = isc_hashmap_find(heap->hashmap, hashval, + resign_match, &search_elem, + (void **)&found_elem); + + if (result == ISC_R_SUCCESS) { + isc_heap_delete(heap->heap, found_elem->heap_index); + isc_hashmap_delete(heap->hashmap, hashval, resign_match, + found_elem); + qpz_resign_destroy(heap->mctx, &found_elem); + } +} + static qpz_heap_t * new_qpz_heap(isc_mem_t *mctx) { qpz_heap_t *new_heap = isc_mem_get(mctx, sizeof(*new_heap)); @@ -595,36 +767,42 @@ new_qpz_heap(isc_mem_t *mctx) { isc_heap_create(mctx, resign_sooner, set_index, 0, &new_heap->heap); isc_mem_attach(mctx, &new_heap->mctx); + isc_hashmap_create(mctx, 1u, &new_heap->hashmap); + return new_heap; } -/* - * This function accesses the heap lock through the header and node rather than - * directly through &qpdb->heap->lock to handle a critical race condition. - * - * Consider this scenario: - * 1. A reference is taken to a qpznode - * 2. The database containing that node is freed - * 3. The qpznode reference is finally released - * - * When the qpznode reference is released, it needs to unregister all its - * vecheaders from the resigning heap. The heap is a separate refcounted - * object with references from both the database and every qpznode. This - * design ensures that even after the database is destroyed, if nodes are - * still alive, the heap remains accessible for safe cleanup. - * - * Accessing the heap lock through the database (&qpdb->heap->lock) would - * cause a segfault in this scenario, even though the heap itself is still - * alive. By going through the node's heap reference, we maintain safe access - * to the heap lock regardless of the database's lifecycle. - */ -static isc_mutex_t * -get_heap_lock(dns_vecheader_t *header) { - return &HEADERNODE(header)->heap->lock; +static void +resign_rollback(qpzonedb_t *qpdb, qpznode_t *node, qpz_version_t *version, + dns_vecheader_t *header DNS__DB_FLARG) { + if (header == NULL) { + return; + } + + qpz_resigned_t *resigned = qpz_resigned_new(((dns_db_t *)qpdb)->mctx, + node, header); + + RWLOCK(&qpdb->lock, isc_rwlocktype_write); + ISC_LIST_APPEND(version->resigned_list, resigned, link); + RWUNLOCK(&qpdb->lock, isc_rwlocktype_write); } static void qpz_heap_destroy(qpz_heap_t *qpheap) { + /* Clean up hashmap entries */ + isc_hashmap_iter_t *iter = NULL; + isc_hashmap_iter_create(qpheap->hashmap, &iter); + for (isc_result_t result = isc_hashmap_iter_first(iter); + result == ISC_R_SUCCESS; result = isc_hashmap_iter_next(iter)) + { + qpz_resign_t *elem = NULL; + isc_hashmap_iter_current(iter, (void **)&elem); + /* Release the node reference and deallocate */ + qpz_resign_destroy(qpheap->mctx, &elem); + } + isc_hashmap_iter_destroy(&iter); + isc_hashmap_destroy(&qpheap->hashmap); + isc_mutex_destroy(&qpheap->lock); isc_heap_destroy(&qpheap->heap); isc_mem_putanddetach(&qpheap->mctx, qpheap, sizeof(*qpheap)); @@ -638,14 +816,12 @@ new_qpznode(qpzonedb_t *qpdb, const dns_name_t *name, dns_namespace_t nspace) { .methods = &qpznode_methods, .name = DNS_NAME_INITEMPTY, .nspace = nspace, - .heap = qpdb->heap, .references = ISC_REFCOUNT_INITIALIZER(1), .locknum = qpzone_get_locknum(), }; isc_mem_attach(qpdb->common.mctx, &newdata->mctx); dns_name_dup(name, qpdb->common.mctx, &newdata->name); - qpz_heap_ref(newdata->heap); #if DNS_DB_NODETRACE fprintf(stderr, "new_qpznode:%s:%s:%d:%p->references = 1\n", __func__, @@ -824,7 +1000,7 @@ first_existing_header(dns_vectop_t *top, uint32_t serial) { } static void -clean_multiple_headers(dns_vectop_t *top) { +clean_multiple_headers(qpz_heap_t *heap, qpznode_t *node, dns_vectop_t *top) { uint32_t parent_serial = UINT32_MAX; REQUIRE(top != NULL); @@ -835,8 +1011,10 @@ clean_multiple_headers(dns_vectop_t *top) { if (header->serial == parent_serial || IGNORE(header)) { ISC_SLIST_PTR_REMOVE(p, header, next_header); - deletedata(header->node, header); - dns_vecheader_destroy(&header); + LOCK(&heap->lock); + resign_unregister(heap, node, header); + UNLOCK(&heap->lock); + dns_vecheader_unref(header); } else { parent_serial = header->serial; ISC_SLIST_PTR_ADVANCE(p, next_header); @@ -845,7 +1023,8 @@ clean_multiple_headers(dns_vectop_t *top) { } static bool -clean_multiple_versions(dns_vectop_t *top, uint32_t least_serial) { +clean_multiple_versions(qpz_heap_t *heap, qpznode_t *node, dns_vectop_t *top, + uint32_t least_serial) { REQUIRE(top != NULL); if (ISC_SLIST_EMPTY(top->headers)) { @@ -859,8 +1038,10 @@ clean_multiple_versions(dns_vectop_t *top, uint32_t least_serial) { dns_vecheader_t *header = *p; if (header->serial < least_serial) { ISC_SLIST_PTR_REMOVE(p, header, next_header); - deletedata(header->node, header); - dns_vecheader_destroy(&header); + LOCK(&heap->lock); + resign_unregister(heap, node, header); + UNLOCK(&heap->lock); + dns_vecheader_unref(header); } else { multiple = true; ISC_SLIST_PTR_ADVANCE(p, next_header); @@ -870,7 +1051,7 @@ clean_multiple_versions(dns_vectop_t *top, uint32_t least_serial) { } static void -clean_zone_node(qpznode_t *node, uint32_t least_serial) { +clean_zone_node(qpz_heap_t *heap, qpznode_t *node, uint32_t least_serial) { bool still_dirty = false; /* @@ -888,7 +1069,7 @@ clean_zone_node(qpznode_t *node, uint32_t least_serial) { * with the same serial number, or that have the IGNORE * attribute. */ - clean_multiple_headers(top); + clean_multiple_headers(heap, node, top); if (first_header(top) != NULL) { /* @@ -900,7 +1081,7 @@ clean_zone_node(qpznode_t *node, uint32_t least_serial) { * less than least_serial too, but we cannot delete it * because it is the most recent version. */ - still_dirty = clean_multiple_versions(top, + still_dirty = clean_multiple_versions(heap, node, top, least_serial); } } @@ -955,37 +1136,24 @@ qpznode_erefs_decrement(qpznode_t *node DNS__DB_FLARG) { * (and possibly the node use counter), cleans up and deletes the node * if necessary, then decrements the internal reference counter as well. */ -static void -qpznode_release(qpznode_t *node, uint32_t least_serial, - isc_rwlocktype_t *nlocktypep DNS__DB_FLARG) { - REQUIRE(*nlocktypep != isc_rwlocktype_none); +static bool +qpznode_release(qpznode_t *node DNS__DB_FLARG) { + bool has_erefs = false; if (!qpznode_erefs_decrement(node DNS__DB_FLARG_PASS)) { + has_erefs = true; goto unref; } /* Handle easy and typical case first. */ if (!node->dirty && !ISC_SLIST_EMPTY(node->next_type)) { + has_erefs = true; goto unref; } - if (node->dirty && least_serial > 0) { - /* - * Only do node cleanup when called from closeversion. - * Closeversion, unlike other call sites, will provide the - * least_serial, and will hold a write lock instead of a read - * lock. - * - * This way we avoid having to protect the db by increasing - * the db reference count, avoiding contention in single - * zone workloads. - */ - REQUIRE(*nlocktypep == isc_rwlocktype_write); - clean_zone_node(node, least_serial); - } - unref: qpznode_unref(node); + return has_erefs; } static void @@ -995,8 +1163,6 @@ bindrdataset(qpzonedb_t *qpdb, qpznode_t *node, dns_vecheader_t *header, return; } - qpznode_acquire(node DNS__DB_FLARG_PASS); - INSIST(rdataset->methods == NULL); /* We must be disassociated. */ rdataset->methods = &dns_rdatavec_rdatasetmethods; @@ -1013,6 +1179,7 @@ bindrdataset(qpzonedb_t *qpdb, qpznode_t *node, dns_vecheader_t *header, rdataset->vec.db = (dns_db_t *)qpdb; rdataset->vec.node = (dns_dbnode_t *)node; rdataset->vec.header = header; + dns_vecheader_ref(header); rdataset->vec.iter.iter_pos = NULL; rdataset->vec.iter.iter_count = 0; @@ -1025,7 +1192,7 @@ bindrdataset(qpzonedb_t *qpdb, qpznode_t *node, dns_vecheader_t *header, */ if (RESIGN(header)) { rdataset->attributes.resign = true; - rdataset->resign = (header->resign << 1) | header->resign_lsb; + rdataset->resign = header->resign; } else { rdataset->resign = 0; } @@ -1230,39 +1397,6 @@ newversion(dns_db_t *db, dns_dbversion_t **versionp) { return ISC_R_SUCCESS; } -static void -resigninsert(dns_vecheader_t *newheader) { - REQUIRE(newheader->heap_index == 0); - - LOCK(get_heap_lock(newheader)); - isc_heap_insert(HEADERNODE(newheader)->heap->heap, newheader); - UNLOCK(get_heap_lock(newheader)); -} - -static void -resigndelete(qpzonedb_t *qpdb ISC_ATTR_UNUSED, qpz_version_t *version, - dns_vecheader_t *header DNS__DB_FLARG) { - if (header == NULL || header->heap_index == 0) { - return; - } - - LOCK(get_heap_lock(header)); - isc_heap_delete(HEADERNODE(header)->heap->heap, header->heap_index); - UNLOCK(get_heap_lock(header)); - - header->heap_index = 0; - qpznode_acquire(HEADERNODE(header) DNS__DB_FLARG_PASS); - - qpz_resigned_t *resigned = isc_mem_get(((dns_db_t *)qpdb)->mctx, - sizeof(*resigned)); - *resigned = (qpz_resigned_t){ - .header = header, - .link = ISC_LINK_INITIALIZER, - }; - - ISC_LIST_APPEND(version->resigned_list, resigned, link); -} - static void make_least_version(qpzonedb_t *qpdb, qpz_version_t *version, qpz_changedlist_t *cleanup_list) { @@ -1481,18 +1615,18 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_rwlock_t *nlock = NULL; isc_rwlocktype_t nlocktype = isc_rwlocktype_none; dns_vecheader_t *header = resigned->header; + qpznode_t *resigned_node = resigned->node; ISC_LIST_UNLINK(resigned_list, resigned, link); - isc_mem_put(db->mctx, resigned, sizeof(*resigned)); - - nlock = qpzone_get_lock(HEADERNODE(header)); + nlock = qpzone_get_lock(resigned_node); NODE_WRLOCK(nlock, &nlocktype); if (rollback && !IGNORE(header)) { - resigninsert(header); + LOCK(&qpdb->heap->lock); + resign_register(qpdb->heap, resigned_node, header); + UNLOCK(&qpdb->heap->lock); } - qpznode_release(HEADERNODE(header), least_serial, - &nlocktype DNS__DB_FLARG_PASS); + qpz_resigned_destroy(db->mctx, &resigned); NODE_UNLOCK(nlock, &nlocktype); } @@ -1512,11 +1646,17 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, if (rollback) { rollback_node(node, serial); } - qpznode_release(node, least_serial, - &nlocktype DNS__DB_FILELINE); + bool has_erefs = qpznode_release(node DNS__DB_FILELINE); + if (!has_erefs) { + clean_zone_node(qpdb->heap, node, least_serial); + } NODE_UNLOCK(nlock, &nlocktype); + /* + * The node reference is released separately above, so + * we just free the changed structure here. + */ isc_mem_put(qpdb->common.mctx, changed, sizeof(*changed)); } @@ -1713,19 +1853,15 @@ cname_and_other(qpznode_t *node, uint32_t serial) { } static qpz_changed_t * -add_changed(qpzonedb_t *qpdb, dns_vecheader_t *header, +add_changed(qpzonedb_t *qpdb, qpznode_t *node, qpz_version_t *version DNS__DB_FLARG) { - qpz_changed_t *changed = NULL; - qpznode_t *node = HEADERNODE(header); - - changed = isc_mem_get(qpdb->common.mctx, sizeof(*changed)); + qpz_changed_t *changed = qpz_changed_new(qpdb->common.mctx, + node DNS__DB_FLARG_PASS); RWLOCK(&qpdb->lock, isc_rwlocktype_write); REQUIRE(version->writer); - *changed = (qpz_changed_t){ .node = node }; ISC_LIST_INITANDAPPEND(version->changed_list, changed, link); - qpznode_acquire(node DNS__DB_FLARG_PASS); RWUNLOCK(&qpdb->lock, isc_rwlocktype_write); return changed; @@ -1779,8 +1915,7 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, * being made to this node, because it's harmless and * simplifies the code. */ - changed = add_changed(qpdb, newheader, - version DNS__DB_FLARG_PASS); + changed = add_changed(qpdb, node, version DNS__DB_FLARG_PASS); } ntypes = 0; @@ -1851,22 +1986,19 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, * alone. It will get cleaned up when * clean_zone_node() runs. */ - deletedata(newheader->node, newheader); - dns_vecheader_destroy(&newheader); + dns_vecheader_unref(newheader); newheader = merged; - dns_vecheader_reset(newheader, - (dns_dbnode_t *)node); /* * dns_rdatavec_subtract takes the header from * the first argument, so it preserves the case */ if (loading && RESIGN(newheader) && RESIGN(header) && - resign_sooner(header, newheader)) + resign_sooner_values(header->resign, + newheader->resign, + newheader->typepair)) { newheader->resign = header->resign; - newheader->resign_lsb = - header->resign_lsb; } } else { if (result == DNS_R_TOOMANYRECORDS) { @@ -1876,8 +2008,7 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, header->typepair), "updating", qpdb->maxrrperset); } - deletedata(newheader->node, newheader); - dns_vecheader_destroy(&newheader); + dns_vecheader_unref(newheader); return result; } } @@ -1887,7 +2018,9 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, if (loading) { if (RESIGN(newheader)) { - resigninsert(newheader); + LOCK(&qpdb->heap->lock); + resign_register(qpdb->heap, node, newheader); + UNLOCK(&qpdb->heap->lock); /* resigndelete not needed here */ } @@ -1903,13 +2036,18 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, maybe_update_recordsandsize(false, version, header, nodename->length); - deletedata(header->node, header); - dns_vecheader_destroy(&header); + LOCK(&qpdb->heap->lock); + resign_unregister(qpdb->heap, node, header); + UNLOCK(&qpdb->heap->lock); + dns_vecheader_unref(header); } else { if (RESIGN(newheader)) { - resigninsert(newheader); - resigndelete(qpdb, version, - header DNS__DB_FLARG_PASS); + LOCK(&qpdb->heap->lock); + resign_register(qpdb->heap, node, newheader); + resign_unregister(qpdb->heap, node, header); + UNLOCK(&qpdb->heap->lock); + resign_rollback(qpdb, node, version, + header DNS__DB_FLARG_PASS); } ISC_SLIST_PREPEND(foundtop->headers, newheader, @@ -1930,14 +2068,17 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, * If we're trying to delete the type, don't bother. */ if (!EXISTS(newheader)) { - deletedata(newheader->node, newheader); - dns_vecheader_destroy(&newheader); + dns_vecheader_unref(newheader); return DNS_R_UNCHANGED; } if (RESIGN(newheader)) { - resigninsert(newheader); - resigndelete(qpdb, version, header DNS__DB_FLARG_PASS); + LOCK(&qpdb->heap->lock); + resign_register(qpdb->heap, node, newheader); + resign_unregister(qpdb->heap, node, header); + UNLOCK(&qpdb->heap->lock); + resign_rollback(qpdb, node, version, + header DNS__DB_FLARG_PASS); } if (foundtop != NULL) { @@ -1966,8 +2107,10 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, if (qpdb->maxtypepername > 0 && ntypes >= qpdb->maxtypepername) { - deletedata(newheader->node, newheader); - dns_vecheader_destroy(&newheader); + LOCK(&qpdb->heap->lock); + resign_unregister(qpdb->heap, node, newheader); + UNLOCK(&qpdb->heap->lock); + dns_vecheader_unref(newheader); return DNS_R_TOOMANYRECORDS; } @@ -2114,7 +2257,6 @@ loading_addrdataset(void *arg, const dns_name_t *name, dns_rdataset_t *rdataset, } dns_vecheader_t *newheader = (dns_vecheader_t *)region.base; - dns_vecheader_reset(newheader, (dns_dbnode_t *)node); newheader->ttl = rdataset->ttl; newheader->serial = 1; atomic_store(&newheader->trust, rdataset->trust); @@ -2123,10 +2265,7 @@ loading_addrdataset(void *arg, const dns_name_t *name, dns_rdataset_t *rdataset, if (rdataset->attributes.resign) { DNS_VECHEADER_SETATTR(newheader, DNS_VECHEADERATTR_RESIGN); - newheader->resign = - (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >> - 1); - newheader->resign_lsb = rdataset->resign & 0x1; + newheader->resign = dns_time64_from32(rdataset->resign); } nlock = qpzone_get_lock(node); @@ -2308,10 +2447,11 @@ getsize(dns_db_t *db, dns_dbversion_t *dbversion, uint64_t *records, } static isc_result_t -setsigningtime(dns_db_t *db, dns_dbnode_t *node, dns_rdataset_t *rdataset, +setsigningtime(dns_db_t *db, dns_dbnode_t *dbnode, dns_rdataset_t *rdataset, isc_stdtime_t resign) { qpzonedb_t *qpdb = (qpzonedb_t *)db; - dns_vecheader_t *header = NULL, oldheader; + qpznode_t *node = (qpznode_t *)dbnode; + dns_vecheader_t *header = NULL; isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlock_t *nlock = NULL; @@ -2321,40 +2461,51 @@ setsigningtime(dns_db_t *db, dns_dbnode_t *node, dns_rdataset_t *rdataset, header = dns_vecheader_getheader(rdataset); - nlock = qpzone_get_lock((qpznode_t *)node); + nlock = qpzone_get_lock(node); NODE_WRLOCK(nlock, &nlocktype); - oldheader = *header; - /* - * Only break the heap invariant (by adjusting resign and resign_lsb) - * if we are going to be restoring it by calling isc_heap_increased - * or isc_heap_decreased. + * Check if element is in the heap using hashmap lookup. */ - if (resign != 0) { - header->resign = (isc_stdtime_t)(dns_time64_from32(resign) >> - 1); - header->resign_lsb = resign & 0x1; - } - if (header->heap_index != 0) { + qpz_resign_t *found_elem = NULL; + isc_result_t find_result; + + LOCK(&qpdb->heap->lock); + find_result = resign_lookup(qpdb->heap, header, node, &found_elem); + + if (find_result == ISC_R_SUCCESS) { + /* Element is in heap */ INSIST(RESIGN(header)); - LOCK(get_heap_lock(header)); if (resign == 0) { - isc_heap_delete(((qpznode_t *)node)->heap->heap, - header->heap_index); - header->heap_index = 0; - } else if (resign_sooner(header, &oldheader)) { - isc_heap_increased(((qpznode_t *)node)->heap->heap, - header->heap_index); - } else if (resign_sooner(&oldheader, header)) { - isc_heap_decreased(((qpznode_t *)node)->heap->heap, - header->heap_index); + resign_unregister(qpdb->heap, node, header); + } else { + int64_t old_resign = header->resign; + int64_t new_resign = dns_time64_from32(resign); + + header->resign = new_resign; + + if (resign_sooner_values(new_resign, old_resign, + header->typepair)) + { + isc_heap_increased(qpdb->heap->heap, + found_elem->heap_index); + } else if (resign_sooner_values(old_resign, new_resign, + header->typepair)) + { + isc_heap_decreased(qpdb->heap->heap, + found_elem->heap_index); + } + /* No heap adjustment needed if neither direction + * indicates sooner */ } - UNLOCK(get_heap_lock(header)); } else if (resign != 0) { + /* Element not in heap, add it */ + header->resign = dns_time64_from32(resign); DNS_VECHEADER_SETATTR(header, DNS_VECHEADERATTR_RESIGN); - resigninsert(header); + + resign_register(qpdb->heap, node, header); } + UNLOCK(&qpdb->heap->lock); NODE_UNLOCK(nlock, &nlocktype); return ISC_R_SUCCESS; } @@ -2363,6 +2514,7 @@ static isc_result_t getsigningtime(dns_db_t *db, isc_stdtime_t *resign, dns_name_t *foundname, dns_typepair_t *typepair) { qpzonedb_t *qpdb = (qpzonedb_t *)db; + qpz_resign_t *elem = NULL; dns_vecheader_t *header = NULL; isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlock_t *nlock = NULL; @@ -2374,33 +2526,33 @@ getsigningtime(dns_db_t *db, isc_stdtime_t *resign, dns_name_t *foundname, REQUIRE(typepair != NULL); LOCK(&qpdb->heap->lock); - header = isc_heap_element(qpdb->heap->heap, 1); - if (header == NULL) { + elem = isc_heap_element(qpdb->heap->heap, 1); + if (elem == NULL) { UNLOCK(&qpdb->heap->lock); return ISC_R_NOTFOUND; } - nlock = qpzone_get_lock(HEADERNODE(header)); + header = elem->header; + nlock = qpzone_get_lock(elem->node); UNLOCK(&qpdb->heap->lock); again: NODE_RDLOCK(nlock, &nlocktype); LOCK(&qpdb->heap->lock); - header = isc_heap_element(qpdb->heap->heap, 1); + elem = isc_heap_element(qpdb->heap->heap, 1); - if (header != NULL && qpzone_get_lock(HEADERNODE(header)) != nlock) { + if (elem != NULL && qpzone_get_lock(elem->node) != nlock) { UNLOCK(&qpdb->heap->lock); NODE_UNLOCK(nlock, &nlocktype); - nlock = qpzone_get_lock(HEADERNODE(header)); + nlock = qpzone_get_lock(elem->node); goto again; } - if (header != NULL) { - *resign = RESIGN(header) - ? (header->resign << 1) | header->resign_lsb - : 0; - dns_name_copy(&HEADERNODE(header)->name, foundname); + if (elem != NULL) { + header = elem->header; + *resign = RESIGN(header) ? (uint32_t)header->resign : 0; + dns_name_copy(&elem->node->name, foundname); *typepair = header->typepair; result = ISC_R_SUCCESS; } @@ -3812,7 +3964,7 @@ tree_exit: nlock = qpzone_get_lock(node); NODE_RDLOCK(nlock, &nlocktype); - qpznode_release(node, 0, &nlocktype DNS__DB_FLARG_PASS); + (void)qpznode_release(node DNS__DB_FLARG_PASS); NODE_UNLOCK(nlock, &nlocktype); } @@ -3891,7 +4043,7 @@ qpzone_detachnode(dns_dbnode_t **nodep DNS__DB_FLARG) { rcu_read_lock(); NODE_RDLOCK(nlock, &nlocktype); - qpznode_release(node, 0, &nlocktype DNS__DB_FLARG_PASS); + (void)qpznode_release(node DNS__DB_FLARG_PASS); NODE_UNLOCK(nlock, &nlocktype); rcu_read_unlock(); } @@ -3923,19 +4075,6 @@ getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) { return ISC_R_SUCCESS; } -static void -deletedata(dns_dbnode_t *node ISC_ATTR_UNUSED, void *data) { - dns_vecheader_t *header = data; - - if (header->heap_index != 0) { - LOCK(get_heap_lock(header)); - isc_heap_delete(HEADERNODE(header)->heap->heap, - header->heap_index); - UNLOCK(get_heap_lock(header)); - } - header->heap_index = 0; -} - /* * Rdataset Iterator Methods */ @@ -4076,7 +4215,7 @@ dereference_iter_node(qpdb_dbiterator_t *iter DNS__DB_FLARG) { nlock = qpzone_get_lock(node); NODE_RDLOCK(nlock, &nlocktype); - qpznode_release(node, 0, &nlocktype DNS__DB_FLARG_PASS); + (void)qpznode_release(node DNS__DB_FLARG_PASS); NODE_UNLOCK(nlock, &nlocktype); } @@ -4488,7 +4627,7 @@ dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG) { */ nlock = qpzone_get_lock(node); NODE_RDLOCK(nlock, &nlocktype); - qpznode_release(node, 0, &nlocktype DNS__DB_FLARG_PASS); + (void)qpznode_release(node DNS__DB_FLARG_PASS); NODE_UNLOCK(nlock, &nlocktype); if (result == ISC_R_SUCCESS) { @@ -4582,7 +4721,7 @@ dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG) { */ nlock = qpzone_get_lock(node); NODE_RDLOCK(nlock, &nlocktype); - qpznode_release(node, 0, &nlocktype DNS__DB_FLARG_PASS); + (void)qpznode_release(node DNS__DB_FLARG_PASS); NODE_UNLOCK(nlock, &nlocktype); if (result == ISC_R_SUCCESS) { @@ -4741,7 +4880,6 @@ qpzone_addrdataset_inner(qpzonedb_t *qpdb, qpznode_t *node, dns_rdataset_getownercase(rdataset, name); dns_vecheader_t *newheader = (dns_vecheader_t *)region.base; - dns_vecheader_reset(newheader, (dns_dbnode_t *)node); dns_vecheader_setownercase(newheader, name); @@ -4753,10 +4891,7 @@ qpzone_addrdataset_inner(qpzonedb_t *qpdb, qpznode_t *node, newheader->serial = version->serial; if (rdataset->attributes.resign) { DNS_VECHEADER_SETATTR(newheader, DNS_VECHEADERATTR_RESIGN); - newheader->resign = - (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >> - 1); - newheader->resign_lsb = rdataset->resign & 0x1; + newheader->resign = dns_time64_from32(rdataset->resign); } /* @@ -4877,22 +5012,18 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, } newheader = (dns_vecheader_t *)region.base; - dns_vecheader_reset(newheader, (dns_dbnode_t *)node); newheader->ttl = rdataset->ttl; atomic_init(&newheader->attributes, 0); newheader->serial = version->serial; if (rdataset->attributes.resign) { DNS_VECHEADER_SETATTR(newheader, DNS_VECHEADERATTR_RESIGN); - newheader->resign = - (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >> - 1); - newheader->resign_lsb = rdataset->resign & 0x1; + newheader->resign = dns_time64_from32(rdataset->resign); } nlock = qpzone_get_lock(node); NODE_WRLOCK(nlock, &nlocktype); - changed = add_changed(qpdb, newheader, version DNS__DB_FLARG_PASS); + changed = add_changed(qpdb, node, version DNS__DB_FLARG_PASS); ISC_SLIST_FOREACH(top, node->next_type, next_type) { if (top->typepair == newheader->typepair) { foundtop = top; @@ -4931,10 +5062,11 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, &subresult); } if (result == ISC_R_SUCCESS) { - deletedata(newheader->node, newheader); - dns_vecheader_destroy(&newheader); + LOCK(&qpdb->heap->lock); + resign_unregister(qpdb->heap, node, newheader); + UNLOCK(&qpdb->heap->lock); + dns_vecheader_unref(newheader); newheader = subresult; - dns_vecheader_reset(newheader, (dns_dbnode_t *)node); /* * dns_rdatavec_subtract takes the header from the first * argument, so it preserves the case @@ -4943,8 +5075,9 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, DNS_VECHEADER_SETATTR(newheader, DNS_VECHEADERATTR_RESIGN); newheader->resign = header->resign; - newheader->resign_lsb = header->resign_lsb; - resigninsert(newheader); + LOCK(&qpdb->heap->lock); + resign_register(qpdb->heap, node, newheader); + UNLOCK(&qpdb->heap->lock); } /* * We have to set the serial since the rdatavec @@ -4964,18 +5097,21 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, * This subtraction would remove all of the rdata; * add a nonexistent header instead. */ - deletedata(newheader->node, newheader); - dns_vecheader_destroy(&newheader); - newheader = dns_vecheader_new(db->mctx, - (dns_dbnode_t *)node); + LOCK(&qpdb->heap->lock); + resign_unregister(qpdb->heap, node, newheader); + UNLOCK(&qpdb->heap->lock); + dns_vecheader_unref(newheader); + newheader = dns_vecheader_new(db->mctx); newheader->ttl = 0; newheader->typepair = foundtop->typepair; atomic_init(&newheader->attributes, DNS_VECHEADERATTR_NONEXISTENT); newheader->serial = version->serial; } else { - deletedata(newheader->node, newheader); - dns_vecheader_destroy(&newheader); + LOCK(&qpdb->heap->lock); + resign_unregister(qpdb->heap, node, newheader); + UNLOCK(&qpdb->heap->lock); + dns_vecheader_unref(newheader); goto unlock; } @@ -4989,14 +5125,19 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, node->dirty = true; changed->dirty = true; - resigndelete(qpdb, version, header DNS__DB_FLARG_PASS); + LOCK(&qpdb->heap->lock); + resign_unregister(qpdb->heap, node, header); + UNLOCK(&qpdb->heap->lock); + resign_rollback(qpdb, node, version, header DNS__DB_FLARG_PASS); } else { /* * The rdataset doesn't exist, so we don't need to do anything * to satisfy the deletion request. */ - deletedata(newheader->node, newheader); - dns_vecheader_destroy(&newheader); + LOCK(&qpdb->heap->lock); + resign_unregister(qpdb->heap, node, newheader); + UNLOCK(&qpdb->heap->lock); + dns_vecheader_unref(newheader); if ((options & DNS_DBSUB_EXACT) != 0) { result = DNS_R_NOTEXACT; } else { @@ -5045,7 +5186,7 @@ qpzone_deleterdataset(dns_db_t *db, dns_dbnode_t *dbnode, return ISC_R_NOTIMPLEMENTED; } - newheader = dns_vecheader_new(db->mctx, (dns_dbnode_t *)node); + newheader = dns_vecheader_new(db->mctx); newheader->typepair = DNS_TYPEPAIR_VALUE(type, covers); newheader->ttl = 0; atomic_init(&newheader->attributes, DNS_VECHEADERATTR_NONEXISTENT); @@ -5584,21 +5725,18 @@ static dns_dbmethods_t qpdb_zonemethods = { static dns_dbnode_methods_t qpznode_methods = (dns_dbnode_methods_t){ .attachnode = qpzone_attachnode, .detachnode = qpzone_detachnode, - .deletedata = deletedata, }; static void destroy_qpznode(qpznode_t *node) { ISC_SLIST_FOREACH(top, node->next_type, next_type) { ISC_SLIST_FOREACH(header, top->headers, next_header) { - deletedata(header->node, header); - dns_vecheader_destroy(&header); + dns_vecheader_unref(header); } dns_vectop_destroy(node->mctx, &top); } - qpz_heap_unref(node->heap); dns_name_free(&node->name, node->mctx); isc_mem_putanddetach(&node->mctx, node, sizeof(qpznode_t)); } diff --git a/lib/dns/rdatavec.c b/lib/dns/rdatavec.c index 7ae9331c03..85a78fd2f4 100644 --- a/lib/dns/rdatavec.c +++ b/lib/dns/rdatavec.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -133,6 +134,8 @@ newvec(dns_rdataset_t *rdataset, isc_mem_t *mctx, isc_region_t *region, .next_header = ISC_SLINK_INITIALIZER, .trust = rdataset->trust, .ttl = rdataset->ttl, + .references = ISC_REFCOUNT_INITIALIZER(1), + .mctx = isc_mem_ref(mctx), }; region->base = (unsigned char *)header; @@ -356,11 +359,16 @@ dns_rdatavec_fromrdataset(dns_rdataset_t *rdataset, isc_mem_t *mctx, rdataset->covers); } + /* + * Reset the vecheader content, but keep the refcount and mctx. + */ *new = (dns_vecheader_t){ .next_header = ISC_SLINK_INITIALIZER, .typepair = typepair, .trust = rdataset->trust, .ttl = rdataset->ttl, + .references = atomic_load_acquire(&new->references), + .mctx = new->mctx, }; } @@ -583,10 +591,17 @@ dns_rdatavec_merge(dns_vecheader_t *oheader, dns_vecheader_t *nheader, uint16_t case_attrs = DNS_VECHEADER_GETATTR( oheader, DNS_VECHEADERATTR_CASESET | DNS_VECHEADERATTR_CASEFULLYLOWER); - DNS_VECHEADER_CLRATTR(as_header, - DNS_VECHEADERATTR_CASESET | - DNS_VECHEADERATTR_CASEFULLYLOWER); + atomic_init(&as_header->attributes, 0); DNS_VECHEADER_SETATTR(as_header, case_attrs); + if (RESIGN(nheader)) { + DNS_VECHEADER_SETATTR(as_header, DNS_VECHEADERATTR_RESIGN); + } + + /* Initialize refcount for the new header */ + isc_refcount_init(&as_header->references, 1); + + /* We need to re-attach to the memory context for refcount reasons. */ + as_header->mctx = isc_mem_ref(mctx); tcurrent = tstart + header_size(nheader); @@ -740,6 +755,18 @@ dns_rdatavec_subtract(dns_vecheader_t *oheader, dns_vecheader_t *sheader, */ tstart = isc_mem_get(mctx, tlength); memmove(tstart, oheader, header_size(oheader)); + + /* Initialize refcount for the new header */ + dns_vecheader_t *as_header = (dns_vecheader_t *)tstart; + isc_refcount_init(&as_header->references, 1); + atomic_init(&as_header->attributes, 0); + if (RESIGN(oheader)) { + DNS_VECHEADER_SETATTR(as_header, DNS_VECHEADERATTR_RESIGN); + } + + /* We need to re-attach to the memory context for refcount reasons. */ + as_header->mctx = isc_mem_ref(mctx); + tcurrent = tstart + header_size(oheader); /* @@ -790,47 +817,18 @@ dns_vecheader_setownercase(dns_vecheader_t *header, const dns_name_t *name) { DNS_VECHEADER_SETATTR(header, DNS_VECHEADERATTR_CASESET); } -void -dns_vecheader_reset(dns_vecheader_t *h, dns_dbnode_t *node) { - h->heap_index = 0; - h->node = node; - - atomic_init(&h->attributes, 0); - - STATIC_ASSERT(sizeof(h->attributes) == 2, - "The .attributes field of dns_vecheader_t needs to be " - "16-bit int type exactly."); -} - dns_vecheader_t * -dns_vecheader_new(isc_mem_t *mctx, dns_dbnode_t *node) { +dns_vecheader_new(isc_mem_t *mctx) { dns_vecheader_t *h = NULL; h = isc_mem_get(mctx, sizeof(*h)); *h = (dns_vecheader_t){ - .node = node, + .references = ISC_REFCOUNT_INITIALIZER(1), + .mctx = isc_mem_ref(mctx), }; return h; } -void -dns_vecheader_destroy(dns_vecheader_t **headerp) { - unsigned int size; - dns_vecheader_t *header = *headerp; - - *headerp = NULL; - - isc_mem_t *mctx = header->node->mctx; - - if (EXISTS(header)) { - size = dns_rdatavec_size(header); - } else { - size = sizeof(*header); - } - - isc_mem_put(mctx, header, size); -} - /* Iterators for already bound rdatavec */ isc_result_t @@ -912,9 +910,7 @@ vecheader_current(rdatavec_iter_t *iter, dns_rdata_t *rdata) { static void rdataset_disassociate(dns_rdataset_t *rdataset DNS__DB_FLARG) { - dns_dbnode_t *node = rdataset->vec.node; - - dns__db_detachnode(&node DNS__DB_FLARG_PASS); + dns_vecheader_unref(rdataset->vec.header); } static isc_result_t @@ -936,16 +932,14 @@ rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) { static void rdataset_clone(const dns_rdataset_t *source, dns_rdataset_t *target DNS__DB_FLARG) { - dns_dbnode_t *node = source->vec.node; - dns_dbnode_t *cloned_node = NULL; - - dns__db_attachnode(node, &cloned_node DNS__DB_FLARG_PASS); INSIST(!ISC_LINK_LINKED(target, link)); *target = *source; ISC_LINK_INIT(target, link); target->vec.iter.iter_pos = NULL; target->vec.iter.iter_count = 0; + + dns_vecheader_ref(target->vec.header); } static unsigned int @@ -1013,3 +1007,16 @@ dns_vectop_destroy(isc_mem_t *mctx, dns_vectop_t **topp) { *topp = NULL; isc_mem_put(mctx, top, sizeof(*top)); } + +static void +vecheader_destroy(dns_vecheader_t *header) { + unsigned int size = EXISTS(header) ? dns_rdatavec_size(header) + : sizeof(*header); + + isc_mem_putanddetach(&header->mctx, header, size); +} + +/* + * Reference counting implementation for dns_vecheader_t + */ +ISC_REFCOUNT_IMPL(dns_vecheader, vecheader_destroy); diff --git a/tests/dns/qpzone_test.c b/tests/dns/qpzone_test.c index 46c3cd7448..817e72105f 100644 --- a/tests/dns/qpzone_test.c +++ b/tests/dns/qpzone_test.c @@ -187,9 +187,7 @@ ownercase_test_one(const char *str1, const char *str2) { .common.mctx = isc_g_mctx, }; qpznode_t node = { .methods = &qpznode_methods, .locknum = 0 }; - dns_vecheader_t header = { - .node = (dns_dbnode_t *)&node, - }; + dns_vecheader_t header = { 0 }; dns_rdataset_t rdataset = { .magic = DNS_RDATASET_MAGIC, .vec = { .db = (dns_db_t *)qpdb, @@ -360,9 +358,7 @@ ISC_RUN_TEST_IMPL(setownercase) { .common.mctx = isc_g_mctx, }; qpznode_t node = { .methods = &qpznode_methods, .locknum = 0 }; - dns_vecheader_t header = { - .node = (dns_dbnode_t *)&node, - }; + dns_vecheader_t header = { 0 }; dns_rdataset_t rdataset = { .magic = DNS_RDATASET_MAGIC, .vec = { .db = (dns_db_t *)qpdb, diff --git a/tests/dns/vecheader_test.c b/tests/dns/vecheader_test.c index fd486bd88f..f64e7a8d94 100644 --- a/tests/dns/vecheader_test.c +++ b/tests/dns/vecheader_test.c @@ -282,10 +282,274 @@ cleanup: } } +/* Test case where dns_rdatavec_subtract causes assertion failure */ +ISC_RUN_TEST_IMPL(rdatavec_subtract_assertion_failure) { + isc_mem_t *mctx = isc_g_mctx; + UNUSED(state); + dns_vecheader_t *original_header = NULL, *subtract_header = NULL, + *result_header = NULL; + dns_rdataset_t original_rdataset, subtract_rdataset; + dns_rdatalist_t *original_rdatalist = NULL, *subtract_rdatalist = NULL; + dns_rdata_t *original_rdata1 = NULL, *original_rdata2 = NULL, + *subtract_rdata = NULL; + unsigned char *original_data1 = NULL, *original_data2 = NULL, + *subtract_data = NULL; + isc_region_t original_region, subtract_region; + isc_result_t result; + + /* Allocate temporary structures for original rdatalist with 2 records + */ + original_data1 = isc_mem_get(mctx, 256); + original_data2 = isc_mem_get(mctx, 256); + original_rdatalist = isc_mem_get(mctx, sizeof(*original_rdatalist)); + original_rdata1 = isc_mem_get(mctx, sizeof(*original_rdata1)); + original_rdata2 = isc_mem_get(mctx, sizeof(*original_rdata2)); + + /* Allocate temporary structures for subtract rdatalist with 1 record */ + subtract_data = isc_mem_get(mctx, 256); + subtract_rdatalist = isc_mem_get(mctx, sizeof(*subtract_rdatalist)); + subtract_rdata = isc_mem_get(mctx, sizeof(*subtract_rdata)); + + /* Initialize original rdataset and rdatalist with 2 A records */ + dns_rdataset_init(&original_rdataset); + dns_rdatalist_init(original_rdatalist); + original_rdatalist->type = dns_rdatatype_a; + original_rdatalist->rdclass = dns_rdataclass_in; + original_rdatalist->ttl = 300; + + /* Create first rdata: 192.168.1.1 */ + dns_rdata_init(original_rdata1); + CHECK(dns_test_rdatafromstring(original_rdata1, dns_rdataclass_in, + dns_rdatatype_a, original_data1, 256, + "192.168.1.1", false)); + ISC_LIST_APPEND(original_rdatalist->rdata, original_rdata1, link); + + /* Create second rdata: 192.168.1.2 */ + dns_rdata_init(original_rdata2); + CHECK(dns_test_rdatafromstring(original_rdata2, dns_rdataclass_in, + dns_rdatatype_a, original_data2, 256, + "192.168.1.2", false)); + ISC_LIST_APPEND(original_rdatalist->rdata, original_rdata2, link); + + dns_rdatalist_tordataset(original_rdatalist, &original_rdataset); + + /* Initialize subtract rdataset and rdatalist with 1 A record */ + dns_rdataset_init(&subtract_rdataset); + dns_rdatalist_init(subtract_rdatalist); + subtract_rdatalist->type = dns_rdatatype_a; + subtract_rdatalist->rdclass = dns_rdataclass_in; + subtract_rdatalist->ttl = 300; + + /* Create subtract rdata: 192.168.1.1 (same as first record) */ + dns_rdata_init(subtract_rdata); + CHECK(dns_test_rdatafromstring(subtract_rdata, dns_rdataclass_in, + dns_rdatatype_a, subtract_data, 256, + "192.168.1.1", false)); + ISC_LIST_APPEND(subtract_rdatalist->rdata, subtract_rdata, link); + + dns_rdatalist_tordataset(subtract_rdatalist, &subtract_rdataset); + + /* Convert to vecheaders (each starts with refcount = 1) */ + CHECK(dns_rdatavec_fromrdataset(&original_rdataset, mctx, + &original_region, 0)); + original_header = (dns_vecheader_t *)original_region.base; + + CHECK(dns_rdatavec_fromrdataset(&subtract_rdataset, mctx, + &subtract_region, 0)); + subtract_header = (dns_vecheader_t *)subtract_region.base; + + /* + * This should cause assertion failure because dns_rdatavec_subtract() + * copies the original header (including its mctx) with memmove(), then + * tries to call isc_mem_attach() on the already-attached mctx field. + * Since we're subtracting 1 record from 2, it should create a new + * header and hit the problematic code path at rdatavec.c:759 + */ + result = dns_rdatavec_subtract(original_header, subtract_header, mctx, + dns_rdataclass_in, dns_rdatatype_a, 0, + &result_header); + + /* If we get here without assertion failure, the bug has been fixed */ + assert_int_equal(result, ISC_R_SUCCESS); + assert_non_null(result_header); + + /* Result should contain only the second record (192.168.1.2) */ + unsigned int result_count = dns_rdatavec_count(result_header); + assert_int_equal(result_count, 1); + +cleanup: + /* Cleanup rdatasets */ + if (DNS_RDATASET_VALID(&original_rdataset)) { + dns_rdataset_disassociate(&original_rdataset); + } + if (DNS_RDATASET_VALID(&subtract_rdataset)) { + dns_rdataset_disassociate(&subtract_rdataset); + } + + /* Cleanup vecheaders */ + if (original_header != NULL) { + dns_vecheader_unref(original_header); + } + if (subtract_header != NULL) { + dns_vecheader_unref(subtract_header); + } + if (result_header != NULL) { + dns_vecheader_unref(result_header); + } + + /* Cleanup temporary structures for original */ + if (original_rdata1 != NULL) { + isc_mem_put(mctx, original_rdata1, sizeof(*original_rdata1)); + } + if (original_rdata2 != NULL) { + isc_mem_put(mctx, original_rdata2, sizeof(*original_rdata2)); + } + if (original_rdatalist != NULL) { + isc_mem_put(mctx, original_rdatalist, + sizeof(*original_rdatalist)); + } + if (original_data1 != NULL) { + isc_mem_put(mctx, original_data1, 256); + } + if (original_data2 != NULL) { + isc_mem_put(mctx, original_data2, 256); + } + + /* Cleanup temporary structures for subtract */ + if (subtract_rdata != NULL) { + isc_mem_put(mctx, subtract_rdata, sizeof(*subtract_rdata)); + } + if (subtract_rdatalist != NULL) { + isc_mem_put(mctx, subtract_rdatalist, + sizeof(*subtract_rdatalist)); + } + if (subtract_data != NULL) { + isc_mem_put(mctx, subtract_data, 256); + } +} + +/* Test refcount functionality with merge and cleanup */ +ISC_RUN_TEST_IMPL(rdatavec_refcount_merge) { + isc_mem_t *mctx = isc_g_mctx; + UNUSED(state); + dns_vecheader_t *header1 = NULL, *header2 = NULL, *merged_header = NULL; + dns_rdataset_t rdataset1, rdataset2; + dns_rdatalist_t *rdatalist1 = NULL, *rdatalist2 = NULL; + dns_rdata_t *rdata1 = NULL, *rdata2 = NULL; + unsigned char *data1 = NULL, *data2 = NULL; + isc_region_t region1, region2; + isc_result_t result; + + /* Allocate temporary structures for first rdatalist */ + data1 = isc_mem_get(mctx, 256); + rdatalist1 = isc_mem_get(mctx, sizeof(*rdatalist1)); + rdata1 = isc_mem_get(mctx, sizeof(*rdata1)); + + /* Allocate temporary structures for second rdatalist */ + data2 = isc_mem_get(mctx, 256); + rdatalist2 = isc_mem_get(mctx, sizeof(*rdatalist2)); + rdata2 = isc_mem_get(mctx, sizeof(*rdata2)); + + /* Initialize first rdataset and rdatalist */ + dns_rdataset_init(&rdataset1); + dns_rdatalist_init(rdatalist1); + rdatalist1->type = dns_rdatatype_a; + rdatalist1->rdclass = dns_rdataclass_in; + rdatalist1->ttl = 300; + + /* Create first rdata */ + dns_rdata_init(rdata1); + CHECK(dns_test_rdatafromstring(rdata1, dns_rdataclass_in, + dns_rdatatype_a, data1, 256, + "192.168.1.1", false)); + + ISC_LIST_APPEND(rdatalist1->rdata, rdata1, link); + dns_rdatalist_tordataset(rdatalist1, &rdataset1); + + /* Initialize second rdataset and rdatalist */ + dns_rdataset_init(&rdataset2); + dns_rdatalist_init(rdatalist2); + rdatalist2->type = dns_rdatatype_a; + rdatalist2->rdclass = dns_rdataclass_in; + rdatalist2->ttl = 300; + + /* Create second rdata */ + dns_rdata_init(rdata2); + CHECK(dns_test_rdatafromstring(rdata2, dns_rdataclass_in, + dns_rdatatype_a, data2, 256, + "192.168.1.2", false)); + + ISC_LIST_APPEND(rdatalist2->rdata, rdata2, link); + dns_rdatalist_tordataset(rdatalist2, &rdataset2); + + /* Convert to vecheaders (each starts with refcount = 1) */ + CHECK(dns_rdatavec_fromrdataset(&rdataset1, mctx, ®ion1, 0)); + header1 = (dns_vecheader_t *)region1.base; + + CHECK(dns_rdatavec_fromrdataset(&rdataset2, mctx, ®ion2, 0)); + header2 = (dns_vecheader_t *)region2.base; + + /* Merge headers (this will create a new header with refcount = 1) */ + CHECK(dns_rdatavec_merge(header1, header2, mctx, dns_rdataclass_in, + dns_rdatatype_a, 0, 0, &merged_header)); + assert_non_null(merged_header); + + /* Test: merged header should have expected count */ + unsigned int merged_count = dns_rdatavec_count(merged_header); + assert_int_equal(merged_count, 2); + + /* Test: merged header should have expected size */ + unsigned int merged_size = dns_rdatavec_size(merged_header); + assert_true(merged_size > sizeof(dns_vecheader_t)); + +cleanup: + /* Cleanup rdatasets */ + if (DNS_RDATASET_VALID(&rdataset1)) { + dns_rdataset_disassociate(&rdataset1); + } + if (DNS_RDATASET_VALID(&rdataset2)) { + dns_rdataset_disassociate(&rdataset2); + } + + /* Cleanup using refcount - each header should be unreferenced once */ + if (header1 != NULL) { + dns_vecheader_unref(header1); + } + if (header2 != NULL) { + dns_vecheader_unref(header2); + } + if (merged_header != NULL) { + dns_vecheader_unref(merged_header); + } + + /* Cleanup temporary structures */ + if (rdata1 != NULL) { + isc_mem_put(mctx, rdata1, sizeof(*rdata1)); + } + if (rdatalist1 != NULL) { + isc_mem_put(mctx, rdatalist1, sizeof(*rdatalist1)); + } + if (data1 != NULL) { + isc_mem_put(mctx, data1, 256); + } + if (rdata2 != NULL) { + isc_mem_put(mctx, rdata2, sizeof(*rdata2)); + } + if (rdatalist2 != NULL) { + isc_mem_put(mctx, rdatalist2, sizeof(*rdatalist2)); + } + if (data2 != NULL) { + isc_mem_put(mctx, data2, 256); + } +} + ISC_TEST_LIST_START ISC_TEST_ENTRY_CUSTOM(merge_headers, setup_mctx, teardown_mctx) ISC_TEST_ENTRY_CUSTOM(merge_case_preservation, setup_mctx, teardown_mctx) ISC_TEST_ENTRY_CUSTOM(setcase_size_consistency, setup_mctx, teardown_mctx) +ISC_TEST_ENTRY_CUSTOM(rdatavec_subtract_assertion_failure, setup_mctx, + teardown_mctx) +ISC_TEST_ENTRY_CUSTOM(rdatavec_refcount_merge, setup_mctx, teardown_mctx) ISC_TEST_LIST_END ISC_TEST_MAIN From 6202492509d37dcd67e0fcbdf46e1cbecac61e11 Mon Sep 17 00:00:00 2001 From: Alessio Podda Date: Mon, 2 Mar 2026 13:52:02 +0100 Subject: [PATCH 4/5] Remove node and db pointer from dns_rdataset_t.vec Now that we track the references at the vecheader level, binding an rdataset is no longer guaranteed to keep its node alive. Therefore remove the node pointer from the rdataset, and instead decide whether glue is required by explicitely passing the owner name to addglue. --- lib/dns/db.c | 5 +-- lib/dns/db_p.h | 2 +- lib/dns/include/dns/db.h | 7 +++-- lib/dns/include/dns/rdataset.h | 2 -- lib/dns/qpzone.c | 57 ++++++++++++++-------------------- lib/ns/query.c | 4 +-- tests/dns/qpzone_test.c | 12 ++----- 7 files changed, 36 insertions(+), 53 deletions(-) diff --git a/lib/dns/db.c b/lib/dns/db.c index e86e1cba2b..0ba7f5aa1f 100644 --- a/lib/dns/db.c +++ b/lib/dns/db.c @@ -1044,7 +1044,8 @@ dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats) { } isc_result_t -dns_db_addglue(dns_db_t *db, dns_dbversion_t *version, dns_rdataset_t *rdataset, +dns_db_addglue(dns_db_t *db, dns_dbversion_t *version, + const dns_name_t *owner_name, dns_rdataset_t *rdataset, dns_message_t *msg) { REQUIRE(DNS_DB_VALID(db)); REQUIRE((db->attributes & DNS_DBATTR_CACHE) == 0); @@ -1053,7 +1054,7 @@ dns_db_addglue(dns_db_t *db, dns_dbversion_t *version, dns_rdataset_t *rdataset, REQUIRE(rdataset->type == dns_rdatatype_ns); if (db->methods->addglue != NULL) { - (db->methods->addglue)(db, version, rdataset, msg); + (db->methods->addglue)(db, version, owner_name, rdataset, msg); return ISC_R_SUCCESS; } diff --git a/lib/dns/db_p.h b/lib/dns/db_p.h index eaee9207f9..630873e6b1 100644 --- a/lib/dns/db_p.h +++ b/lib/dns/db_p.h @@ -109,7 +109,7 @@ struct dns_gluelist { typedef struct dns_glue_additionaldata_ctx { dns_db_t *db; dns_dbversion_t *version; - dns_dbnode_t *node; + const dns_name_t *owner_name; dns_glue_t *glue; } dns_glue_additionaldata_ctx_t; diff --git a/lib/dns/include/dns/db.h b/lib/dns/include/dns/db.h index b73c3b12b4..bd8fca23d1 100644 --- a/lib/dns/include/dns/db.h +++ b/lib/dns/include/dns/db.h @@ -167,7 +167,8 @@ typedef struct dns_db_methods { isc_result_t (*getservestalerefresh)(dns_db_t *db, uint32_t *interval); isc_result_t (*setgluecachestats)(dns_db_t *db, isc_stats_t *stats); void (*addglue)(dns_db_t *db, dns_dbversion_t *version, - dns_rdataset_t *rdataset, dns_message_t *msg); + const dns_name_t *owner_name, dns_rdataset_t *rdataset, + dns_message_t *msg); void (*setmaxrrperset)(dns_db_t *db, uint32_t value); void (*setmaxtypepername)(dns_db_t *db, uint32_t value); isc_result_t (*getzoneversion)(dns_db_t *db, isc_buffer_t *b); @@ -1743,7 +1744,8 @@ dns_db_setgluecachestats(dns_db_t *db, isc_stats_t *stats); */ isc_result_t -dns_db_addglue(dns_db_t *db, dns_dbversion_t *version, dns_rdataset_t *rdataset, +dns_db_addglue(dns_db_t *db, dns_dbversion_t *version, + const dns_name_t *owner_name, dns_rdataset_t *rdataset, dns_message_t *msg); /*%< * Add glue records for rdataset to the additional section of message in @@ -1752,6 +1754,7 @@ dns_db_addglue(dns_db_t *db, dns_dbversion_t *version, dns_rdataset_t *rdataset, * Requires: * \li 'db' is a database with 'zone' semantics. * \li 'version' is the DB version. + * \li 'owner_name' name of the rdataset. * \li 'rdataset' is a valid NS rdataset. * \li 'msg' is the DNS message to which the glue should be added. * diff --git a/lib/dns/include/dns/rdataset.h b/lib/dns/include/dns/rdataset.h index 430dc0321c..187c289543 100644 --- a/lib/dns/include/dns/rdataset.h +++ b/lib/dns/include/dns/rdataset.h @@ -208,8 +208,6 @@ struct dns_rdataset { * methods; see comments in rdatavec.c for details.) */ struct { - struct dns_db *db; - dns_dbnode_t *node; dns_vecheader_t *header; rdatavec_iter_t iter; } vec; diff --git a/lib/dns/qpzone.c b/lib/dns/qpzone.c index 2731d6b196..80dbfbc72d 100644 --- a/lib/dns/qpzone.c +++ b/lib/dns/qpzone.c @@ -1157,7 +1157,7 @@ unref: } static void -bindrdataset(qpzonedb_t *qpdb, qpznode_t *node, dns_vecheader_t *header, +bindrdataset(qpzonedb_t *qpdb, dns_vecheader_t *header, dns_rdataset_t *rdataset DNS__DB_FLARG) { if (rdataset == NULL) { return; @@ -1176,8 +1176,6 @@ bindrdataset(qpzonedb_t *qpdb, qpznode_t *node, dns_vecheader_t *header, rdataset->attributes.optout = true; } - rdataset->vec.db = (dns_db_t *)qpdb; - rdataset->vec.node = (dns_dbnode_t *)node; rdataset->vec.header = header; dns_vecheader_ref(header); rdataset->vec.iter.iter_pos = NULL; @@ -1724,9 +1722,9 @@ qpzone_findrdataset(dns_db_t *db, dns_dbnode_t *dbnode, } } if (found != NULL) { - bindrdataset(qpdb, node, found, rdataset DNS__DB_FLARG_PASS); + bindrdataset(qpdb, found, rdataset DNS__DB_FLARG_PASS); if (foundsig != NULL) { - bindrdataset(qpdb, node, foundsig, + bindrdataset(qpdb, foundsig, sigrdataset DNS__DB_FLARG_PASS); } } @@ -2145,7 +2143,7 @@ add(qpzonedb_t *qpdb, qpznode_t *node, const dns_name_t *nodename, return DNS_R_CNAMEANDOTHER; } - bindrdataset(qpdb, node, newheader, addedrdataset DNS__DB_FLARG_PASS); + bindrdataset(qpdb, newheader, addedrdataset DNS__DB_FLARG_PASS); return ISC_R_SUCCESS; } @@ -2778,11 +2776,10 @@ qpzone_setup_delegation(qpz_search_t *search, dns_dbnode_t **nodep, isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlock_t *nlock = qpzone_get_lock(node); NODE_RDLOCK(nlock, &nlocktype); - bindrdataset(search->qpdb, node, search->zonecut_header, + bindrdataset(search->qpdb, search->zonecut_header, rdataset DNS__DB_FLARG_PASS); if (sigrdataset != NULL && search->zonecut_sigheader != NULL) { - bindrdataset(search->qpdb, node, - search->zonecut_sigheader, + bindrdataset(search->qpdb, search->zonecut_sigheader, sigrdataset DNS__DB_FLARG_PASS); } NODE_UNLOCK(nlock, &nlocktype); @@ -3252,11 +3249,11 @@ again: node DNS__DB_FLARG_PASS); *nodep = (dns_dbnode_t *)node; } - bindrdataset(search->qpdb, node, found, + bindrdataset(search->qpdb, found, rdataset DNS__DB_FLARG_PASS); if (foundsig != NULL) { bindrdataset( - search->qpdb, node, foundsig, + search->qpdb, foundsig, sigrdataset DNS__DB_FLARG_PASS); } } else if (found == NULL && foundsig == NULL) { @@ -3869,10 +3866,10 @@ found: *nodep = (dns_dbnode_t *)node; } if (search.version->secure && !search.version->havensec3) { - bindrdataset(search.qpdb, node, nsecheader, + bindrdataset(search.qpdb, nsecheader, rdataset DNS__DB_FLARG_PASS); if (nsecsig != NULL) { - bindrdataset(search.qpdb, node, nsecsig, + bindrdataset(search.qpdb, nsecsig, sigrdataset DNS__DB_FLARG_PASS); } } @@ -3936,10 +3933,9 @@ found: } if (type != dns_rdatatype_any) { - bindrdataset(search.qpdb, node, found, - rdataset DNS__DB_FLARG_PASS); + bindrdataset(search.qpdb, found, rdataset DNS__DB_FLARG_PASS); if (foundsig != NULL) { - bindrdataset(search.qpdb, node, foundsig, + bindrdataset(search.qpdb, foundsig, sigrdataset DNS__DB_FLARG_PASS); } } @@ -4182,7 +4178,7 @@ rdatasetiter_current(dns_rdatasetiter_t *iterator, NODE_RDLOCK(nlock, &nlocktype); - bindrdataset(qpdb, qpnode, header, rdataset DNS__DB_FLARG_PASS); + bindrdataset(qpdb, header, rdataset DNS__DB_FLARG_PASS); NODE_UNLOCK(nlock, &nlocktype); } @@ -5146,15 +5142,13 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, } if (result == ISC_R_SUCCESS && newrdataset != NULL) { - bindrdataset(qpdb, node, newheader, - newrdataset DNS__DB_FLARG_PASS); + bindrdataset(qpdb, newheader, newrdataset DNS__DB_FLARG_PASS); } if (result == DNS_R_NXRRSET && newrdataset != NULL && (options & DNS_DBSUB_WANTOLD) != 0) { - bindrdataset(qpdb, node, header, - newrdataset DNS__DB_FLARG_PASS); + bindrdataset(qpdb, header, newrdataset DNS__DB_FLARG_PASS); } unlock: @@ -5238,7 +5232,6 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype, dns_fixedname_t fixedname_a; dns_name_t *name_a = NULL; dns_rdataset_t rdataset_a, sigrdataset_a; - const qpznode_t *node = NULL; qpznode_t *node_a = NULL; dns_fixedname_t fixedname_aaaa; dns_name_t *name_aaaa = NULL; @@ -5253,8 +5246,6 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype, ctx = (dns_glue_additionaldata_ctx_t *)arg; - node = (qpznode_t *)ctx->node; - name_a = dns_fixedname_initname(&fixedname_a); dns_rdataset_init(&rdataset_a); dns_rdataset_init(&sigrdataset_a); @@ -5314,7 +5305,7 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype, * attributes for the first rdataset associated with the first name * added to the ADDITIONAL section. */ - if (glue != NULL && dns_name_issubdomain(name, &node->name)) { + if (glue != NULL && dns_name_issubdomain(name, ctx->owner_name)) { if (dns_rdataset_isassociated(&glue->rdataset_a)) { glue->rdataset_a.attributes.required = true; } @@ -5427,13 +5418,13 @@ addglue_to_message(dns_glue_t *ge, dns_message_t *msg) { } static dns_gluelist_t * -create_gluelist(qpzonedb_t *qpdb, qpz_version_t *version, qpznode_t *node, - dns_rdataset_t *rdataset) { +create_gluelist(qpzonedb_t *qpdb, qpz_version_t *version, + const dns_name_t *owner_name, dns_rdataset_t *rdataset) { dns_vecheader_t *header = dns_vecheader_getheader(rdataset); dns_glue_additionaldata_ctx_t ctx = { .db = (dns_db_t *)qpdb, .version = (dns_dbversion_t *)version, - .node = (dns_dbnode_t *)node, + .owner_name = owner_name, }; dns_gluelist_t *gluelist = new_gluelist(ctx.db, header, ctx.version); @@ -5453,17 +5444,15 @@ create_gluelist(qpzonedb_t *qpdb, qpz_version_t *version, qpznode_t *node, } static void -addglue(dns_db_t *db, dns_dbversion_t *dbversion, dns_rdataset_t *rdataset, - dns_message_t *msg) { +addglue(dns_db_t *db, dns_dbversion_t *dbversion, const dns_name_t *owner_name, + dns_rdataset_t *rdataset, dns_message_t *msg) { qpzonedb_t *qpdb = (qpzonedb_t *)db; qpz_version_t *version = (qpz_version_t *)dbversion; - qpznode_t *node = (qpznode_t *)rdataset->vec.node; dns_vecheader_t *header = dns_vecheader_getheader(rdataset); dns_glue_t *glue = NULL; isc_statscounter_t counter = dns_gluecachestatscounter_hits_absent; REQUIRE(rdataset->type == dns_rdatatype_ns); - REQUIRE(qpdb == (qpzonedb_t *)rdataset->vec.db); REQUIRE(qpdb == version->qpdb); REQUIRE(!IS_STUB(qpdb)); @@ -5475,8 +5464,8 @@ addglue(dns_db_t *db, dns_dbversion_t *dbversion, dns_rdataset_t *rdataset, dns_gluelist_t *xchg_gluelist = gluelist; dns_gluelist_t *old_gluelist = (void *)-1; - dns_gluelist_t *new_gluelist = create_gluelist(qpdb, version, - node, rdataset); + dns_gluelist_t *new_gluelist = + create_gluelist(qpdb, version, owner_name, rdataset); while (old_gluelist != xchg_gluelist && (xchg_gluelist == NULL || diff --git a/lib/ns/query.c b/lib/ns/query.c index 4deeda6074..4853da8edb 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -2233,8 +2233,8 @@ query_additional(query_ctx_t *qctx, dns_name_t *name, goto regular; } - result = dns_db_addglue(qctx->db, dbversion->version, rdataset, - client->message); + result = dns_db_addglue(qctx->db, dbversion->version, name, + rdataset, client->message); if (result == ISC_R_SUCCESS) { return; } diff --git a/tests/dns/qpzone_test.c b/tests/dns/qpzone_test.c index 817e72105f..ebb662bc42 100644 --- a/tests/dns/qpzone_test.c +++ b/tests/dns/qpzone_test.c @@ -186,14 +186,10 @@ ownercase_test_one(const char *str1, const char *str2) { .common.methods = &qpdb_zonemethods, .common.mctx = isc_g_mctx, }; - qpznode_t node = { .methods = &qpznode_methods, .locknum = 0 }; dns_vecheader_t header = { 0 }; dns_rdataset_t rdataset = { .magic = DNS_RDATASET_MAGIC, - .vec = { .db = (dns_db_t *)qpdb, - .node = (dns_dbnode_t *)&node, - .header = &header, - }, + .vec = { .header = &header }, .methods = &dns_rdatavec_rdatasetmethods, }; isc_buffer_t b; @@ -357,14 +353,10 @@ ISC_RUN_TEST_IMPL(setownercase) { .common.methods = &qpdb_zonemethods, .common.mctx = isc_g_mctx, }; - qpznode_t node = { .methods = &qpznode_methods, .locknum = 0 }; dns_vecheader_t header = { 0 }; dns_rdataset_t rdataset = { .magic = DNS_RDATASET_MAGIC, - .vec = { .db = (dns_db_t *)qpdb, - .node = (dns_dbnode_t *)&node, - .header = &header, - }, + .vec = { .header = &header }, .methods = &dns_rdatavec_rdatasetmethods, }; const char *str1 = From 9fe3809ccfa8179a617a6e48d613e1af0c2c9650 Mon Sep 17 00:00:00 2001 From: Alessio Podda Date: Thu, 5 Mar 2026 15:24:01 +0100 Subject: [PATCH 5/5] Fix benign race condition The dns_rdatavec_subtractrdataset function would copy the old header using memmove but the old header includes fields such as trust and reference counts that are atomic. While the values of those fields were never used, it did cause a benign race condition. This commit refactors dns_rdatavec_subtractrdataset and dns_rdatavec_merge not to use memmove. --- lib/dns/rdatavec.c | 57 ++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/lib/dns/rdatavec.c b/lib/dns/rdatavec.c index 85a78fd2f4..80a3446802 100644 --- a/lib/dns/rdatavec.c +++ b/lib/dns/rdatavec.c @@ -578,30 +578,31 @@ dns_rdatavec_merge(dns_vecheader_t *oheader, dns_vecheader_t *nheader, CLEANUP(ISC_R_NOSPACE); } - /* Allocate the target buffer and copy the new vec's header */ + /* + * Allocate the target buffer and initialize the header. + * Preserve the case of the old header, but the rest from the + * new header. + */ unsigned char *tstart = isc_mem_get(mctx, tlength); dns_vecheader_t *as_header = (dns_vecheader_t *)tstart; - - /* - * Preserve the case of the old header, but the rest from the new - * header - */ - memmove(tstart, nheader, header_size(nheader)); - memmove(as_header->upper, oheader->upper, sizeof(oheader->upper)); - uint16_t case_attrs = DNS_VECHEADER_GETATTR( + uint16_t attrs = DNS_VECHEADER_GETATTR( oheader, DNS_VECHEADERATTR_CASESET | DNS_VECHEADERATTR_CASEFULLYLOWER); - atomic_init(&as_header->attributes, 0); - DNS_VECHEADER_SETATTR(as_header, case_attrs); if (RESIGN(nheader)) { - DNS_VECHEADER_SETATTR(as_header, DNS_VECHEADERATTR_RESIGN); + attrs |= DNS_VECHEADERATTR_RESIGN; } - - /* Initialize refcount for the new header */ + *as_header = (dns_vecheader_t){ + .typepair = nheader->typepair, + .mctx = isc_mem_ref(mctx), + .serial = nheader->serial, + .ttl = nheader->ttl, + .resign = nheader->resign, + .next_header = ISC_SLINK_INITIALIZER, + }; isc_refcount_init(&as_header->references, 1); - - /* We need to re-attach to the memory context for refcount reasons. */ - as_header->mctx = isc_mem_ref(mctx); + atomic_init(&as_header->attributes, attrs); + atomic_init(&as_header->trust, atomic_load_acquire(&nheader->trust)); + memmove(as_header->upper, oheader->upper, sizeof(oheader->upper)); tcurrent = tstart + header_size(nheader); @@ -754,18 +755,20 @@ dns_rdatavec_subtract(dns_vecheader_t *oheader, dns_vecheader_t *sheader, * Allocate the target buffer and copy the old vec's header. */ tstart = isc_mem_get(mctx, tlength); - memmove(tstart, oheader, header_size(oheader)); - - /* Initialize refcount for the new header */ dns_vecheader_t *as_header = (dns_vecheader_t *)tstart; + uint16_t attrs = RESIGN(oheader) ? DNS_VECHEADERATTR_RESIGN : 0; + *as_header = (dns_vecheader_t){ + .typepair = oheader->typepair, + .mctx = isc_mem_ref(mctx), + .serial = oheader->serial, + .ttl = oheader->ttl, + .resign = oheader->resign, + .next_header = ISC_SLINK_INITIALIZER, + }; isc_refcount_init(&as_header->references, 1); - atomic_init(&as_header->attributes, 0); - if (RESIGN(oheader)) { - DNS_VECHEADER_SETATTR(as_header, DNS_VECHEADERATTR_RESIGN); - } - - /* We need to re-attach to the memory context for refcount reasons. */ - as_header->mctx = isc_mem_ref(mctx); + atomic_init(&as_header->attributes, attrs); + atomic_init(&as_header->trust, atomic_load_acquire(&oheader->trust)); + memmove(as_header->upper, oheader->upper, sizeof(oheader->upper)); tcurrent = tstart + header_size(oheader);