fix: usr: Resolve the spurious drops in performance due GLUE cache

For performance reasons, the returned GLUE records are cached on the first use.  The current implementation could randomly cause a performance drop and increased memory use.  This has been fixed.

Closes #5064

Merge branch '5064-rewrite-glue-cache-in-qpzone' into 'main'

See merge request isc-projects/bind9!9831
This commit is contained in:
Ondřej Surý 2024-12-13 23:11:04 +00:00
commit e2c1941efd
4 changed files with 157 additions and 179 deletions

View file

@ -20,9 +20,6 @@
#include <dns/nsec3.h>
#include <dns/types.h>
#define GLUETABLE_INIT_SIZE 1 << 2
#define GLUETABLE_MIN_SIZE 1 << 8
#define RDATATYPE_NCACHEANY DNS_TYPEPAIR_VALUE(0, dns_rdatatype_any)
#ifdef STRONG_RWLOCK_CHECK
@ -122,18 +119,31 @@ ISC_LANG_BEGINDECLS
struct dns_glue {
struct dns_glue *next;
dns_fixedname_t fixedname;
dns_name_t name;
dns_rdataset_t rdataset_a;
dns_rdataset_t sigrdataset_a;
dns_rdataset_t rdataset_aaaa;
dns_rdataset_t sigrdataset_aaaa;
};
typedef struct {
dns_glue_t *glue_list;
struct dns_gluelist {
isc_mem_t *mctx;
const dns_dbversion_t *version;
dns_slabheader_t *header;
struct dns_glue *glue;
struct rcu_head rcu_head;
struct cds_wfs_node wfs_node;
};
typedef struct dns_glue_additionaldata_ctx {
dns_db_t *db;
dns_dbversion_t *version;
dns_name_t *nodename;
dns_dbnode_t *node;
dns_glue_t *glue;
} dns_glue_additionaldata_ctx_t;
typedef struct {

View file

@ -133,6 +133,8 @@ struct dns_slabheader {
unsigned char upper[32];
isc_heap_t *heap;
dns_gluelist_t *gluelist;
};
enum {

View file

@ -95,6 +95,7 @@ typedef struct dns_forwarder dns_forwarder_t;
typedef struct dns_fwdtable dns_fwdtable_t;
typedef struct dns_geoip_databases dns_geoip_databases_t;
typedef struct dns_glue dns_glue_t;
typedef struct dns_gluelist dns_gluelist_t;
typedef struct dns_iptable dns_iptable_t;
typedef uint32_t dns_iterations_t;
typedef struct dns_kasp dns_kasp_t;

View file

@ -142,7 +142,7 @@ struct qpz_version {
uint64_t records;
uint64_t xfrsize;
struct cds_lfht *glue_table;
struct cds_wfs_stack glue_stack;
};
typedef ISC_LIST(qpz_version_t) qpz_versionlist_t;
@ -213,17 +213,6 @@ typedef struct {
isc_stdtime_t now;
} qpz_search_t;
typedef struct dns_gluenode_t {
isc_mem_t *mctx;
struct dns_glue *glue;
qpznode_t *node;
struct cds_lfht_node ht_node;
struct rcu_head rcu_head;
} dns_gluenode_t;
/*%
* Load Context
*/
@ -380,21 +369,74 @@ set_index(void *what, unsigned int idx) {
}
static void
free_gluenode(dns_gluenode_t *gluenode);
free_glue(isc_mem_t *mctx, dns_glue_t *glue) {
while (glue != NULL) {
dns_glue_t *next = glue->next;
if (dns_rdataset_isassociated(&glue->rdataset_a)) {
dns_rdataset_disassociate(&glue->rdataset_a);
}
if (dns_rdataset_isassociated(&glue->sigrdataset_a)) {
dns_rdataset_disassociate(&glue->sigrdataset_a);
}
if (dns_rdataset_isassociated(&glue->rdataset_aaaa)) {
dns_rdataset_disassociate(&glue->rdataset_aaaa);
}
if (dns_rdataset_isassociated(&glue->sigrdataset_aaaa)) {
dns_rdataset_disassociate(&glue->sigrdataset_aaaa);
}
dns_rdataset_invalidate(&glue->rdataset_a);
dns_rdataset_invalidate(&glue->sigrdataset_a);
dns_rdataset_invalidate(&glue->rdataset_aaaa);
dns_rdataset_invalidate(&glue->sigrdataset_aaaa);
dns_name_free(&glue->name, mctx);
isc_mem_put(mctx, glue, sizeof(*glue));
glue = next;
}
}
static void
free_gluetable(struct cds_lfht *glue_table) {
struct cds_lfht_iter iter;
dns_gluenode_t *gluenode = NULL;
destroy_gluelist(dns_gluelist_t **gluelistp) {
REQUIRE(gluelistp != NULL);
if (*gluelistp == NULL) {
return;
}
dns_gluelist_t *gluelist = *gluelistp;
free_glue(gluelist->mctx, gluelist->glue);
isc_mem_putanddetach(&gluelist->mctx, gluelist, sizeof(*gluelist));
}
static void
free_gluelist_rcu(struct rcu_head *rcu_head) {
dns_gluelist_t *gluelist = caa_container_of(rcu_head, dns_gluelist_t,
rcu_head);
destroy_gluelist(&gluelist);
}
static void
cleanup_gluelists(struct cds_wfs_stack *glue_stack) {
struct cds_wfs_head *head = __cds_wfs_pop_all(glue_stack);
struct cds_wfs_node *node = NULL, *next = NULL;
rcu_read_lock();
cds_lfht_for_each_entry(glue_table, &iter, gluenode, ht_node) {
INSIST(!cds_lfht_del(glue_table, &gluenode->ht_node));
free_gluenode(gluenode);
cds_wfs_for_each_blocking_safe(head, node, next) {
dns_gluelist_t *gluelist =
caa_container_of(node, dns_gluelist_t, wfs_node);
dns_slabheader_t *header = rcu_xchg_pointer(&gluelist->header,
NULL);
(void)rcu_cmpxchg_pointer(&header->gluelist, gluelist, NULL);
call_rcu(&gluelist->rcu_head, free_gluelist_rcu);
}
rcu_read_unlock();
cds_lfht_destroy(glue_table, NULL);
}
static void
@ -441,6 +483,7 @@ free_qpdb(qpzonedb_t *qpdb, bool log) {
isc_refcount_destroy(&qpdb->current_version->references);
UNLINK(qpdb->open_versions, qpdb->current_version, link);
cds_wfs_destroy(&qpdb->current_version->glue_stack);
isc_rwlock_destroy(&qpdb->current_version->rwlock);
isc_mem_put(qpdb->common.mctx, qpdb->current_version,
sizeof(*qpdb->current_version));
@ -481,7 +524,7 @@ qpdb_destroy(dns_db_t *arg) {
* node count below.
*/
if (qpdb->current_version != NULL) {
free_gluetable(qpdb->current_version->glue_table);
cleanup_gluelists(&qpdb->current_version->glue_stack);
}
/*
@ -553,11 +596,9 @@ allocate_version(isc_mem_t *mctx, uint32_t serial, unsigned int references,
.changed_list = ISC_LIST_INITIALIZER,
.resigned_list = ISC_LIST_INITIALIZER,
.link = ISC_LINK_INITIALIZER,
.glue_table = cds_lfht_new(GLUETABLE_INIT_SIZE,
GLUETABLE_MIN_SIZE, 0,
CDS_LFHT_AUTO_RESIZE, NULL),
};
cds_wfs_init(&version->glue_stack);
isc_rwlock_init(&version->rwlock);
isc_refcount_init(&version->references, references);
@ -1446,7 +1487,8 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp,
if (cleanup_version != NULL) {
isc_refcount_destroy(&cleanup_version->references);
INSIST(EMPTY(cleanup_version->changed_list));
free_gluetable(cleanup_version->glue_table);
cleanup_gluelists(&cleanup_version->glue_stack);
cds_wfs_destroy(&cleanup_version->glue_stack);
isc_rwlock_destroy(&cleanup_version->rwlock);
isc_mem_put(qpdb->common.mctx, cleanup_version,
sizeof(*cleanup_version));
@ -4953,24 +4995,42 @@ nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) {
}
static dns_glue_t *
new_gluelist(isc_mem_t *mctx, dns_name_t *name) {
new_glue(isc_mem_t *mctx, const dns_name_t *name) {
dns_glue_t *glue = isc_mem_get(mctx, sizeof(*glue));
*glue = (dns_glue_t){ 0 };
dns_name_t *gluename = dns_fixedname_initname(&glue->fixedname);
*glue = (dns_glue_t){
.name = DNS_NAME_INITEMPTY,
};
dns_name_copy(name, gluename);
dns_name_dup(name, mctx, &glue->name);
return glue;
}
static dns_gluelist_t *
new_gluelist(dns_db_t *db, dns_slabheader_t *header,
const dns_dbversion_t *dbversion) {
dns_gluelist_t *gluelist = isc_mem_get(db->mctx, sizeof(*gluelist));
*gluelist = (dns_gluelist_t){
.version = dbversion,
.header = header,
};
isc_mem_attach(db->mctx, &gluelist->mctx);
cds_wfs_node_init(&gluelist->wfs_node);
return gluelist;
}
static isc_result_t
glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
dns_rdataset_t *unused DNS__DB_FLARG) {
dns_rdataset_t *rdataset ISC_ATTR_UNUSED DNS__DB_FLARG) {
dns_glue_additionaldata_ctx_t *ctx = NULL;
isc_result_t result;
dns_fixedname_t fixedname_a;
dns_name_t *name_a = NULL;
dns_rdataset_t rdataset_a, sigrdataset_a;
const qpznode_t *node = NULL;
qpznode_t *node_a = NULL;
dns_fixedname_t fixedname_aaaa;
dns_name_t *name_aaaa = NULL;
@ -4978,8 +5038,6 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
qpznode_t *node_aaaa = NULL;
dns_glue_t *glue = NULL;
UNUSED(unused);
/*
* NS records want addresses in additional records.
*/
@ -4987,6 +5045,8 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
ctx = (dns_glue_additionaldata_ctx_t *)arg;
node = (qpznode_t *)ctx->node;
name_a = dns_fixedname_initname(&fixedname_a);
dns_rdataset_init(&rdataset_a);
dns_rdataset_init(&sigrdataset_a);
@ -4999,7 +5059,7 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
DNS_DBFIND_GLUEOK, 0, (dns_dbnode_t **)&node_a, name_a,
&rdataset_a, &sigrdataset_a DNS__DB_FLARG_PASS);
if (result == DNS_R_GLUE) {
glue = new_gluelist(ctx->db->mctx, name_a);
glue = new_glue(ctx->db->mctx, name_a);
dns_rdataset_init(&glue->rdataset_a);
dns_rdataset_init(&glue->sigrdataset_a);
@ -5019,7 +5079,7 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
&sigrdataset_aaaa DNS__DB_FLARG_PASS);
if (result == DNS_R_GLUE) {
if (glue == NULL) {
glue = new_gluelist(ctx->db->mctx, name_aaaa);
glue = new_glue(ctx->db->mctx, name_aaaa);
dns_rdataset_init(&glue->rdataset_a);
dns_rdataset_init(&glue->sigrdataset_a);
@ -5045,7 +5105,7 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
* attributes for the first rdataset associated with the first name
* added to the ADDITIONAL section.
*/
if (glue != NULL && dns_name_issubdomain(name, ctx->nodename)) {
if (glue != NULL && dns_name_issubdomain(name, &node->name)) {
if (dns_rdataset_isassociated(&glue->rdataset_a)) {
glue->rdataset_a.attributes |=
DNS_RDATASETATTR_REQUIRED;
@ -5057,8 +5117,8 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
}
if (glue != NULL) {
glue->next = ctx->glue_list;
ctx->glue_list = glue;
glue->next = ctx->glue;
ctx->glue = glue;
}
result = ISC_R_SUCCESS;
@ -5100,12 +5160,11 @@ addglue_to_message(dns_glue_t *ge, dns_message_t *msg) {
dns_rdataset_t *sigrdataset_a = NULL;
dns_rdataset_t *rdataset_aaaa = NULL;
dns_rdataset_t *sigrdataset_aaaa = NULL;
dns_name_t *gluename = dns_fixedname_name(&ge->fixedname);
bool prepend_name = false;
dns_message_gettempname(msg, &name);
dns_name_copy(gluename, name);
dns_name_copy(&ge->name, name);
if (dns_rdataset_isassociated(&ge->rdataset_a)) {
dns_message_gettemprdataset(msg, &rdataset_a);
@ -5170,15 +5229,16 @@ addglue_to_message(dns_glue_t *ge, dns_message_t *msg) {
}
}
static dns_glue_t *
newglue(dns_db_t *db, qpz_version_t *version, qpznode_t *node,
dns_rdataset_t *rdataset) {
dns_fixedname_t nodename;
static dns_gluelist_t *
create_gluelist(qpzonedb_t *qpdb, qpz_version_t *version, qpznode_t *node,
dns_rdataset_t *rdataset) {
dns_slabheader_t *header = dns_slabheader_fromrdataset(rdataset);
dns_glue_additionaldata_ctx_t ctx = {
.db = db,
.db = (dns_db_t *)qpdb,
.version = (dns_dbversion_t *)version,
.nodename = dns_fixedname_initname(&nodename),
.node = (dns_dbnode_t *)node,
};
dns_gluelist_t *gluelist = new_gluelist(ctx.db, header, ctx.version);
/*
* Get the owner name of the NS RRset - it will be necessary for
@ -5186,100 +5246,13 @@ newglue(dns_db_t *db, qpz_version_t *version, qpznode_t *node,
* determining which NS records in the delegation are
* in-bailiwick).
*/
dns_name_copy(&node->name, ctx.nodename);
(void)dns_rdataset_additionaldata(rdataset, dns_rootname,
glue_nsdname_cb, &ctx);
return ctx.glue_list;
}
gluelist->glue = ctx.glue;
static dns_gluenode_t *
new_gluenode(dns_db_t *db, qpz_version_t *version, qpznode_t *node,
dns_rdataset_t *rdataset) {
dns_gluenode_t *gluenode = isc_mem_get(db->mctx, sizeof(*gluenode));
*gluenode = (dns_gluenode_t){
.glue = newglue(db, version, node, rdataset),
};
isc_mem_attach(db->mctx, &gluenode->mctx);
qpznode_attach(node, &gluenode->node);
return gluenode;
}
static void
freeglue(isc_mem_t *mctx, dns_glue_t *glue) {
while (glue != NULL) {
dns_glue_t *next = glue->next;
if (dns_rdataset_isassociated(&glue->rdataset_a)) {
dns_rdataset_disassociate(&glue->rdataset_a);
}
if (dns_rdataset_isassociated(&glue->sigrdataset_a)) {
dns_rdataset_disassociate(&glue->sigrdataset_a);
}
if (dns_rdataset_isassociated(&glue->rdataset_aaaa)) {
dns_rdataset_disassociate(&glue->rdataset_aaaa);
}
if (dns_rdataset_isassociated(&glue->sigrdataset_aaaa)) {
dns_rdataset_disassociate(&glue->sigrdataset_aaaa);
}
dns_rdataset_invalidate(&glue->rdataset_a);
dns_rdataset_invalidate(&glue->sigrdataset_a);
dns_rdataset_invalidate(&glue->rdataset_aaaa);
dns_rdataset_invalidate(&glue->sigrdataset_aaaa);
isc_mem_put(mctx, glue, sizeof(*glue));
glue = next;
}
}
static void
free_gluenode_rcu(struct rcu_head *rcu_head) {
dns_gluenode_t *gluenode = caa_container_of(rcu_head, dns_gluenode_t,
rcu_head);
freeglue(gluenode->mctx, gluenode->glue);
qpznode_detach(&gluenode->node);
isc_mem_putanddetach(&gluenode->mctx, gluenode, sizeof(*gluenode));
}
static void
free_gluenode(dns_gluenode_t *gluenode) {
call_rcu(&gluenode->rcu_head, free_gluenode_rcu);
}
static uint32_t
qpznode_hash(const qpznode_t *node) {
const uintptr_t key = (uintptr_t)node;
return isc_hash32(&key, sizeof(key), true);
}
static int
qpznode_match(struct cds_lfht_node *ht_node, const void *key) {
const dns_gluenode_t *gluenode =
caa_container_of(ht_node, dns_gluenode_t, ht_node);
const qpznode_t *node = key;
return gluenode->node == node;
}
static uint32_t
gluenode_hash(const dns_gluenode_t *gluenode) {
return qpznode_hash(gluenode->node);
}
static int
gluenode_match(struct cds_lfht_node *ht_node, const void *key) {
const dns_gluenode_t *gluenode = key;
return qpznode_match(ht_node, gluenode->node);
return gluelist;
}
static void
@ -5288,64 +5261,56 @@ addglue(dns_db_t *db, dns_dbversion_t *dbversion, dns_rdataset_t *rdataset,
qpzonedb_t *qpdb = (qpzonedb_t *)db;
qpz_version_t *version = (qpz_version_t *)dbversion;
qpznode_t *node = (qpznode_t *)rdataset->slab.node;
dns_gluenode_t *gluenode = NULL;
dns_slabheader_t *header = dns_slabheader_fromrdataset(rdataset);
dns_glue_t *glue = NULL;
isc_statscounter_t counter = dns_gluecachestatscounter_hits_absent;
REQUIRE(rdataset->type == dns_rdatatype_ns);
REQUIRE(qpdb == (qpzonedb_t *)rdataset->slab.db);
REQUIRE(qpdb == version->qpdb);
REQUIRE(!IS_STUB(qpdb));
/*
* The glue table cache that forms a part of the DB version
* structure is not explicitly bounded and there's no cache
* cleaning. The zone data size itself is an implicit bound.
*
* The key into the glue hashtable is the node pointer. This is
* because the glue hashtable is a property of the DB version,
* and the glue is keyed for the ownername/NS tuple. We don't
* bother with using an expensive dns_name_t comparison here as
* the node pointer is a fixed value that won't change for a DB
* version and can be compared directly.
*/
rcu_read_lock();
struct cds_lfht_iter iter;
cds_lfht_lookup(version->glue_table, qpznode_hash(node), qpznode_match,
node, &iter);
dns_gluelist_t *gluelist = rcu_dereference(header->gluelist);
if (gluelist == NULL || gluelist->version != dbversion) {
/* No or old glue list was found in the table. */
gluenode = cds_lfht_entry(cds_lfht_iter_get_node(&iter), dns_gluenode_t,
ht_node);
if (gluenode == NULL) {
/* No cached glue was found in the table. Get new glue. */
gluenode = new_gluenode(db, version, node, rdataset);
dns_gluelist_t *xchg_gluelist = gluelist;
dns_gluelist_t *old_gluelist = (void *)-1;
dns_gluelist_t *new_gluelist = create_gluelist(qpdb, version,
node, rdataset);
struct cds_lfht_node *ht_node = cds_lfht_add_unique(
version->glue_table, gluenode_hash(gluenode),
gluenode_match, gluenode, &gluenode->ht_node);
while (old_gluelist != xchg_gluelist &&
(xchg_gluelist == NULL ||
xchg_gluelist->version != dbversion))
{
old_gluelist = xchg_gluelist;
xchg_gluelist = rcu_cmpxchg_pointer(
&header->gluelist, old_gluelist, new_gluelist);
}
if (ht_node != &gluenode->ht_node) {
free_gluenode_rcu(&gluenode->rcu_head);
gluenode = cds_lfht_entry(ht_node, dns_gluenode_t,
ht_node);
if (old_gluelist == xchg_gluelist) {
/* CAS was successful */
cds_wfs_push(&version->glue_stack,
&new_gluelist->wfs_node);
gluelist = new_gluelist;
} else {
destroy_gluelist(&new_gluelist);
gluelist = xchg_gluelist;
}
}
INSIST(gluenode != NULL);
dns_glue_t *glue = gluenode->glue;
isc_statscounter_t counter = dns_gluecachestatscounter_hits_present;
glue = gluelist->glue;
if (glue != NULL) {
/* We have a cached result. Add it to the message and return. */
addglue_to_message(glue, msg);
} else {
counter = dns_gluecachestatscounter_hits_absent;
counter = dns_gluecachestatscounter_hits_present;
}
rcu_read_unlock();
/* We have a cached result. Add it to the message and return. */
if (qpdb->gluecachestats != NULL) {
isc_stats_increment(qpdb->gluecachestats, counter);
}