diff --git a/lib/dns/include/dns/rbt.h b/lib/dns/include/dns/rbt.h index 40bf09ee77..c144f83b49 100644 --- a/lib/dns/include/dns/rbt.h +++ b/lib/dns/include/dns/rbt.h @@ -124,12 +124,6 @@ struct dns_rbtnode { */ ISC_LINK(dns_rbtnode_t) deadlink; - /*% - * This linked list is used to store nodes from which tree pruning can - * be started. - */ - ISC_LINK(dns_rbtnode_t) prunelink; - /*@{*/ /*! * These values are used in the RBT DB implementation. The appropriate diff --git a/lib/dns/rbt.c b/lib/dns/rbt.c index 29f19c895b..4d8c142fc2 100644 --- a/lib/dns/rbt.c +++ b/lib/dns/rbt.c @@ -1576,7 +1576,6 @@ create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep) { HASHVAL(node) = 0; ISC_LINK_INIT(node, deadlink); - ISC_LINK_INIT(node, prunelink); LOCKNUM(node) = 0; WILD(node) = 0; diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 7e6f9c3d10..ad8533ebaf 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -498,10 +498,6 @@ struct dns_rbtdb { */ rbtnodelist_t *deadnodes; - /* List of nodes from which recursive tree pruning can be started from. - * Locked by tree_lock. */ - rbtnodelist_t prunenodes; - /* * Heaps. These are used for TTL based expiry in a cache, * or for zone resigning in a zone DB. hmctx is the memory @@ -1034,7 +1030,6 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { unsigned int i; isc_result_t result; char buf[DNS_NAME_FORMATSIZE]; - dns_rbtnode_t *node = NULL; dns_rbt_t **treep; isc_time_t start; @@ -1060,6 +1055,8 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { * the overhead of unlinking all nodes here should be negligible. */ for (i = 0; i < rbtdb->node_lock_count; i++) { + dns_rbtnode_t *node; + node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); while (node != NULL) { ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink); @@ -1067,12 +1064,6 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { } } - node = ISC_LIST_HEAD(rbtdb->prunenodes); - while (node != NULL) { - ISC_LIST_UNLINK(rbtdb->prunenodes, node, prunelink); - node = ISC_LIST_HEAD(rbtdb->prunenodes); - } - if (event == NULL) { rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0; } @@ -1878,32 +1869,19 @@ is_leaf(dns_rbtnode_t *node) { node->left == NULL && node->right == NULL); } -/*% - * The tree lock must be held when this function is called as it reads and - * updates rbtdb->prunenodes. - */ static void send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, isc_rwlocktype_t locktype) { - bool pruning_queued = (ISC_LIST_HEAD(rbtdb->prunenodes) != NULL); - - INSIST(locktype == isc_rwlocktype_write); + isc_event_t *ev; + dns_db_t *db; + ev = isc_event_allocate(rbtdb->common.mctx, NULL, DNS_EVENT_RBTPRUNE, + prune_tree, node, sizeof(isc_event_t)); new_reference(rbtdb, node, locktype); - INSIST(!ISC_LINK_LINKED(node, prunelink)); - ISC_LIST_APPEND(rbtdb->prunenodes, node, prunelink); - - if (!pruning_queued) { - isc_event_t *ev = NULL; - dns_db_t *db = NULL; - - attach((dns_db_t *)rbtdb, &db); - - ev = isc_event_allocate(rbtdb->common.mctx, NULL, - DNS_EVENT_RBTPRUNE, prune_tree, db, - sizeof(isc_event_t)); - isc_task_send(rbtdb->task, &ev); - } + db = NULL; + attach((dns_db_t *)rbtdb, &db); + ev->ev_sender = db; + isc_task_send(rbtdb->task, &ev); } /*% @@ -2178,26 +2156,17 @@ restore_locks: } /* - * Prune the tree by recursively cleaning up single leaves. Go through all - * nodes stored in the rbtdb->prunenodes list; for each of them, in the worst - * case, it will be necessary to traverse a number of tree levels equal to the - * maximum legal number of domain name labels (127); in practice, the number of - * tree levels to traverse will virtually always be much smaller (a few levels - * at most). While holding the tree lock throughout this entire operation is - * less than ideal, so is splitting the latter up by queueing a separate - * prune_tree() run for each node to start pruning from (as queueing requires - * allocating memory and can therefore potentially be exploited to exhaust - * available memory). Also note that actually freeing up the memory used by - * RBTDB nodes (which is what this function does) is essential to keeping cache - * memory use in check, so since the tree lock needs to be acquired anyway, - * freeing as many nodes as possible before the tree lock gets released is - * prudent. + * Prune the tree by recursively cleaning-up single leaves. In the worst + * case, the number of iteration is the number of tree levels, which is at + * most the maximum number of domain name labels, i.e, 127. In practice, this + * should be much smaller (only a few times), and even the worst case would be + * acceptable for a single event. */ static void prune_tree(isc_task_t *task, isc_event_t *event) { - dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)event->ev_arg; - dns_rbtnode_t *node = NULL; - dns_rbtnode_t *parent = NULL; + dns_rbtdb_t *rbtdb = event->ev_sender; + dns_rbtnode_t *node = event->ev_arg; + dns_rbtnode_t *parent; unsigned int locknum; UNUSED(task); @@ -2205,60 +2174,44 @@ prune_tree(isc_task_t *task, isc_event_t *event) { isc_event_free(&event); RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); + locknum = node->locknum; + NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); + do { + parent = node->parent; + decrement_reference(rbtdb, node, 0, isc_rwlocktype_write, + isc_rwlocktype_write, true); - while ((node = ISC_LIST_HEAD(rbtdb->prunenodes)) != NULL) { - locknum = node->locknum; - NODE_LOCK(&rbtdb->node_locks[locknum].lock, - isc_rwlocktype_write); - do { - if (ISC_LINK_LINKED(node, prunelink)) { - ISC_LIST_UNLINK(rbtdb->prunenodes, node, - prunelink); + if (parent != NULL && parent->down == NULL) { + /* + * node was the only down child of the parent and has + * just been removed. We'll then need to examine the + * parent. Keep the lock if possible; otherwise, + * release the old lock and acquire one for the parent. + */ + if (parent->locknum != locknum) { + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, + isc_rwlocktype_write); + locknum = parent->locknum; + NODE_LOCK(&rbtdb->node_locks[locknum].lock, + isc_rwlocktype_write); } - parent = node->parent; - decrement_reference(rbtdb, node, 0, - isc_rwlocktype_write, - isc_rwlocktype_write, true); - - if (parent != NULL && parent->down == NULL) { - /* - * node was the only down child of the parent - * and has just been removed. We'll then need - * to examine the parent. Keep the lock if - * possible; otherwise, release the old lock and - * acquire one for the parent. - */ - if (parent->locknum != locknum) { - NODE_UNLOCK( - &rbtdb->node_locks[locknum].lock, - isc_rwlocktype_write); - locknum = parent->locknum; - NODE_LOCK( - &rbtdb->node_locks[locknum].lock, - isc_rwlocktype_write); - } - - /* - * We need to gain a reference to the node - * before decrementing it in the next iteration. - */ - if (ISC_LINK_LINKED(parent, deadlink)) { - ISC_LIST_UNLINK( - rbtdb->deadnodes[locknum], + /* + * We need to gain a reference to the node before + * decrementing it in the next iteration. + */ + if (ISC_LINK_LINKED(parent, deadlink)) { + ISC_LIST_UNLINK(rbtdb->deadnodes[locknum], parent, deadlink); - } - new_reference(rbtdb, parent, - isc_rwlocktype_write); - } else { - parent = NULL; } + new_reference(rbtdb, parent, isc_rwlocktype_write); + } else { + parent = NULL; + } - node = parent; - } while (node != NULL); - NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, - isc_rwlocktype_write); - } + node = parent; + } while (node != NULL); + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); detach((dns_db_t **)&rbtdb); @@ -8434,8 +8387,6 @@ dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, ISC_LIST_INIT(rbtdb->deadnodes[i]); } - ISC_LIST_INIT(rbtdb->prunenodes); - rbtdb->active = rbtdb->node_lock_count; for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {