mirror of
https://github.com/isc-projects/bind9.git
synced 2026-05-28 04:34:54 -04:00
Merge branch '4596-regression-in-cache-cleaning-9.18-v9.18.25-release' into 'v9.18.25-release'
[9.18.25] Reduce lock contention during RBTDB tree pruning See merge request isc-projects/bind9!8796
This commit is contained in:
commit
9f4790d7cc
5 changed files with 44 additions and 116 deletions
4
CHANGES
4
CHANGES
|
|
@ -1,3 +1,7 @@
|
|||
6352. [bug] Revert change 6319 and decrease lock contention during
|
||||
RBTDB tree pruning by not cleaning up nodes recursively
|
||||
within a single prune_tree() call. [GL #4596]
|
||||
|
||||
6350. [bug] Address use after free in expire_lru_headers. [GL #4495]
|
||||
|
||||
--- 9.18.24 released ---
|
||||
|
|
|
|||
|
|
@ -41,6 +41,11 @@ Bug Fixes
|
|||
ISC would like to thank to Jinmei Tatuya from Infoblox for bringing
|
||||
this issue to our attention.
|
||||
|
||||
- A regression in cache-cleaning code enabled memory use to grow
|
||||
significantly more quickly than before, until the configured
|
||||
:any:`max-cache-size` limit was reached. This has been fixed.
|
||||
:gl:`#4596`
|
||||
|
||||
|
||||
Known Issues
|
||||
~~~~~~~~~~~~
|
||||
|
|
|
|||
|
|
@ -124,12 +124,6 @@ struct dns_rbtnode {
|
|||
*/
|
||||
ISC_LINK(dns_rbtnode_t) deadlink;
|
||||
|
||||
/*%
|
||||
* This linked list is used to store nodes from which tree pruning can
|
||||
* be started.
|
||||
*/
|
||||
ISC_LINK(dns_rbtnode_t) prunelink;
|
||||
|
||||
/*@{*/
|
||||
/*!
|
||||
* These values are used in the RBT DB implementation. The appropriate
|
||||
|
|
|
|||
|
|
@ -1576,7 +1576,6 @@ create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep) {
|
|||
HASHVAL(node) = 0;
|
||||
|
||||
ISC_LINK_INIT(node, deadlink);
|
||||
ISC_LINK_INIT(node, prunelink);
|
||||
|
||||
LOCKNUM(node) = 0;
|
||||
WILD(node) = 0;
|
||||
|
|
|
|||
144
lib/dns/rbtdb.c
144
lib/dns/rbtdb.c
|
|
@ -494,10 +494,6 @@ struct dns_rbtdb {
|
|||
*/
|
||||
rbtnodelist_t *deadnodes;
|
||||
|
||||
/* List of nodes from which recursive tree pruning can be started from.
|
||||
* Locked by tree_lock. */
|
||||
rbtnodelist_t prunenodes;
|
||||
|
||||
/*
|
||||
* Heaps. These are used for TTL based expiry in a cache,
|
||||
* or for zone resigning in a zone DB. hmctx is the memory
|
||||
|
|
@ -1007,7 +1003,6 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) {
|
|||
unsigned int i;
|
||||
isc_result_t result;
|
||||
char buf[DNS_NAME_FORMATSIZE];
|
||||
dns_rbtnode_t *node = NULL;
|
||||
dns_rbt_t **treep;
|
||||
isc_time_t start;
|
||||
|
||||
|
|
@ -1033,6 +1028,8 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) {
|
|||
* the overhead of unlinking all nodes here should be negligible.
|
||||
*/
|
||||
for (i = 0; i < rbtdb->node_lock_count; i++) {
|
||||
dns_rbtnode_t *node;
|
||||
|
||||
node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
|
||||
while (node != NULL) {
|
||||
ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
|
||||
|
|
@ -1040,12 +1037,6 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) {
|
|||
}
|
||||
}
|
||||
|
||||
node = ISC_LIST_HEAD(rbtdb->prunenodes);
|
||||
while (node != NULL) {
|
||||
ISC_LIST_UNLINK(rbtdb->prunenodes, node, prunelink);
|
||||
node = ISC_LIST_HEAD(rbtdb->prunenodes);
|
||||
}
|
||||
|
||||
if (event == NULL) {
|
||||
rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
|
||||
}
|
||||
|
|
@ -1829,8 +1820,9 @@ delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
|
|||
*/
|
||||
static void
|
||||
new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
|
||||
isc_rwlocktype_t locktype) {
|
||||
if (locktype == isc_rwlocktype_write && ISC_LINK_LINKED(node, deadlink))
|
||||
isc_rwlocktype_t nlocktype) {
|
||||
if (nlocktype == isc_rwlocktype_write &&
|
||||
ISC_LINK_LINKED(node, deadlink))
|
||||
{
|
||||
ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node,
|
||||
deadlink);
|
||||
|
|
@ -1851,32 +1843,19 @@ is_leaf(dns_rbtnode_t *node) {
|
|||
node->left == NULL && node->right == NULL);
|
||||
}
|
||||
|
||||
/*%
|
||||
* The tree lock must be held when this function is called as it reads and
|
||||
* updates rbtdb->prunenodes.
|
||||
*/
|
||||
static void
|
||||
send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
|
||||
isc_rwlocktype_t locktype) {
|
||||
bool pruning_queued = (ISC_LIST_HEAD(rbtdb->prunenodes) != NULL);
|
||||
isc_rwlocktype_t nlocktype) {
|
||||
isc_event_t *ev;
|
||||
dns_db_t *db;
|
||||
|
||||
INSIST(locktype == isc_rwlocktype_write);
|
||||
|
||||
new_reference(rbtdb, node, locktype);
|
||||
INSIST(!ISC_LINK_LINKED(node, prunelink));
|
||||
ISC_LIST_APPEND(rbtdb->prunenodes, node, prunelink);
|
||||
|
||||
if (!pruning_queued) {
|
||||
isc_event_t *ev = NULL;
|
||||
dns_db_t *db = NULL;
|
||||
|
||||
attach((dns_db_t *)rbtdb, &db);
|
||||
|
||||
ev = isc_event_allocate(rbtdb->common.mctx, NULL,
|
||||
DNS_EVENT_RBTPRUNE, prune_tree, db,
|
||||
sizeof(isc_event_t));
|
||||
isc_task_send(rbtdb->task, &ev);
|
||||
}
|
||||
ev = isc_event_allocate(rbtdb->common.mctx, NULL, DNS_EVENT_RBTPRUNE,
|
||||
prune_tree, node, sizeof(isc_event_t));
|
||||
new_reference(rbtdb, node, nlocktype);
|
||||
db = NULL;
|
||||
attach((dns_db_t *)rbtdb, &db);
|
||||
ev->ev_sender = db;
|
||||
isc_task_send(rbtdb->task, &ev);
|
||||
}
|
||||
|
||||
/*%
|
||||
|
|
@ -2092,10 +2071,6 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
|
|||
#undef KEEP_NODE
|
||||
|
||||
if (write_locked) {
|
||||
/*
|
||||
* We can now delete the node.
|
||||
*/
|
||||
|
||||
/*
|
||||
* If this node is the only one in the level it's in, deleting
|
||||
* this node may recursively make its parent the only node in
|
||||
|
|
@ -2116,6 +2091,7 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
|
|||
send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
|
||||
no_reference = false;
|
||||
} else {
|
||||
/* We can now delete the node. */
|
||||
delete_node(rbtdb, node);
|
||||
}
|
||||
} else {
|
||||
|
|
@ -2151,27 +2127,17 @@ restore_locks:
|
|||
}
|
||||
|
||||
/*
|
||||
* Prune the tree by recursively cleaning up single leaves. Go through all
|
||||
* nodes stored in the rbtdb->prunenodes list; for each of them, in the worst
|
||||
* case, it will be necessary to traverse a number of tree levels equal to the
|
||||
* maximum legal number of domain name labels (127); in practice, the number of
|
||||
* tree levels to traverse will virtually always be much smaller (a few levels
|
||||
* at most). While holding the tree lock throughout this entire operation is
|
||||
* less than ideal, so is splitting the latter up by queueing a separate
|
||||
* prune_tree() run for each node to start pruning from (as queueing requires
|
||||
* allocating memory and can therefore potentially be exploited to exhaust
|
||||
* available memory). Also note that actually freeing up the memory used by
|
||||
* RBTDB nodes (which is what this function does) is essential to keeping cache
|
||||
* memory use in check, so since the tree lock needs to be acquired anyway,
|
||||
* freeing as many nodes as possible before the tree lock gets released is
|
||||
* prudent.
|
||||
* Prune the tree by cleaning up single leaves. A single execution of this
|
||||
* function cleans up a single node; if the parent of the latter becomes a
|
||||
* single leaf on its own level as a result, the parent is then also sent to
|
||||
* this function.
|
||||
*/
|
||||
static void
|
||||
prune_tree(isc_task_t *task, isc_event_t *event) {
|
||||
dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)event->ev_arg;
|
||||
dns_rbtnode_t *node = NULL;
|
||||
dns_rbtdb_t *rbtdb = event->ev_sender;
|
||||
dns_rbtnode_t *node = event->ev_arg;
|
||||
dns_rbtnode_t *parent = NULL;
|
||||
unsigned int locknum;
|
||||
unsigned int locknum = node->locknum;
|
||||
|
||||
UNUSED(task);
|
||||
|
||||
|
|
@ -2179,59 +2145,21 @@ prune_tree(isc_task_t *task, isc_event_t *event) {
|
|||
|
||||
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
|
||||
|
||||
while ((node = ISC_LIST_HEAD(rbtdb->prunenodes)) != NULL) {
|
||||
locknum = node->locknum;
|
||||
NODE_LOCK(&rbtdb->node_locks[locknum].lock,
|
||||
parent = node->parent;
|
||||
|
||||
NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
|
||||
decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
|
||||
isc_rwlocktype_write, true);
|
||||
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
|
||||
|
||||
if (parent != NULL && is_leaf(parent)) {
|
||||
NODE_LOCK(&rbtdb->node_locks[parent->locknum].lock,
|
||||
isc_rwlocktype_write);
|
||||
do {
|
||||
if (ISC_LINK_LINKED(node, prunelink)) {
|
||||
ISC_LIST_UNLINK(rbtdb->prunenodes, node,
|
||||
prunelink);
|
||||
}
|
||||
|
||||
parent = node->parent;
|
||||
decrement_reference(rbtdb, node, 0,
|
||||
isc_rwlocktype_write,
|
||||
isc_rwlocktype_write, true);
|
||||
|
||||
if (parent != NULL && parent->down == NULL) {
|
||||
/*
|
||||
* node was the only down child of the parent
|
||||
* and has just been removed. We'll then need
|
||||
* to examine the parent. Keep the lock if
|
||||
* possible; otherwise, release the old lock and
|
||||
* acquire one for the parent.
|
||||
*/
|
||||
if (parent->locknum != locknum) {
|
||||
NODE_UNLOCK(
|
||||
&rbtdb->node_locks[locknum].lock,
|
||||
isc_rwlocktype_write);
|
||||
locknum = parent->locknum;
|
||||
NODE_LOCK(
|
||||
&rbtdb->node_locks[locknum].lock,
|
||||
isc_rwlocktype_write);
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to gain a reference to the node
|
||||
* before decrementing it in the next iteration.
|
||||
*/
|
||||
if (ISC_LINK_LINKED(parent, deadlink)) {
|
||||
ISC_LIST_UNLINK(
|
||||
rbtdb->deadnodes[locknum],
|
||||
parent, deadlink);
|
||||
}
|
||||
new_reference(rbtdb, parent,
|
||||
isc_rwlocktype_write);
|
||||
} else {
|
||||
parent = NULL;
|
||||
}
|
||||
|
||||
node = parent;
|
||||
} while (node != NULL);
|
||||
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
|
||||
send_to_prune_tree(rbtdb, parent, isc_rwlocktype_write);
|
||||
NODE_UNLOCK(&rbtdb->node_locks[parent->locknum].lock,
|
||||
isc_rwlocktype_write);
|
||||
}
|
||||
|
||||
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
|
||||
|
||||
detach((dns_db_t **)&rbtdb);
|
||||
|
|
@ -8373,8 +8301,6 @@ dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
|
|||
ISC_LIST_INIT(rbtdb->deadnodes[i]);
|
||||
}
|
||||
|
||||
ISC_LIST_INIT(rbtdb->prunenodes);
|
||||
|
||||
rbtdb->active = rbtdb->node_lock_count;
|
||||
|
||||
for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue