mirror of
https://github.com/isc-projects/bind9.git
synced 2026-05-28 04:34:54 -04:00
2596. [bug] Stale tree nodes of cache/dynamic rbtdb could stay
long, leading to inefficient memory usage or rejecting newer cache entries in the worst case. [RT #19563]
This commit is contained in:
parent
0e77d33686
commit
5d7849ad7f
5 changed files with 190 additions and 40 deletions
4
CHANGES
4
CHANGES
|
|
@ -1,3 +1,7 @@
|
|||
2596. [bug] Stale tree nodes of cache/dynamic rbtdb could stay
|
||||
long, leading to inefficient memory usage or rejecting
|
||||
newer cache entries in the worst case. [RT #19563]
|
||||
|
||||
2595. [bug] Fix unknown extended rcodes in dig. [RT #19625]
|
||||
|
||||
2594. [func] Have rndc warn if using its default configuration
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
* PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
/* $Id: cache.c,v 1.83 2009/01/17 14:13:42 fdupont Exp $ */
|
||||
/* $Id: cache.c,v 1.84 2009/05/06 22:53:54 jinmei Exp $ */
|
||||
|
||||
/*! \file */
|
||||
|
||||
|
|
@ -186,6 +186,7 @@ dns_cache_create2(isc_mem_t *mctx, isc_taskmgr_t *taskmgr,
|
|||
isc_result_t result;
|
||||
dns_cache_t *cache;
|
||||
int i;
|
||||
isc_task_t *dbtask;
|
||||
|
||||
REQUIRE(cachep != NULL);
|
||||
REQUIRE(*cachep == NULL);
|
||||
|
|
@ -251,6 +252,14 @@ dns_cache_create2(isc_mem_t *mctx, isc_taskmgr_t *taskmgr,
|
|||
result = cache_create_db(cache, &cache->db);
|
||||
if (result != ISC_R_SUCCESS)
|
||||
goto cleanup_dbargv;
|
||||
if (taskmgr != NULL) {
|
||||
dbtask = NULL;
|
||||
result = isc_task_create(taskmgr, 1, &dbtask);
|
||||
if (result != ISC_R_SUCCESS)
|
||||
goto cleanup_db;
|
||||
dns_db_settask(cache->db, dbtask);
|
||||
isc_task_detach(&dbtask);
|
||||
}
|
||||
|
||||
cache->filename = NULL;
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
* PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
/* $Id: events.h,v 1.49 2007/06/19 23:47:16 tbox Exp $ */
|
||||
/* $Id: events.h,v 1.50 2009/05/06 22:53:54 jinmei Exp $ */
|
||||
|
||||
#ifndef DNS_EVENTS_H
|
||||
#define DNS_EVENTS_H 1
|
||||
|
|
@ -68,6 +68,7 @@
|
|||
#define DNS_EVENT_ACACHECONTROL (ISC_EVENTCLASS_DNS + 38)
|
||||
#define DNS_EVENT_ACACHECLEAN (ISC_EVENTCLASS_DNS + 39)
|
||||
#define DNS_EVENT_ACACHEOVERMEM (ISC_EVENTCLASS_DNS + 40)
|
||||
#define DNS_EVENT_RBTPRUNE (ISC_EVENTCLASS_DNS + 41)
|
||||
|
||||
#define DNS_EVENT_FIRSTEVENT (ISC_EVENTCLASS_DNS + 0)
|
||||
#define DNS_EVENT_LASTEVENT (ISC_EVENTCLASS_DNS + 65535)
|
||||
|
|
|
|||
206
lib/dns/rbtdb.c
206
lib/dns/rbtdb.c
|
|
@ -15,7 +15,7 @@
|
|||
* PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
/* $Id: rbtdb.c,v 1.275 2009/04/07 02:49:37 jinmei Exp $ */
|
||||
/* $Id: rbtdb.c,v 1.276 2009/05/06 22:53:54 jinmei Exp $ */
|
||||
|
||||
/*! \file */
|
||||
|
||||
|
|
@ -533,6 +533,7 @@ static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
|
|||
isc_stdtime_t now, isc_boolean_t tree_locked);
|
||||
static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
|
||||
rdatasetheader_t *newheader);
|
||||
static void prune_tree(isc_task_t *task, isc_event_t *event);
|
||||
|
||||
static dns_rdatasetmethods_t rdataset_methods = {
|
||||
rdataset_disassociate,
|
||||
|
|
@ -1551,7 +1552,7 @@ new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
|
|||
/*
|
||||
* This function is assumed to be called when a node is newly referenced
|
||||
* and can be in the deadnode list. In that case the node must be retrieved
|
||||
* from the list because the it is going to be used. In addition, if the caller
|
||||
* from the list because it is going to be used. In addition, if the caller
|
||||
* happens to hold a write lock on the tree, it's a good chance to purge dead
|
||||
* nodes.
|
||||
* Note: while a new reference is gained in multiple places, there are only very
|
||||
|
|
@ -1604,13 +1605,15 @@ reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
|
|||
static isc_boolean_t
|
||||
decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
|
||||
rbtdb_serial_t least_serial,
|
||||
isc_rwlocktype_t nlock, isc_rwlocktype_t tlock)
|
||||
isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
|
||||
isc_boolean_t pruning)
|
||||
{
|
||||
isc_result_t result;
|
||||
isc_boolean_t write_locked;
|
||||
rbtdb_nodelock_t *nodelock;
|
||||
unsigned int refs, nrefs;
|
||||
int bucket = node->locknum;
|
||||
isc_boolean_t no_reference;
|
||||
|
||||
nodelock = &rbtdb->node_locks[bucket];
|
||||
|
||||
|
|
@ -1693,6 +1696,7 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
|
|||
} else
|
||||
write_locked = ISC_TRUE;
|
||||
|
||||
no_reference = ISC_TRUE;
|
||||
if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
|
||||
/*
|
||||
* We can now delete the node if the reference counter is
|
||||
|
|
@ -1701,35 +1705,97 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
|
|||
* current thread locks the tree (e.g., in findnode()).
|
||||
*/
|
||||
|
||||
if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
|
||||
char printname[DNS_NAME_FORMATSIZE];
|
||||
/*
|
||||
* If this node is the only one in the level it's in, deleting
|
||||
* this node may recursively make its parent the only node in
|
||||
* the parent level; if so, and if no one is currently using
|
||||
* the parent node, this is almost the only opportunity to
|
||||
* clean it up. But the recursive cleanup is not that trivial
|
||||
* since the child and parent may be in different lock buckets,
|
||||
* which would cause a lock order reversal problem. To avoid
|
||||
* the trouble, we'll dispatch a separate event for batch
|
||||
* cleaning. We need to check whether we're deleting the node
|
||||
* as a result of pruning to avoid infinite dispatching.
|
||||
* Note: pruning happens only when a task has been set for the
|
||||
* rbtdb. If the user of the rbtdb chooses not to set a task,
|
||||
* it's their responsibility to purge stale leaves (e.g. by
|
||||
* periodic walk-through).
|
||||
*/
|
||||
if (!pruning && node->parent != NULL &&
|
||||
node->parent->down == node && node->left == NULL &&
|
||||
node->right == NULL && rbtdb->task != NULL) {
|
||||
isc_event_t *ev;
|
||||
dns_db_t *db;
|
||||
|
||||
isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
|
||||
DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
|
||||
"decrement_reference: "
|
||||
"delete from rbt: %p %s",
|
||||
node,
|
||||
dns_rbt_formatnodename(node, printname,
|
||||
sizeof(printname)));
|
||||
ev = isc_event_allocate(rbtdb->common.mctx, NULL,
|
||||
DNS_EVENT_RBTPRUNE,
|
||||
prune_tree, node,
|
||||
sizeof(isc_event_t));
|
||||
if (ev != NULL) {
|
||||
new_reference(rbtdb, node);
|
||||
db = NULL;
|
||||
attach((dns_db_t *)rbtdb, &db);
|
||||
ev->ev_sender = db;
|
||||
isc_task_send(rbtdb->task, &ev);
|
||||
no_reference = ISC_FALSE;
|
||||
} else {
|
||||
/*
|
||||
* XXX: this is a weird situation. We could
|
||||
* ignore this error case, but then the stale
|
||||
* node will unlikely be purged except via a
|
||||
* rare condition such as manual cleanup. So
|
||||
* we queue it in the deadnodes list, hoping
|
||||
* the memory shortage is temporary and the node
|
||||
* will be deleted later.
|
||||
*/
|
||||
isc_log_write(dns_lctx,
|
||||
DNS_LOGCATEGORY_DATABASE,
|
||||
DNS_LOGMODULE_CACHE,
|
||||
ISC_LOG_INFO,
|
||||
"decrement_reference: failed to "
|
||||
"allocate pruning event");
|
||||
INSIST(!ISC_LINK_LINKED(node, deadlink));
|
||||
ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
|
||||
deadlink);
|
||||
}
|
||||
} else {
|
||||
if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
|
||||
char printname[DNS_NAME_FORMATSIZE];
|
||||
|
||||
isc_log_write(dns_lctx,
|
||||
DNS_LOGCATEGORY_DATABASE,
|
||||
DNS_LOGMODULE_CACHE,
|
||||
ISC_LOG_DEBUG(1),
|
||||
"decrement_reference: "
|
||||
"delete from rbt: %p %s",
|
||||
node,
|
||||
dns_rbt_formatnodename(node,
|
||||
printname,
|
||||
sizeof(printname)));
|
||||
}
|
||||
|
||||
INSIST(!ISC_LINK_LINKED(node, deadlink));
|
||||
if (node->nsec3)
|
||||
result = dns_rbt_deletenode(rbtdb->nsec3, node,
|
||||
ISC_FALSE);
|
||||
else
|
||||
result = dns_rbt_deletenode(rbtdb->tree, node,
|
||||
ISC_FALSE);
|
||||
if (result != ISC_R_SUCCESS) {
|
||||
isc_log_write(dns_lctx,
|
||||
DNS_LOGCATEGORY_DATABASE,
|
||||
DNS_LOGMODULE_CACHE,
|
||||
ISC_LOG_WARNING,
|
||||
"decrement_reference: "
|
||||
"dns_rbt_deletenode: %s",
|
||||
isc_result_totext(result));
|
||||
}
|
||||
}
|
||||
|
||||
INSIST(!ISC_LINK_LINKED(node, deadlink));
|
||||
if (node->nsec3)
|
||||
result = dns_rbt_deletenode(rbtdb->nsec3, node,
|
||||
ISC_FALSE);
|
||||
else
|
||||
result = dns_rbt_deletenode(rbtdb->tree, node,
|
||||
ISC_FALSE);
|
||||
if (result != ISC_R_SUCCESS)
|
||||
isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
|
||||
DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
|
||||
"decrement_reference: "
|
||||
"dns_rbt_deletenode: %s",
|
||||
isc_result_totext(result));
|
||||
} else if (dns_rbtnode_refcurrent(node) == 0) {
|
||||
INSIST(!ISC_LINK_LINKED(node, deadlink));
|
||||
ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
|
||||
}
|
||||
} else
|
||||
no_reference = ISC_FALSE;
|
||||
|
||||
/* Restore the lock? */
|
||||
if (nlock == isc_rwlocktype_read)
|
||||
|
|
@ -1746,7 +1812,71 @@ decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
|
|||
if (write_locked)
|
||||
isc_rwlock_downgrade(&rbtdb->tree_lock);
|
||||
|
||||
return (ISC_TRUE);
|
||||
return (no_reference);
|
||||
}
|
||||
|
||||
/*
|
||||
* Prune the tree by recursively cleaning-up single leaves. In the worst
|
||||
* case, the number of iteration is the number of tree levels, which is at
|
||||
* most the maximum number of domain name labels, i.e, 127. In practice, this
|
||||
* should be much smaller (only a few times), and even the worst case would be
|
||||
* acceptable for a single event.
|
||||
*/
|
||||
static void
|
||||
prune_tree(isc_task_t *task, isc_event_t *event) {
|
||||
dns_rbtdb_t *rbtdb = event->ev_sender;
|
||||
dns_rbtnode_t *node = event->ev_arg;
|
||||
dns_rbtnode_t *parent;
|
||||
unsigned int locknum;
|
||||
|
||||
UNUSED(task);
|
||||
|
||||
isc_event_free(&event);
|
||||
|
||||
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
|
||||
locknum = node->locknum;
|
||||
NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
|
||||
do {
|
||||
parent = node->parent;
|
||||
decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
|
||||
isc_rwlocktype_write, ISC_TRUE);
|
||||
|
||||
if (parent != NULL && parent->down == NULL) {
|
||||
/*
|
||||
* node was the only down child of the parent and has
|
||||
* just been removed. We'll then need to examine the
|
||||
* parent. Keep the lock if possible; otherwise,
|
||||
* release the old lock and acquire one for the parent.
|
||||
*/
|
||||
if (parent->locknum != locknum) {
|
||||
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
|
||||
isc_rwlocktype_write);
|
||||
locknum = parent->locknum;
|
||||
NODE_LOCK(&rbtdb->node_locks[locknum].lock,
|
||||
isc_rwlocktype_write);
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to gain a reference to the node before
|
||||
* decrementing it in the next iteration. In addition,
|
||||
* if the node is in the dead-nodes list, extract it
|
||||
* from the list beforehand as we do in
|
||||
* reactivate_node().
|
||||
*/
|
||||
new_reference(rbtdb, parent);
|
||||
if (ISC_LINK_LINKED(parent, deadlink)) {
|
||||
ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
|
||||
parent, deadlink);
|
||||
}
|
||||
} else
|
||||
parent = NULL;
|
||||
|
||||
node = parent;
|
||||
} while (node != NULL);
|
||||
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
|
||||
RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
|
||||
|
||||
detach((dns_db_t **)&rbtdb);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
@ -2163,8 +2293,8 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
|
|||
NODE_UNLOCK(lock, isc_rwlocktype_write);
|
||||
}
|
||||
decrement_reference(rbtdb, header->node, least_serial,
|
||||
isc_rwlocktype_write,
|
||||
isc_rwlocktype_none);
|
||||
isc_rwlocktype_write, isc_rwlocktype_none,
|
||||
ISC_FALSE);
|
||||
}
|
||||
|
||||
if (!EMPTY(cleanup_list)) {
|
||||
|
|
@ -2197,7 +2327,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
|
|||
rollback_node(rbtnode, serial);
|
||||
decrement_reference(rbtdb, rbtnode, least_serial,
|
||||
isc_rwlocktype_write,
|
||||
isc_rwlocktype_write);
|
||||
isc_rwlocktype_write, ISC_FALSE);
|
||||
|
||||
NODE_UNLOCK(lock, isc_rwlocktype_write);
|
||||
|
||||
|
|
@ -3747,7 +3877,8 @@ zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
|
|||
|
||||
NODE_LOCK(lock, isc_rwlocktype_read);
|
||||
decrement_reference(search.rbtdb, node, 0,
|
||||
isc_rwlocktype_read, isc_rwlocktype_none);
|
||||
isc_rwlocktype_read, isc_rwlocktype_none,
|
||||
ISC_FALSE);
|
||||
NODE_UNLOCK(lock, isc_rwlocktype_read);
|
||||
}
|
||||
|
||||
|
|
@ -4525,7 +4656,8 @@ cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
|
|||
|
||||
NODE_LOCK(lock, isc_rwlocktype_read);
|
||||
decrement_reference(search.rbtdb, node, 0,
|
||||
isc_rwlocktype_read, isc_rwlocktype_none);
|
||||
isc_rwlocktype_read, isc_rwlocktype_none,
|
||||
ISC_FALSE);
|
||||
NODE_UNLOCK(lock, isc_rwlocktype_read);
|
||||
}
|
||||
|
||||
|
|
@ -4743,7 +4875,7 @@ detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
|
|||
NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
|
||||
|
||||
if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
|
||||
isc_rwlocktype_none)) {
|
||||
isc_rwlocktype_none, ISC_FALSE)) {
|
||||
if (isc_refcount_current(&nodelock->references) == 0 &&
|
||||
nodelock->exiting) {
|
||||
inactive = ISC_TRUE;
|
||||
|
|
@ -7469,7 +7601,7 @@ dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
|
|||
lock = &rbtdb->node_locks[node->locknum].lock;
|
||||
NODE_LOCK(lock, isc_rwlocktype_read);
|
||||
decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
|
||||
rbtdbiter->tree_locked);
|
||||
rbtdbiter->tree_locked, ISC_FALSE);
|
||||
NODE_UNLOCK(lock, isc_rwlocktype_read);
|
||||
|
||||
rbtdbiter->node = NULL;
|
||||
|
|
@ -7510,7 +7642,7 @@ flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
|
|||
NODE_LOCK(lock, isc_rwlocktype_read);
|
||||
decrement_reference(rbtdb, node, 0,
|
||||
isc_rwlocktype_read,
|
||||
rbtdbiter->tree_locked);
|
||||
rbtdbiter->tree_locked, ISC_FALSE);
|
||||
NODE_UNLOCK(lock, isc_rwlocktype_read);
|
||||
}
|
||||
|
||||
|
|
@ -8446,6 +8578,6 @@ expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
|
|||
decrement_reference(rbtdb, header->node, 0,
|
||||
isc_rwlocktype_write,
|
||||
tree_locked ? isc_rwlocktype_write :
|
||||
isc_rwlocktype_none);
|
||||
isc_rwlocktype_none, ISC_FALSE);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
* PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
/* $Id: hash.c,v 1.13 2007/06/19 23:47:17 tbox Exp $ */
|
||||
/* $Id: hash.c,v 1.14 2009/05/06 22:53:54 jinmei Exp $ */
|
||||
|
||||
/*! \file
|
||||
* Some portion of this code was derived from universal hash function
|
||||
|
|
@ -293,6 +293,7 @@ static void
|
|||
destroy(isc_hash_t **hctxp) {
|
||||
isc_hash_t *hctx;
|
||||
isc_mem_t *mctx;
|
||||
unsigned char canary0[4], canary1[4];
|
||||
|
||||
REQUIRE(hctxp != NULL && *hctxp != NULL);
|
||||
hctx = *hctxp;
|
||||
|
|
@ -312,7 +313,10 @@ destroy(isc_hash_t **hctxp) {
|
|||
|
||||
DESTROYLOCK(&hctx->lock);
|
||||
|
||||
memcpy(canary0, hctx + 1, sizeof(canary0));
|
||||
memset(hctx, 0, sizeof(isc_hash_t));
|
||||
memcpy(canary1, hctx + 1, sizeof(canary1));
|
||||
INSIST(memcmp(canary0, canary1, sizeof(canary0)) == 0);
|
||||
isc_mem_put(mctx, hctx, sizeof(isc_hash_t));
|
||||
isc_mem_detach(&mctx);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue