Make rbtdb maintain stale counters

When updating the statistics for RRset types, if a header is marked
stale or ancient, the counter for its previous state is decremented
and the counter for its new state is then incremented.

Also fix some out of date comments.

(cherry picked from commit a3af2c57e7)
This commit is contained in:
Matthijs Mekking 2019-08-07 13:27:59 +02:00
parent 51a3ba45e1
commit a8b29e051e
4 changed files with 327 additions and 39 deletions

View file

@ -15,3 +15,4 @@ rm -f rndc.out.test*
rm -f */named.run */named.memstats
rm -f ns*/managed-keys.bind*
rm -f ns*/named_dump*
rm -f ns*/named.stats*

View file

@ -27,7 +27,7 @@ options {
listen-on { 10.53.0.1; };
listen-on-v6 { none; };
recursion yes;
max-stale-ttl 7200;
max-stale-ttl 35;
stale-answer-ttl 3;
stale-answer-enable yes;
};

View file

@ -31,6 +31,11 @@ n=0
#$DIG -p ${PORT} @10.53.0.2 nodata.example TXT
#$DIG -p ${PORT} @10.53.0.2 nxdomain.example TXT
#
# First test server with serve-stale options set.
#
echo_i "test server with serve-stale options set"
n=`expr $n + 1`
echo_i "prime cache longttl.example ($n)"
ret=0
@ -67,6 +72,22 @@ grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "verify prime cache statistics ($n)"
ret=0
rm -f ns1/named.stats
$RNDCCMD 10.53.0.1 stats > /dev/null 2>&1
[ -f ns1/named.stats ] || ret=1
cp ns1/named.stats ns1/named.stats.$n
# Check first 10 lines of Cache DB statistics. After prime queries, we expect
# two active TXT RRsets, one nxrrset TXT, and one NXDOMAIN.
grep -A 10 "++ Cache DB RRsets ++" ns1/named.stats.$n > ns1/named.stats.$n.cachedb || ret=1
grep "2 TXT" ns1/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 !TXT" ns1/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 NXDOMAIN" ns1/named.stats.$n.cachedb > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "disable responses from authoritative server ($n)"
ret=0
@ -125,6 +146,24 @@ grep "example\..*2.*IN.*SOA" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "verify stale cache statistics ($n)"
ret=0
rm -f ns1/named.stats
$RNDCCMD 10.53.0.1 stats > /dev/null 2>&1
[ -f ns1/named.stats ] || ret=1
cp ns1/named.stats ns1/named.stats.$n
# Check first 10 lines of Cache DB statistics. After serve-stale queries, we
# expect one active TXT RRset, one stale TXT, one stale nxrrset TXT, and one
# stale NXDOMAIN.
grep -A 10 "++ Cache DB RRsets ++" ns1/named.stats.$n > ns1/named.stats.$n.cachedb || ret=1
grep "1 TXT" ns1/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 #TXT" ns1/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 #!TXT" ns1/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 #NXDOMAIN" ns1/named.stats.$n.cachedb > /dev/null || ret=1
status=`expr $status + $ret`
if [ $ret != 0 ]; then echo_i "failed"; fi
n=`expr $n + 1`
echo_i "running 'rndc serve-stale off' ($n)"
ret=0
@ -362,6 +401,12 @@ grep '_default: off (rndc) (stale-answer-ttl=2 max-stale-ttl=3600)' rndc.out.tes
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
#
# Update named.conf.
# Test server with low max-stale-ttl.
#
echo_i "test server with serve-stale options set, low max-stale-ttl"
n=`expr $n + 1`
echo_i "updating ns1/named.conf ($n)"
ret=0
@ -380,23 +425,7 @@ n=`expr $n + 1`
echo_i "check 'rndc serve-stale status' ($n)"
ret=0
$RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1
grep '_default: off (rndc) (stale-answer-ttl=3 max-stale-ttl=7200)' rndc.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "check 'rndc serve-stale' ($n)"
ret=0
$RNDCCMD 10.53.0.1 serve-stale > rndc.out.test$n 2>&1 && ret=1
grep "unexpected end of input" rndc.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "check 'rndc serve-stale unknown' ($n)"
ret=0
$RNDCCMD 10.53.0.1 serve-stale unknown > rndc.out.test$n 2>&1 && ret=1
grep "syntax error" rndc.out.test$n > /dev/null || ret=1
grep '_default: off (rndc) (stale-answer-ttl=3 max-stale-ttl=35)' rndc.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
@ -411,6 +440,14 @@ grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "check 'rndc serve-stale status' ($n)"
ret=0
$RNDCCMD 10.53.0.1 serve-stale status > rndc.out.test$n 2>&1 || ret=1
grep '_default: on (rndc) (stale-answer-ttl=3 max-stale-ttl=35)' rndc.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "enable responses from authoritative server ($n)"
ret=0
@ -420,6 +457,171 @@ grep "TXT.\"1\"" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
sleep 1
n=`expr $n + 1`
echo_i "prime cache longttl.example (low max-stale-ttl) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.1 longttl.example TXT > dig.out.test$n
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "prime cache data.example (low max-stale-ttl) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$n
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "prime cache nodata.example (low max-stale-ttl) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$n
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "prime cache nxdomain.example (low max-stale-ttl) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$n
grep "status: NXDOMAIN" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "verify prime cache statistics (low max-stale-ttl) ($n)"
ret=0
rm -f ns1/named.stats
$RNDCCMD 10.53.0.1 stats > /dev/null 2>&1
[ -f ns1/named.stats ] || ret=1
cp ns1/named.stats ns1/named.stats.$n
# Check first 10 lines of Cache DB statistics. After prime queries, we expect
# two active TXT RRsets, one nxrrset TXT, and one NXDOMAIN.
grep -A 10 "++ Cache DB RRsets ++" ns1/named.stats.$n > ns1/named.stats.$n.cachedb || ret=1
grep "2 TXT" ns1/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 !TXT" ns1/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 NXDOMAIN" ns1/named.stats.$n.cachedb > /dev/null || ret=1
status=`expr $status + $ret`
if [ $ret != 0 ]; then echo_i "failed"; fi
n=`expr $n + 1`
echo_i "disable responses from authoritative server ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.2 txt disable > dig.out.test$n
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
grep "TXT.\"0\"" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
sleep 1
n=`expr $n + 1`
echo_i "check stale data.example (low max-stale-ttl) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$n
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
grep "data\.example\..*3.*IN.*TXT.*A text record with a 1 second ttl" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "check stale nodata.example (low max-stale-ttl) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$n
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
grep "example\..*3.*IN.*SOA" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "check stale nxdomain.example (low max-stale-ttl) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$n
grep "status: NXDOMAIN" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
grep "example\..*3.*IN.*SOA" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "verify stale cache statistics (low max-stale-ttl) ($n)"
ret=0
rm -f ns1/named.stats
$RNDCCMD 10.53.0.1 stats > /dev/null 2>&1
[ -f ns1/named.stats ] || ret=1
cp ns1/named.stats ns1/named.stats.$n
# Check first 10 lines of Cache DB statistics. After serve-stale queries, we
# expect one active TXT RRset, one stale TXT, one stale nxrrset TXT, and one
# stale NXDOMAIN.
grep -A 10 "++ Cache DB RRsets ++" ns1/named.stats.$n > ns1/named.stats.$n.cachedb || ret=1
grep "1 TXT" ns1/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 #TXT" ns1/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 #!TXT" ns1/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 #NXDOMAIN" ns1/named.stats.$n.cachedb > /dev/null || ret=1
status=`expr $status + $ret`
if [ $ret != 0 ]; then echo_i "failed"; fi
sleep 1
n=`expr $n + 1`
echo_i "check ancient data.example (low max-stale-ttl) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$n
grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "check ancient nodata.example (low max-stale-ttl) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$n
grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "check ancient nxdomain.example (low max-stale-ttl) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$n
grep "status: SERVFAIL" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
#
# Now test server with no serve-stale options set.
#
echo_i "test server with no serve-stale options set"
n=`expr $n + 1`
echo_i "enable responses from authoritative server ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.2 txt enable > dig.out.test$n
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
grep "TXT.\"1\"" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "prime cache longttl.example (max-stale-ttl default) ($n)"
ret=0
$DIG -p ${PORT} @10.53.0.3 longttl.example TXT > dig.out.test$n
grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1
grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "prime cache data.example (max-stale-ttl default) ($n)"
ret=0
@ -459,6 +661,22 @@ grep "TXT.\"0\"" dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "verify prime cache statistics (max-stale-ttl default) ($n)"
ret=0
rm -f ns3/named.stats
$RNDCCMD 10.53.0.3 stats > /dev/null 2>&1
[ -f ns3/named.stats ] || ret=1
cp ns3/named.stats ns3/named.stats.$n
# Check first 10 lines of Cache DB statistics. After prime queries, we expect
# two active TXT RRsets, one nxrrset TXT, and one NXDOMAIN.
grep -A 10 "++ Cache DB RRsets ++" ns3/named.stats.$n > ns3/named.stats.$n.cachedb || ret=1
grep "2 TXT" ns3/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 !TXT" ns3/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 NXDOMAIN" ns3/named.stats.$n.cachedb > /dev/null || ret=1
status=`expr $status + $ret`
if [ $ret != 0 ]; then echo_i "failed"; fi
sleep 1
n=`expr $n + 1`
@ -496,6 +714,24 @@ grep "ANSWER: 0," dig.out.test$n > /dev/null || ret=1
if [ $ret != 0 ]; then echo_i "failed"; fi
status=`expr $status + $ret`
n=`expr $n + 1`
echo_i "verify stale cache statistics (max-stale-ttl default) ($n)"
ret=0
rm -f ns3/named.stats
$RNDCCMD 10.53.0.3 stats > /dev/null 2>&1
[ -f ns3/named.stats ] || ret=1
cp ns3/named.stats ns3/named.stats.$n
# Check first 10 lines of Cache DB statistics. After last queries, we expect
# one active TXT RRset, one stale TXT, one stale nxrrset TXT, and one
# stale NXDOMAIN.
grep -A 10 "++ Cache DB RRsets ++" ns3/named.stats.$n > ns3/named.stats.$n.cachedb || ret=1
grep "1 TXT" ns3/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 #TXT" ns3/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 #!TXT" ns3/named.stats.$n.cachedb > /dev/null || ret=1
grep "1 #NXDOMAIN" ns3/named.stats.$n.cachedb > /dev/null || ret=1
status=`expr $status + $ret`
if [ $ret != 0 ]; then echo_i "failed"; fi
n=`expr $n + 1`
echo_i "check 'rndc serve-stale on' ($n)"
ret=0

View file

@ -196,7 +196,7 @@ typedef struct rdatasetheader {
rbtdb_serial_t serial;
dns_ttl_t rdh_ttl;
rbtdb_rdatatype_t type;
uint16_t attributes;
uint16_t attributes;
dns_trust_t trust;
struct noqname *noqname;
struct noqname *closest;
@ -833,17 +833,23 @@ update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
base = RBTDB_RDATATYPE_EXT(header->type);
}
} else
} else {
base = RBTDB_RDATATYPE_BASE(header->type);
}
if (STALE(header))
if (STALE(header)) {
statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE;
}
if (ANCIENT(header)) {
statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT;
}
type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
if (increment)
if (increment) {
dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
else
} else {
dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
}
}
static void
@ -1520,24 +1526,68 @@ rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
static inline void
mark_header_ancient(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
bool do_stats = false;
/*
* If we are already ancient there is nothing to do.
*/
if (ANCIENT(header))
if (ANCIENT(header)) {
return;
}
if ((header->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
do_stats = EXISTS(header);
}
if (do_stats) {
/*
* Decrement the stats counter for the appropriate RRtype.
* If the STALE attribute is set, this will decrement the
* stale type counter, otherwise it decrements the active
* stats type counter.
*/
update_rrsetstats(rbtdb, header, false);
}
header->attributes |= RDATASET_ATTR_ANCIENT;
header->node->dirty = 1;
/*
* If we have not been counted then there is nothing to do.
*/
if ((header->attributes & RDATASET_ATTR_STATCOUNT) == 0)
return;
if (EXISTS(header))
if (do_stats) {
/* Increment the stats counter for the ancient RRtype. */
update_rrsetstats(rbtdb, header, true);
}
}
/*
* Mark 'header' as stale and keep the RRset type statistics of 'rbtdb'
* consistent with the new state.
*
* Nothing happens if the header is already stale.  The statistics are
* only adjusted when the header has RDATASET_ATTR_STATCOUNT set and
* EXISTS(header) is true.
*/
static inline void
mark_header_stale(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
bool do_stats = false;
/*
* If we are already stale there is nothing to do.
*/
if (STALE(header)) {
return;
}
/*
* Decide once, before the attributes change, whether this header
* takes part in the statistics; the same decision is used for both
* the decrement and the increment below.
*/
if ((header->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
do_stats = EXISTS(header);
}
if (do_stats) {
/*
* Decrement the stats counter for the appropriate RRtype.
* If the ANCIENT attribute is set (although it is very
* unlikely that an RRset goes from ANCIENT to STALE), this
* will decrement the ancient stats type counter, otherwise it
* decrements the active stats type counter.
*/
update_rrsetstats(rbtdb, header, false);
}
header->attributes |= RDATASET_ATTR_STALE;
if (do_stats) {
/*
* Increment the stats counter again now that the STALE
* attribute is set, so the header is counted under its
* stale type.
*/
update_rrsetstats(rbtdb, header, true);
}
}
static inline void
@ -4333,7 +4383,7 @@ check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header,
* skip this record.
*/
if (KEEPSTALE(search->rbtdb) && stale > search->now) {
header->attributes |= RDATASET_ATTR_STALE;
mark_header_stale(search->rbtdb, header);
*header_prev = header;
return ((search->options & DNS_DBFIND_STALEOK) == 0);
}
@ -4341,7 +4391,7 @@ check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header,
/*
* This rdataset is stale. If no one else is using the
* node, we can clean it up right now, otherwise we mark
* it as stale, and the node as dirty, so it will get
* it as ancient, and the node as dirty, so it will get
* cleaned up later.
*/
if ((header->rdh_ttl < search->now - RBTDB_VIRTUAL) &&
@ -5302,7 +5352,8 @@ expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
mark_header_ancient(rbtdb, header);
if (log)
isc_log_write(dns_lctx, category, module,
level, "overmem cache: stale %s",
level,
"overmem cache: ancient %s",
printname);
} else if (force_expire) {
if (! RETAIN(header)) {
@ -5841,7 +5892,7 @@ add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
* which covers all types (NXDOMAIN,
* NODATA(QTYPE=ANY)),
*
* We make all other data stale so that the
* We make all other data ancient so that the
* only rdataset that can be found at this
* node is the negative cache entry.
*/
@ -5856,7 +5907,7 @@ add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
}
/*
* Otherwise look for any RRSIGs of the given
* type so they can be marked stale later.
* type so they can be marked ancient later.
*/
for (topheader = rbtnode->data;
topheader != NULL;
@ -5868,9 +5919,9 @@ add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
/*
* We're adding something that isn't a
* negative cache entry. Look for an extant
* non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
* non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative
* cache entry. If we're adding an RRSIG, also
* check for an extant non-stale NODATA ncache
* check for an extant non-ancient NODATA ncache
* entry which covers the same type as the RRSIG.
*/
for (topheader = rbtnode->data;
@ -6558,7 +6609,7 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
* If we're adding a delegation type, adding to the auxiliary NSEC tree,
* or the DB is a cache in an overmem state, hold an exclusive lock on
* the tree. In the latter case the lock does not necessarily have to
* be acquired but it will help purge stale entries more effectively.
* be acquired but it will help purge ancient entries more effectively.
*/
if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
cache_is_overmem = true;