From 2a5e0232ed56233ae047fe1c31fded0d1ea96d77 Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Wed, 10 Mar 2021 12:09:16 +0100 Subject: [PATCH 1/3] Fix nonsensical stale TTL values in cache dump When introducing change 5149, "rndc dumpdb" started to print a line above a stale RRset, indicating how long the data will be retained. At that time, I thought it should also be possible to load a cache from file. But if a TTL has a value of 0 (because it is stale), stale entries wouldn't be loaded from file. So, I added the 'max-stale-ttl' to TTL values, and adjusted the $DATE accordingly. Since we actually don't have a "load cache from file" feature, this is premature and is causing confusion at operators. This commit changes the 'max-stale-ttl' adjustments. A check in the serve-stale system test is added for a non-stale RRset (longttl.example) to make sure the TTL in cache is sensible. Also, the comment above stale RRsets could have nonsensical values. A possible reason why this may happen is when the RRset was marked a stale but the 'max-stale-ttl' has passed (and is actually an RRset awaiting cleanup). This would lead to the "will be retained" value to be negative (but since it is stored in an uint32_t, you would get a nonsensical value (e.g. 4294362497). To mitigate against this, we now also check if the header is not ancient. In addition we check if the stale_ttl would be negative, and if so we set it to 0. Most likely this will not happen because the header would already have been marked ancient, but there is a possible race condition where the 'rdh_ttl + serve_stale_ttl' has passed, but the header has not been checked for staleness. --- CHANGES | 7 +++++++ bin/tests/system/serve-stale/tests.sh | 16 +++++++++++++--- doc/notes/notes-current.rst | 6 ++++++ lib/dns/masterdump.c | 9 +-------- lib/dns/rbtdb.c | 11 ++++++++--- 5 files changed, 35 insertions(+), 14 deletions(-) diff --git a/CHANGES b/CHANGES index a7acefc77d..51eb7c80e8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,10 @@ +5618. [bug] When introducing change 5149, "rndc dumpdb" started + to print a line above a stale RRset, indicating how + long the data will be retained. Also, TTLs were + increased with 'max-stale-ttl'. This could lead to + nonsensical values and both issues have been fixed. + [GL #389] [GL #2289] + 5617. [placeholder] 5616. [placeholder] diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index 2205962c7d..f16aaf0844 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -107,9 +107,10 @@ sleep 2 echo_i "sending queries for tests $((n+1))-$((n+4))..." $DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$((n+1)) & -$DIG -p ${PORT} @10.53.0.1 othertype.example CAA > dig.out.test$((n+2)) & -$DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$((n+3)) & -$DIG -p ${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$((n+4)) +$DIG -p ${PORT} @10.53.0.1 longttl.example TXT > dig.out.test$((n+2)) & +$DIG -p ${PORT} @10.53.0.1 othertype.example CAA > dig.out.test$((n+3)) & +$DIG -p ${PORT} @10.53.0.1 nodata.example TXT > dig.out.test$((n+4)) & +$DIG -p ${PORT} @10.53.0.1 nxdomain.example TXT > dig.out.test$((n+5)) wait @@ -135,6 +136,15 @@ awk '/; answer/ { x=$0; getline; print x, $0}' ns1/named_dump.db.test$n | if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) +n=$((n+1)) +echo_i "check non-stale longttl.example ($n)" +ret=0 +grep "status: NOERROR" dig.out.test$n > /dev/null || ret=1 +grep "ANSWER: 1," dig.out.test$n > /dev/null || ret=1 +grep "longttl\.example\..*59[0-9].*IN.*TXT.*A text record with a 600 second ttl" dig.out.test$n > /dev/null || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + n=$((n+1)) echo_i "check stale othertype.example ($n)" ret=0 diff --git a/doc/notes/notes-current.rst b/doc/notes/notes-current.rst index cd3af2cb7b..4098e8ea0a 100644 --- a/doc/notes/notes-current.rst +++ b/doc/notes/notes-current.rst @@ -80,3 +80,9 @@ Bug Fixes state. (Other zone journal files were not affected.) This has been fixed. If a corrupt journal file is detected, ``named`` can now recover from it. [GL #2600] + +- When dumping the cache to file, TTLs were being increased with + ``max-stale-ttl``. Also the comment above stale RRsets could have nonsensical + values if the RRset was still marked a stale but the ``max-stale-ttl`` has + passed (and is actually an RRset awaiting cleanup). Both issues have now + been fixed. [GL #389] [GL #2289] diff --git a/lib/dns/masterdump.c b/lib/dns/masterdump.c index edd4fcaaa0..8a4eb4de47 100644 --- a/lib/dns/masterdump.c +++ b/lib/dns/masterdump.c @@ -1111,8 +1111,7 @@ again: fprintf(f, "; stale (will be retained for %u more " "seconds)\n", - (rds->stale_ttl - - ctx->serve_stale_ttl)); + rds->stale_ttl); } else if (ANCIENT(rds)) { isc_buffer_t b; char buf[sizeof("YYYYMMDDHHMMSS")]; @@ -1591,14 +1590,8 @@ dumpctx_create(isc_mem_t *mctx, dns_db_t *db, dns_dbversion_t *version, dctx->do_date = dns_db_iscache(dctx->db); if (dctx->do_date) { - /* - * Adjust the date backwards by the serve-stale TTL, if any. - * This is so the TTL will be loaded correctly when next - * started. - */ (void)dns_db_getservestalettl(dctx->db, &dctx->tctx.serve_stale_ttl); - dctx->now -= dctx->tctx.serve_stale_ttl; } if (dctx->format == dns_masterformat_text && diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index a18fcceaa5..8af4e10e48 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -3123,13 +3123,18 @@ bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, rdatasetheader_t *header, if (PREFETCH(header)) { rdataset->attributes |= DNS_RDATASETATTR_PREFETCH; } - if (STALE(header)) { + if (STALE(header) && !ANCIENT(header)) { + dns_ttl_t stale = header->rdh_ttl + rbtdb->serve_stale_ttl; + if (stale > now) { + stale = stale - now; + } else { + stale = 0; + } if (STALE_WINDOW(header)) { rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW; } rdataset->attributes |= DNS_RDATASETATTR_STALE; - rdataset->stale_ttl = - (rbtdb->serve_stale_ttl + header->rdh_ttl) - now; + rdataset->stale_ttl = stale; rdataset->ttl = 0; } else if (IS_CACHE(rbtdb) && !ACTIVE(header, now)) { rdataset->attributes |= DNS_RDATASETATTR_ANCIENT; From debee6157b7d78a8aba60159777a7025374edce7 Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Fri, 12 Mar 2021 15:12:27 +0100 Subject: [PATCH 2/3] Check staleness in bind_rdataset Before binding an RRset, check the time and see if this record is stale (or perhaps even ancient). Marking a header stale or ancient happens only when looking up an RRset in cache, but binding an RRset can also happen on other occasions (for example when dumping the database). Check the time and compare it to the header. If according to the time the entry is stale, but not ancient, set the STALE attribute. If according to the time is ancient, set the ANCIENT attribute. We could mark the header stale or ancient here, but that requires locking, so that's why we only compare the current time against the rdh_ttl. Adjust the test to check the dump-db before querying for data. In the dumped file the entry should be marked as stale, despite no cache lookup happened since the initial query. --- bin/tests/system/serve-stale/tests.sh | 26 +++++++++---------- lib/dns/rbtdb.c | 37 ++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index f16aaf0844..62f6bf8bec 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -105,6 +105,19 @@ status=$((status+ret)) sleep 2 +# Run rndc dumpdb, test whether the stale data has correct comment printed. +# The max-stale-ttl is 3600 seconds, so the comment should say the data is +# stale for somewhere between 3500-3599 seconds. +echo_i "check rndc dump stale data.example ($n)" +rndc_dumpdb ns1 || ret=1 +awk '/; stale/ { x=$0; getline; print x, $0}' ns1/named_dump.db.test$n | + grep "; stale (will be retained for 3[56].. more seconds) data\.example.*A text record with a 2 second ttl" > /dev/null 2>&1 || ret=1 +# Also make sure the not expired data does not have a stale comment. +awk '/; answer/ { x=$0; getline; print x, $0}' ns1/named_dump.db.test$n | + grep "; answer longttl\.example.*A text record with a 600 second ttl" > /dev/null 2>&1 || ret=1 +if [ $ret != 0 ]; then echo_i "failed"; fi +status=$((status+ret)) + echo_i "sending queries for tests $((n+1))-$((n+4))..." $DIG -p ${PORT} @10.53.0.1 data.example TXT > dig.out.test$((n+1)) & $DIG -p ${PORT} @10.53.0.1 longttl.example TXT > dig.out.test$((n+2)) & @@ -123,19 +136,6 @@ grep "data\.example\..*4.*IN.*TXT.*A text record with a 2 second ttl" dig.out.te if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) -# Run rndc dumpdb, test whether the stale data has correct comment printed. -# The max-stale-ttl is 3600 seconds, so the comment should say the data is -# stale for somewhere between 3500-3599 seconds. -echo_i "check rndc dump stale data.example ($n)" -rndc_dumpdb ns1 || ret=1 -awk '/; stale/ { x=$0; getline; print x, $0}' ns1/named_dump.db.test$n | - grep "; stale (will be retained for 35.. more seconds) data\.example.*A text record with a 2 second ttl" > /dev/null 2>&1 || ret=1 -# Also make sure the not expired data does not have a stale comment. -awk '/; answer/ { x=$0; getline; print x, $0}' ns1/named_dump.db.test$n | - grep "; answer longttl\.example.*A text record with a 600 second ttl" > /dev/null 2>&1 || ret=1 -if [ $ret != 0 ]; then echo_i "failed"; fi -status=$((status+ret)) - n=$((n+1)) echo_i "check non-stale longttl.example ($n)" ret=0 diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 8af4e10e48..f30d560e68 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -3088,6 +3088,8 @@ bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, rdatasetheader_t *header, isc_stdtime_t now, isc_rwlocktype_t locktype, dns_rdataset_t *rdataset) { unsigned char *raw; /* RDATASLAB */ + bool stale = STALE(header); + bool ancient = ANCIENT(header); /* * Caller must be holding the node reader lock. @@ -3105,6 +3107,29 @@ bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, rdatasetheader_t *header, INSIST(rdataset->methods == NULL); /* We must be disassociated. */ + /* + * Mark header stale or ancient if the RRset is no longer active. + */ + if (!ACTIVE(header, now)) { + dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl; + /* + * If this data is in the stale window keep it and if + * DNS_DBFIND_STALEOK is not set we tell the caller to + * skip this record. We skip the records with ZEROTTL + * (these records should not be cached anyway). + */ + + if (KEEPSTALE(rbtdb) && stale_ttl > now) { + stale = true; + } else { + /* + * We are not keeping stale, or it is outside the + * stale window. Mark ancient, i.e. ready for cleanup. + */ + ancient = true; + } + } + rdataset->methods = &rdataset_methods; rdataset->rdclass = rbtdb->common.rdclass; rdataset->type = RBTDB_RDATATYPE_BASE(header->type); @@ -3123,18 +3148,18 @@ bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, rdatasetheader_t *header, if (PREFETCH(header)) { rdataset->attributes |= DNS_RDATASETATTR_PREFETCH; } - if (STALE(header) && !ANCIENT(header)) { - dns_ttl_t stale = header->rdh_ttl + rbtdb->serve_stale_ttl; - if (stale > now) { - stale = stale - now; + if (stale && !ancient) { + dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl; + if (stale_ttl > now) { + stale_ttl = stale_ttl - now; } else { - stale = 0; + stale_ttl = 0; } if (STALE_WINDOW(header)) { rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW; } rdataset->attributes |= DNS_RDATASETATTR_STALE; - rdataset->stale_ttl = stale; + rdataset->stale_ttl = stale_ttl; rdataset->ttl = 0; } else if (IS_CACHE(rbtdb) && !ACTIVE(header, now)) { rdataset->attributes |= DNS_RDATASETATTR_ANCIENT; From a83c8cb0afd88d54b9cf67239f2495c9b0391e97 Mon Sep 17 00:00:00 2001 From: Matthijs Mekking Date: Fri, 12 Mar 2021 16:27:45 +0100 Subject: [PATCH 3/3] Use stale TTL as RRset TTL in dumpdb It is more intuitive to have the countdown 'max-stale-ttl' as the RRset TTL, instead of 0 TTL. This information was already available in a comment "; stale (will be retained for x more seconds", but Support suggested to put it in the TTL field instead. --- bin/tests/system/serve-stale/tests.sh | 4 ++-- lib/dns/masterdump.c | 5 +---- lib/dns/rbtdb.c | 2 +- lib/dns/tests/db_test.c | 1 - 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/bin/tests/system/serve-stale/tests.sh b/bin/tests/system/serve-stale/tests.sh index 62f6bf8bec..4a83e35ac6 100755 --- a/bin/tests/system/serve-stale/tests.sh +++ b/bin/tests/system/serve-stale/tests.sh @@ -111,10 +111,10 @@ sleep 2 echo_i "check rndc dump stale data.example ($n)" rndc_dumpdb ns1 || ret=1 awk '/; stale/ { x=$0; getline; print x, $0}' ns1/named_dump.db.test$n | - grep "; stale (will be retained for 3[56].. more seconds) data\.example.*A text record with a 2 second ttl" > /dev/null 2>&1 || ret=1 + grep "; stale data\.example.*3[56]...*TXT.*A text record with a 2 second ttl" > /dev/null 2>&1 || ret=1 # Also make sure the not expired data does not have a stale comment. awk '/; answer/ { x=$0; getline; print x, $0}' ns1/named_dump.db.test$n | - grep "; answer longttl\.example.*A text record with a 600 second ttl" > /dev/null 2>&1 || ret=1 + grep "; answer longttl\.example.*[56]...*TXT.*A text record with a 600 second ttl" > /dev/null 2>&1 || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi status=$((status+ret)) diff --git a/lib/dns/masterdump.c b/lib/dns/masterdump.c index 8a4eb4de47..c8dcce5aa2 100644 --- a/lib/dns/masterdump.c +++ b/lib/dns/masterdump.c @@ -1108,10 +1108,7 @@ again: } else { isc_result_t result; if (STALE(rds)) { - fprintf(f, - "; stale (will be retained for %u more " - "seconds)\n", - rds->stale_ttl); + fprintf(f, "; stale\n"); } else if (ANCIENT(rds)) { isc_buffer_t b; char buf[sizeof("YYYYMMDDHHMMSS")]; diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index f30d560e68..f9c93416ed 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -3160,7 +3160,7 @@ bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, rdatasetheader_t *header, } rdataset->attributes |= DNS_RDATASETATTR_STALE; rdataset->stale_ttl = stale_ttl; - rdataset->ttl = 0; + rdataset->ttl = stale_ttl; } else if (IS_CACHE(rbtdb) && !ACTIVE(header, now)) { rdataset->attributes |= DNS_RDATASETATTR_ANCIENT; rdataset->stale_ttl = header->rdh_ttl; diff --git a/lib/dns/tests/db_test.c b/lib/dns/tests/db_test.c index c12e90ee1c..e41a63c0cf 100644 --- a/lib/dns/tests/db_test.c +++ b/lib/dns/tests/db_test.c @@ -248,7 +248,6 @@ dns_dbfind_staleok_test(void **state) { count++; assert_in_range(count, 0, 49); /* loop sanity */ assert_int_equal(result, ISC_R_SUCCESS); - assert_int_equal(rdataset.ttl, 0); assert_int_equal(rdataset.attributes & DNS_RDATASETATTR_STALE, DNS_RDATASETATTR_STALE);