From 46480a4b475621904496c33880ee6ead151023d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Thu, 14 Feb 2019 10:41:56 +0100 Subject: [PATCH 1/3] Fix serial number used in zone verification checks Due to the way the "mirror" system test is set up, it is impossible for the "verify-unsigned" and "verify-untrusted" zones to contain any serial number other than the original one present in ns2/verify.db.in. Thus, using presence of a different serial number in the SOA records of these zones as an indicator of problems with mirror zone verification is wrong. Look for the original zone serial number instead as that is the one that will be returned by ns3 if one of the aforementioned zones is successfully verified. --- bin/tests/system/mirror/tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/tests/system/mirror/tests.sh b/bin/tests/system/mirror/tests.sh index ce35f67dc0..9913fee57c 100644 --- a/bin/tests/system/mirror/tests.sh +++ b/bin/tests/system/mirror/tests.sh @@ -61,7 +61,7 @@ echo_i "checking that an unsigned mirror zone is rejected ($n)" ret=0 wait_for_transfer verify-unsigned $DIG $DIGOPTS @10.53.0.3 +norec verify-unsigned SOA > dig.out.ns3.test$n 2>&1 || ret=1 -grep "${UPDATED_SERIAL_BAD}.*; serial" dig.out.ns3.test$n > /dev/null && ret=1 +grep "${ORIGINAL_SERIAL}.*; serial" dig.out.ns3.test$n > /dev/null && ret=1 nextpartpeek ns3/named.run | grep "verify-unsigned.*Zone contains no DNSSEC keys" > /dev/null || ret=1 nextpartpeek ns3/named.run | grep "verify-unsigned.*mirror zone is now in use" > /dev/null && ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi @@ -73,7 +73,7 @@ ret=0 nextpartreset ns3/named.run wait_for_transfer verify-untrusted $DIG $DIGOPTS @10.53.0.3 +norec verify-untrusted SOA > dig.out.ns3.test$n 2>&1 || ret=1 -grep "${UPDATED_SERIAL_BAD}.*; serial" dig.out.ns3.test$n > /dev/null && ret=1 +grep "${ORIGINAL_SERIAL}.*; serial" dig.out.ns3.test$n > /dev/null && ret=1 nextpartpeek ns3/named.run | grep "verify-untrusted.*No trusted KSK DNSKEY found" > /dev/null || ret=1 nextpartpeek ns3/named.run | grep "verify-untrusted.*mirror zone is now in use" > /dev/null && ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi From 2cbf1028e9ebbd694b3ce1660cb0cbd0a246fd0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Thu, 14 Feb 2019 10:41:56 +0100 Subject: [PATCH 2/3] Improve reliability of zone verification checks In the "mirror" system test, ns3 periodically sends trust anchor telemetry queries to ns1 and ns2. It may thus happen that for some non-recursive queries for names inside mirror zones which are not yet loaded, ns3 will be able to synthesize a negative answer from the cached records it obtained from trust anchor telemetry responses. In such cases, NXDOMAIN responses will be sent with the root zone SOA in the AUTHORITY section. Since the root zone used in the "mirror" system test has the same serial number as ns2/verify.db.in and zone verification checks look for the specified serial numbers anywhere in the answer, the test could be broken if different zone names were used. The +noauth dig option could be used to address this weakness, but that would prevent entire responses from being stored for later inspection, which in turn would hamper troubleshooting test failures. Instead, use a different serial number for ns2/verify.db.in than for any other zone used in the "mirror" system test and check the number of records in the ANSWER section of each response. --- bin/tests/system/mirror/ns1/root.db.in | 2 +- .../system/mirror/ns2/initially-unavailable.db.in | 2 +- bin/tests/system/mirror/tests.sh | 12 ++++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/bin/tests/system/mirror/ns1/root.db.in b/bin/tests/system/mirror/ns1/root.db.in index 65af796574..ebd4b4da0c 100644 --- a/bin/tests/system/mirror/ns1/root.db.in +++ b/bin/tests/system/mirror/ns1/root.db.in @@ -8,7 +8,7 @@ ; information regarding copyright ownership. $TTL 3600 -@ SOA a.root-servers.nil. hostmaster 2000010100 3600 1200 604800 3600 +@ SOA a.root-servers.nil. hostmaster 1 3600 1200 604800 3600 @ NS a.root-servers.nil. a.root-servers.nil. A 10.53.0.1 example NS ns2.example. diff --git a/bin/tests/system/mirror/ns2/initially-unavailable.db.in b/bin/tests/system/mirror/ns2/initially-unavailable.db.in index 65d573e3e3..71dddd9282 100644 --- a/bin/tests/system/mirror/ns2/initially-unavailable.db.in +++ b/bin/tests/system/mirror/ns2/initially-unavailable.db.in @@ -8,7 +8,7 @@ ; information regarding copyright ownership. $TTL 3600 -@ SOA a.root-servers.nil. hostmaster 2000010100 3600 1200 604800 3600 +@ SOA a.root-servers.nil. hostmaster 1 3600 1200 604800 3600 @ NS ns2 ns2 A 10.53.0.2 foo CNAME foo.example. diff --git a/bin/tests/system/mirror/tests.sh b/bin/tests/system/mirror/tests.sh index 9913fee57c..d11d03ec47 100644 --- a/bin/tests/system/mirror/tests.sh +++ b/bin/tests/system/mirror/tests.sh @@ -61,6 +61,7 @@ echo_i "checking that an unsigned mirror zone is rejected ($n)" ret=0 wait_for_transfer verify-unsigned $DIG $DIGOPTS @10.53.0.3 +norec verify-unsigned SOA > dig.out.ns3.test$n 2>&1 || ret=1 +grep "ANSWER: 0" dig.out.ns3.test$n > /dev/null || ret=1 grep "${ORIGINAL_SERIAL}.*; serial" dig.out.ns3.test$n > /dev/null && ret=1 nextpartpeek ns3/named.run | grep "verify-unsigned.*Zone contains no DNSSEC keys" > /dev/null || ret=1 nextpartpeek ns3/named.run | grep "verify-unsigned.*mirror zone is now in use" > /dev/null && ret=1 @@ -73,6 +74,7 @@ ret=0 nextpartreset ns3/named.run wait_for_transfer verify-untrusted $DIG $DIGOPTS @10.53.0.3 +norec verify-untrusted SOA > dig.out.ns3.test$n 2>&1 || ret=1 +grep "ANSWER: 0" dig.out.ns3.test$n > /dev/null || ret=1 grep "${ORIGINAL_SERIAL}.*; serial" dig.out.ns3.test$n > /dev/null && ret=1 nextpartpeek ns3/named.run | grep "verify-untrusted.*No trusted KSK DNSKEY found" > /dev/null || ret=1 nextpartpeek ns3/named.run | grep "verify-untrusted.*mirror zone is now in use" > /dev/null && ret=1 @@ -85,6 +87,7 @@ ret=0 nextpartreset ns3/named.run wait_for_transfer verify-axfr $DIG $DIGOPTS @10.53.0.3 +norec verify-axfr SOA > dig.out.ns3.test$n 2>&1 || ret=1 +grep "ANSWER: 0" dig.out.ns3.test$n > /dev/null || ret=1 grep "${UPDATED_SERIAL_BAD}.*; serial" dig.out.ns3.test$n > /dev/null && ret=1 nextpartpeek ns3/named.run | grep "No correct RSASHA256 signature for verify-axfr SOA" > /dev/null || ret=1 nextpartpeek ns3/named.run | grep "verify-axfr.*mirror zone is now in use" > /dev/null && ret=1 @@ -100,6 +103,7 @@ reload_zone verify-axfr ${UPDATED_SERIAL_GOOD} $RNDCCMD 10.53.0.3 retransfer verify-axfr > /dev/null 2>&1 wait_for_transfer verify-axfr $DIG $DIGOPTS @10.53.0.3 +norec verify-axfr SOA > dig.out.ns3.test$n 2>&1 || ret=1 +grep "ANSWER: 0" dig.out.ns3.test$n > /dev/null && ret=1 grep "${UPDATED_SERIAL_GOOD}.*; serial" dig.out.ns3.test$n > /dev/null || ret=1 nextpartpeek ns3/named.run | grep "verify-axfr.*mirror zone is now in use" > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi @@ -141,6 +145,9 @@ if [ `nextpartpeek ns3/named.run | grep "verify-ixfr.*got incremental response" fi # Ensure the new, bad version of the zone was not accepted. $DIG $DIGOPTS @10.53.0.3 +norec verify-ixfr SOA > dig.out.ns3.test$n 2>&1 || ret=1 +# A positive answer is expected as the original version of the "verify-ixfr" +# zone should have been successfully verified. +grep "ANSWER: 0" dig.out.ns3.test$n > /dev/null && ret=1 grep "${UPDATED_SERIAL_BAD}.*; serial" dig.out.ns3.test$n > /dev/null && ret=1 nextpartpeek ns3/named.run | grep "No correct RSASHA256 signature for verify-ixfr SOA" > /dev/null || ret=1 # Despite the verification failure for this IXFR, this mirror zone should still @@ -165,6 +172,7 @@ $RNDCCMD 10.53.0.3 refresh verify-ixfr > /dev/null 2>&1 wait_for_transfer verify-ixfr # Ensure the new, good version of the zone was accepted. $DIG $DIGOPTS @10.53.0.3 +norec verify-ixfr SOA > dig.out.ns3.test$n 2>&1 || ret=1 +grep "ANSWER: 0" dig.out.ns3.test$n > /dev/null && ret=1 grep "${UPDATED_SERIAL_GOOD}.*; serial" dig.out.ns3.test$n > /dev/null || ret=1 # The log message announcing the mirror zone coming into effect should not have # been logged this time since the mirror zone in question is expected to @@ -179,6 +187,7 @@ ret=0 nextpartreset ns3/named.run wait_for_load verify-load ${UPDATED_SERIAL_BAD} ns3/named.run $DIG $DIGOPTS @10.53.0.3 +norec verify-load SOA > dig.out.ns3.test$n 2>&1 || ret=1 +grep "ANSWER: 0" dig.out.ns3.test$n > /dev/null || ret=1 grep "${UPDATED_SERIAL_BAD}.*; serial" dig.out.ns3.test$n > /dev/null && ret=1 nextpartpeek ns3/named.run | grep "No correct RSASHA256 signature for verify-load SOA" > /dev/null || ret=1 nextpartpeek ns3/named.run | grep "verify-load.*mirror zone is now in use" > /dev/null && ret=1 @@ -194,6 +203,7 @@ nextpart ns3/named.run > /dev/null $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} mirror ns3 wait_for_load verify-load ${UPDATED_SERIAL_GOOD} ns3/named.run $DIG $DIGOPTS @10.53.0.3 +norec verify-load SOA > dig.out.ns3.test$n 2>&1 || ret=1 +grep "ANSWER: 0" dig.out.ns3.test$n > /dev/null && ret=1 grep "${UPDATED_SERIAL_GOOD}.*; serial" dig.out.ns3.test$n > /dev/null || ret=1 nextpartpeek ns3/named.run | grep "verify-load.*mirror zone is now in use" > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi @@ -220,6 +230,7 @@ nextpart ns3/named.run > /dev/null $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} mirror ns3 wait_for_load verify-ixfr ${UPDATED_SERIAL_BAD} ns3/named.run $DIG $DIGOPTS @10.53.0.3 +norec verify-ixfr SOA > dig.out.ns3.test$n 2>&1 || ret=1 +grep "ANSWER: 0" dig.out.ns3.test$n > /dev/null || ret=1 grep "${UPDATED_SERIAL_BAD}.*; serial" dig.out.ns3.test$n > /dev/null && ret=1 nextpartpeek ns3/named.run | grep "No correct RSASHA256 signature for verify-ixfr SOA" > /dev/null || ret=1 nextpartpeek ns3/named.run | grep "verify-ixfr.*mirror zone is now in use" > /dev/null && ret=1 @@ -243,6 +254,7 @@ nextpart ns3/named.run > /dev/null $PERL $SYSTEMTESTTOP/start.pl --noclean --restart --port ${PORT} mirror ns3 wait_for_load verify-ixfr ${UPDATED_SERIAL_GOOD} ns3/named.run $DIG $DIGOPTS @10.53.0.3 +norec verify-ixfr SOA > dig.out.ns3.test$n 2>&1 || ret=1 +grep "ANSWER: 0" dig.out.ns3.test$n > /dev/null && ret=1 grep "${UPDATED_SERIAL_GOOD}.*; serial" dig.out.ns3.test$n > /dev/null || ret=1 nextpartpeek ns3/named.run | grep "verify-ixfr.*mirror zone is now in use" > /dev/null || ret=1 if [ $ret != 0 ]; then echo_i "failed"; fi From 9c611dd999417a31b80ff1b162dac5d6792fed7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Thu, 14 Feb 2019 10:41:56 +0100 Subject: [PATCH 3/3] Prevent races when waiting for log messages The "mirror" system test checks whether log messages announcing a mirror zone coming into effect are emitted properly. However, the helper functions responsible for waiting for zone transfers and zone loading to complete do not wait for these exact log messages, but rather for other ones preceding them, which introduces a possibility of false positives. This problem cannot be addressed by just changing the log message to look for because the test still needs to discern between transferring a zone and loading a zone. Add two new log messages at debug level 99 (which is what named instances used in system tests are configured with) that are to be emitted after the log messages announcing a mirror zone coming into effect. Tweak the aforementioned helper functions to only return once the log messages they originally looked for are followed by the newly added log messages. This reliably prevents races when looking for "mirror zone is now in use" log messages and also enables a workaround previously put into place in the "mirror" system test to be reverted. --- bin/tests/system/mirror/tests.sh | 37 +++++++++++++++++++------------- lib/dns/xfrin.c | 4 ++++ lib/dns/zone.c | 2 ++ 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/bin/tests/system/mirror/tests.sh b/bin/tests/system/mirror/tests.sh index d11d03ec47..b0203341c1 100644 --- a/bin/tests/system/mirror/tests.sh +++ b/bin/tests/system/mirror/tests.sh @@ -17,10 +17,20 @@ RNDCCMD="$RNDC -c $SYSTEMTESTTOP/common/rndc.conf -p ${CONTROLPORT} -s" # Wait until the transfer of the given zone to ns3 either completes successfully # or is aborted by a verification failure or a REFUSED response from the master. +# Note that matching on any transfer status is deliberately avoided because some +# checks performed by this test cause transfer attempts to end with the "IXFR +# failed" status, which is followed by an AXFR retry and this test needs to +# check what the result of the latter transfer attempt is. wait_for_transfer() { zone=$1 for i in 1 2 3 4 5 6 7 8 9 10; do - nextpartpeek ns3/named.run | egrep "'$zone/IN'.*Transfer status: (success|verify failure|REFUSED)" > /dev/null && return + # Wait until a "freeing transfer context" message is logged + # after one of the transfer results we are looking for is + # logged. This is needed to prevent races when checking for + # "mirror zone is now in use" messages. + nextpartpeek ns3/named.run | \ + awk "matched; /'$zone\/IN'.*Transfer status: (success|verify failure|REFUSED)/ {matched=1}" | \ + grep "'$zone/IN'.*freeing transfer context" > /dev/null && return sleep 1 done echo_i "exceeded time limit waiting for proof of '$zone' being transferred to appear in ns3/named.run" @@ -34,7 +44,13 @@ wait_for_load() { serial=$2 log=$3 for i in 1 2 3 4 5 6 7 8 9 10; do - nextpartpeek $log | egrep "$zone.*(loaded serial $serial|unable to load)" > /dev/null && return + # Wait until a "zone_postload: (...): done" message is logged + # after one of the loading-related messages we are looking for + # is logged. This is needed to prevent races when checking for + # "mirror zone is now in use" messages. + nextpartpeek $log | \ + awk "matched; /$zone.*(loaded serial $serial|unable to load)/ {matched=1}" | \ + grep "zone_postload: zone $zone/IN: done" > /dev/null && return sleep 1 done echo_i "exceeded time limit waiting for proof of '$zone' being loaded to appear in $log" @@ -114,23 +130,14 @@ echo_i "checking that an IXFR of an incorrectly signed mirror zone is rejected ( nextpartreset ns3/named.run ret=0 wait_for_transfer verify-ixfr +# Sanity check: the initial, properly signed version of the zone should have +# been announced as coming into effect. +nextpart ns3/named.run | grep "verify-ixfr.*mirror zone is now in use" > /dev/null || ret=1 # Make a copy of the original zone file for reuse in journal tests below. cp ns2/verify-ixfr.db.signed ns3/verify-journal.db.mirror # Wait 1 second so that the zone file timestamp changes and the subsequent -# invocation of "rndc reload" triggers a zone reload. This should also be way -# more than enough for the log message announcing the mirror zone coming into -# effect to appear in the log (see below). +# invocation of "rndc reload" triggers a zone reload. sleep 1 -# Sanity check: the initial, properly signed version of the zone should have -# been announced as coming into effect. Note that we cannot check that -# immediately after wait_for_transfer() as the latter might return before the -# log message we are looking for here appears; we also cannot call nextpart() -# after we update the zone on ns2 since there is a possibility of periodic -# refreshes triggering an IXFR of the "verify-ixfr" zone before the "rndc -# refresh" call below and that possibility needs to be handled as proper -# behavior. Thus, we need to look for the log message now. -nextpart ns3/named.run | grep "verify-ixfr.*mirror zone is now in use" > /dev/null || ret=1 -# Update the "verify-ixfr" zone on ns2. cat ns2/verify-ixfr.db.bad.signed > ns2/verify-ixfr.db.signed reload_zone verify-ixfr ${UPDATED_SERIAL_BAD} # Make a copy of the bad zone journal for reuse in journal tests below. diff --git a/lib/dns/xfrin.c b/lib/dns/xfrin.c index 2ae5f94149..e3a0248850 100644 --- a/lib/dns/xfrin.c +++ b/lib/dns/xfrin.c @@ -1534,6 +1534,10 @@ maybe_free(dns_xfrin_ctx_t *xfr) { dns_zone_log(xfr->zone, ISC_LOG_INFO, "mirror zone is now in use"); } + xfrin_log(xfr, ISC_LOG_DEBUG(99), "freeing transfer context"); + /* + * xfr->zone must not be detached before xfrin_log() is called. + */ dns_zone_idetach(&xfr->zone); } diff --git a/lib/dns/zone.c b/lib/dns/zone.c index 7e6eaf9bf4..7504c0c087 100644 --- a/lib/dns/zone.c +++ b/lib/dns/zone.c @@ -5069,6 +5069,8 @@ zone_postload(dns_zone_t *zone, dns_db_t *db, isc_time_t loadtime, DNS_ZONE_CLRFLAG(zone->secure, DNS_ZONEFLG_LOADPENDING); } + zone_debuglog(zone, "zone_postload", 99, "done"); + return (result); }