From 01de79b332ec3fbd0960dc9f4ab9a650ae85d453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Tue, 20 Feb 2018 13:59:25 +0100 Subject: [PATCH 1/4] Assign an index to each check in the cacheclean system test This will enable storing per-check forensic data, which facilitates troubleshooting test failures. --- bin/tests/system/cacheclean/tests.sh | 49 +++++++++++++++++++--------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/bin/tests/system/cacheclean/tests.sh b/bin/tests/system/cacheclean/tests.sh index 2c6f2ccac2..28414ade3b 100644 --- a/bin/tests/system/cacheclean/tests.sh +++ b/bin/tests/system/cacheclean/tests.sh @@ -12,6 +12,7 @@ SYSTEMTESTTOP=.. . $SYSTEMTESTTOP/conf.sh status=0 +n=0 RNDCOPTS="-c ../common/rndc.conf -s 10.53.0.2 -p 9953" DIGOPTS="+nosea +nocomm +nocmd +noquest +noadd +noauth +nocomm \ @@ -75,18 +76,21 @@ in_cache () { return 0 } -echo "I:check correctness of routine cache cleaning" +n=`expr $n + 1` +echo "I:check correctness of routine cache cleaning ($n)" $DIG $DIGOPTS +tcp +keepopen -b 10.53.0.7 -f dig.batch > dig.out.ns2 || status=1 grep ";" dig.out.ns2 $PERL ../digcomp.pl --lc dig.out.ns2 knowngood.dig.out || status=1 -echo "I:only one tcp socket was used" +n=`expr $n + 1` +echo "I:only one tcp socket was used ($n)" tcpclients=`awk '$3 == "client" && $5 ~ /10.53.0.7#[0-9]*:/ {print $5}' ns2/named.run | sort | uniq -c | wc -l` test $tcpclients -eq 1 || { status=1; echo "I:failed"; } -echo "I:reset and check that records are correctly cached initially" +n=`expr $n + 1` +echo "I:reset and check that records are correctly cached initially ($n)" ret=0 load_cache dump_cache @@ -95,7 +99,8 @@ nrecords=`grep flushtest.example ns2/named_dump.db | grep -v '^;' | egrep '(TXT| if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check flushing of the full cache" +n=`expr $n + 1` +echo "I:check flushing of the full cache ($n)" ret=0 clear_cache dump_cache @@ -104,7 +109,8 @@ nrecords=`grep flushtest.example ns2/named_dump.db | grep -v '^;' | wc -l` if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check flushing of individual nodes (interior node)" +n=`expr $n + 1` +echo "I:check flushing of individual nodes (interior node) ($n)" ret=0 clear_cache load_cache @@ -115,7 +121,8 @@ in_cache txt top1.flushtest.example && ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check flushing of individual nodes (leaf node, under the interior node)" +n=`expr $n + 1` +echo "I:check flushing of individual nodes (leaf node, under the interior node) ($n)" ret=0 # leaf node, under the interior node (should still exist) in_cache txt third2.second1.top1.flushtest.example || ret=1 @@ -124,7 +131,8 @@ in_cache txt third2.second1.top1.flushtest.example && ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check flushing of individual nodes (another leaf node, with both positive and negative cache entries)" +n=`expr $n + 1` +echo "I:check flushing of individual nodes (another leaf node, with both positive and negative cache entries) ($n)" ret=0 # another leaf node, with both positive and negative cache entries in_cache a third1.second1.top1.flushtest.example || ret=1 @@ -135,13 +143,15 @@ in_cache txt third1.second1.top1.flushtest.example && ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check flushing a nonexistent name" +n=`expr $n + 1` +echo "I:check flushing a nonexistent name ($n)" ret=0 $RNDC $RNDCOPTS flushname fake.flushtest.example || ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check flushing of namespaces" +n=`expr $n + 1` +echo "I:check flushing of namespaces ($n)" ret=0 clear_cache load_cache @@ -167,13 +177,15 @@ in_cache txt second3.top2.flushtest.example && ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check flushing a nonexistent namespace" +n=`expr $n + 1` +echo "I:check flushing a nonexistent namespace ($n)" ret=0 $RNDC $RNDCOPTS flushtree fake.flushtest.example || ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check the number of cached records remaining" +n=`expr $n + 1` +echo "I:check the number of cached records remaining ($n)" ret=0 dump_cache nrecords=`grep flushtest.example ns2/named_dump.db | grep -v '^;' | egrep '(TXT|ANY)' | wc -l` @@ -181,7 +193,8 @@ nrecords=`grep flushtest.example ns2/named_dump.db | grep -v '^;' | egrep '(TXT| if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check the check that flushname of a partial match works." +n=`expr $n + 1` +echo "I:check the check that flushname of a partial match works ($n)" ret=0 in_cache txt second2.top1.flushtest.example || ret=1 $RNDC $RNDCOPTS flushtree example @@ -189,7 +202,8 @@ in_cache txt second2.top1.flushtest.example && ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check the number of cached records remaining" +n=`expr $n + 1` +echo "I:check the number of cached records remaining ($n)" ret=0 dump_cache nrecords=`grep flushtest.example ns2/named_dump.db | grep -v '^;' | egrep '(TXT|ANY)' | wc -l` @@ -197,7 +211,8 @@ nrecords=`grep flushtest.example ns2/named_dump.db | grep -v '^;' | egrep '(TXT| if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check flushtree clears adb correctly" +n=`expr $n + 1` +echo "I:check flushtree clears adb correctly ($n)" ret=0 load_cache dump_cache @@ -208,14 +223,16 @@ awk '/plain success\/timeout/ {getline; getline; if ($2 == "ns.flushtest.example if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check expire option returned from master zone" +n=`expr $n + 1` +echo "I:check expire option returned from master zone ($n)" ret=0 $DIG @10.53.0.1 -p 5300 +expire soa expire-test > dig.out.expire grep EXPIRE: dig.out.expire > /dev/null || ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` -echo "I:check expire option returned from slave zone" +n=`expr $n + 1` +echo "I:check expire option returned from slave zone ($n)" ret=0 $DIG @10.53.0.2 -p 5300 +expire soa expire-test > dig.out.expire grep EXPIRE: dig.out.expire > /dev/null || ret=1 From aeea1faf0191f8f693393ee088272f893fc4faa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Tue, 20 Feb 2018 13:59:27 +0100 Subject: [PATCH 2/4] Do not overwrite cache dumps Unless configured otherwise in named.conf, "rndc dumpdb" causes a cache dump to be written to a file called "named_dump.db" in the working directory of the given named instance. Repeatedly using this command throughout different checks in the cacheclean system test causes cache dumps for older checks to be overwritten, which hinders failure diagnosis. Prevent this by moving each cache dump to a check-specific location after running "rndc dumpdb". Furthermore, during the "check flushtree clears adb correctly" check, dump_cache() is called twice without renaming the resulting files. Prevent the first cache dump from being overwritten by moving it to a different file before calling "rndc dumpdb" for the second time. --- bin/tests/system/cacheclean/clean.sh | 2 +- bin/tests/system/cacheclean/tests.sh | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/bin/tests/system/cacheclean/clean.sh b/bin/tests/system/cacheclean/clean.sh index 2a17346393..39396277ed 100644 --- a/bin/tests/system/cacheclean/clean.sh +++ b/bin/tests/system/cacheclean/clean.sh @@ -16,5 +16,5 @@ rm -f dig.out.ns2 rm -f dig.out.expire rm -f */named.memstats rm -f */named.run -rm -f ns2/named_dump.db +rm -f ns2/named_dump.db.* rm -f ns*/named.lock diff --git a/bin/tests/system/cacheclean/tests.sh b/bin/tests/system/cacheclean/tests.sh index 28414ade3b..9f69e00510 100644 --- a/bin/tests/system/cacheclean/tests.sh +++ b/bin/tests/system/cacheclean/tests.sh @@ -56,9 +56,9 @@ EOF } dump_cache () { - rm -f ns2/named_dump.db $RNDC $RNDCOPTS dumpdb -cache _default sleep 1 + mv ns2/named_dump.db ns2/named_dump.db.$n } clear_cache () { @@ -94,7 +94,7 @@ echo "I:reset and check that records are correctly cached initially ($n)" ret=0 load_cache dump_cache -nrecords=`grep flushtest.example ns2/named_dump.db | grep -v '^;' | egrep '(TXT|ANY)'| wc -l` +nrecords=`grep flushtest.example ns2/named_dump.db.$n | grep -v '^;' | egrep '(TXT|ANY)'| wc -l` [ $nrecords -eq 17 ] || { ret=1; echo "I: found $nrecords records expected 17"; } if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` @@ -104,7 +104,7 @@ echo "I:check flushing of the full cache ($n)" ret=0 clear_cache dump_cache -nrecords=`grep flushtest.example ns2/named_dump.db | grep -v '^;' | wc -l` +nrecords=`grep flushtest.example ns2/named_dump.db.$n | grep -v '^;' | wc -l` [ $nrecords -eq 0 ] || ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` @@ -188,7 +188,7 @@ n=`expr $n + 1` echo "I:check the number of cached records remaining ($n)" ret=0 dump_cache -nrecords=`grep flushtest.example ns2/named_dump.db | grep -v '^;' | egrep '(TXT|ANY)' | wc -l` +nrecords=`grep flushtest.example ns2/named_dump.db.$n | grep -v '^;' | egrep '(TXT|ANY)' | wc -l` [ $nrecords -eq 17 ] || { ret=1; echo "I: found $nrecords records expected 17"; } if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` @@ -206,7 +206,7 @@ n=`expr $n + 1` echo "I:check the number of cached records remaining ($n)" ret=0 dump_cache -nrecords=`grep flushtest.example ns2/named_dump.db | grep -v '^;' | egrep '(TXT|ANY)' | wc -l` +nrecords=`grep flushtest.example ns2/named_dump.db.$n | grep -v '^;' | egrep '(TXT|ANY)' | wc -l` [ $nrecords -eq 1 ] || { ret=1; echo "I: found $nrecords records expected 1"; } if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` @@ -216,10 +216,12 @@ echo "I:check flushtree clears adb correctly ($n)" ret=0 load_cache dump_cache -awk '/plain success\/timeout/ {getline; getline; if ($2 == "ns.flushtest.example") exit(0); exit(1); }' ns2/named_dump.db || ret=1 +mv ns2/named_dump.db.$n ns2/named_dump.db.$n.a +awk '/plain success\/timeout/ {getline; getline; if ($2 == "ns.flushtest.example") exit(0); exit(1); }' ns2/named_dump.db.$n.a || ret=1 $RNDC $RNDCOPTS flushtree flushtest.example || ret=1 dump_cache -awk '/plain success\/timeout/ {getline; getline; if ($2 == "ns.flushtest.example") exit(1); exit(0); }' ns2/named_dump.db || ret=1 +mv ns2/named_dump.db.$n ns2/named_dump.db.$n.b +awk '/plain success\/timeout/ {getline; getline; if ($2 == "ns.flushtest.example") exit(1); exit(0); }' ns2/named_dump.db.$n.b || ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret` From 2bbff06d3631653fcea95e223da47aea0f9cdf0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Tue, 20 Feb 2018 13:59:28 +0100 Subject: [PATCH 3/4] Wait until a cache dump completes instead of waiting for a fixed amount of time Dumping the cache is an asynchronous operation, so sleeping for a fixed amount of time after running "rndc dumpdb" is imperfect as dumping cache contents may take longer than expected on slower machines. Instead of always sleeping for 1 second, wait until the "; Dump complete" line appears in the dump or 10 seconds pass, whichever comes first. --- bin/tests/system/cacheclean/tests.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/tests/system/cacheclean/tests.sh b/bin/tests/system/cacheclean/tests.sh index 9f69e00510..002f61f3ef 100644 --- a/bin/tests/system/cacheclean/tests.sh +++ b/bin/tests/system/cacheclean/tests.sh @@ -57,7 +57,11 @@ EOF dump_cache () { $RNDC $RNDCOPTS dumpdb -cache _default - sleep 1 + for i in 0 1 2 3 4 5 6 7 8 9 + do + grep '^; Dump complete$' ns2/named_dump.db > /dev/null && break + sleep 1 + done mv ns2/named_dump.db ns2/named_dump.db.$n } From ca1049b2f36be42a6daa9c04289d51c481cb7b52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Tue, 20 Feb 2018 13:59:29 +0100 Subject: [PATCH 4/4] Improve the way cache contents are searched for "ns.flushtest.example" During the "check flushtree clears adb correctly" check, expecting "ns.flushtest.example" to always be the first name in the ADB dump is fragile, because in a certain corner case "a.root-servers.nil" will be the first name instead. As the purpose of the relevant check is to ensure "ns.flushtest.example" is removed from ADB by "rndc flushtree flushtest.example", search the entire list of names present in ADB instead of just the first entry when looking for "ns.flushtest.example". --- bin/tests/system/cacheclean/tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/tests/system/cacheclean/tests.sh b/bin/tests/system/cacheclean/tests.sh index 002f61f3ef..56f96e667c 100644 --- a/bin/tests/system/cacheclean/tests.sh +++ b/bin/tests/system/cacheclean/tests.sh @@ -221,11 +221,11 @@ ret=0 load_cache dump_cache mv ns2/named_dump.db.$n ns2/named_dump.db.$n.a -awk '/plain success\/timeout/ {getline; getline; if ($2 == "ns.flushtest.example") exit(0); exit(1); }' ns2/named_dump.db.$n.a || ret=1 +sed -n '/plain success\/timeout/,/Unassociated entries/p' ns2/named_dump.db.$n.a | grep 'ns.flushtest.example' > /dev/null 2>&1 || ret=1 $RNDC $RNDCOPTS flushtree flushtest.example || ret=1 dump_cache mv ns2/named_dump.db.$n ns2/named_dump.db.$n.b -awk '/plain success\/timeout/ {getline; getline; if ($2 == "ns.flushtest.example") exit(1); exit(0); }' ns2/named_dump.db.$n.b || ret=1 +sed -n '/plain success\/timeout/,/Unassociated entries/p' ns2/named_dump.db.$n.b | grep 'ns.flushtest.example' > /dev/null 2>&1 && ret=1 if [ $ret != 0 ]; then echo "I:failed"; fi status=`expr $status + $ret`