From b1063d2de644e5fdeb79ae049a9dad5d97c11a8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Wed, 26 Jan 2022 15:18:43 +0100 Subject: [PATCH 1/3] Retain all named.run files from each test run The bin/tests/system/start.pl script truncates the named.run file for a given named instance unless it is invoked with the --restart command-line option. Ever since Python-based tests were introduced, bin/tests/system/run.sh may start named instances used by a given system test multiple times within a single run, causing the bin/tests/system/start.pl script to truncate some of the log files written during the test. This makes troubleshooting certain test failures hard or even impossible. Fix by calling bin/tests/system/start.pl with the --restart command-line option for every start_servers() invocation except the first one. (cherry picked from commit 65abbca79b0c082e610f427c2b78608a4dc8b4bd) --- bin/tests/system/run.sh.in | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/tests/system/run.sh.in b/bin/tests/system/run.sh.in index ee4a9077b2..057cd1f874 100644 --- a/bin/tests/system/run.sh.in +++ b/bin/tests/system/run.sh.in @@ -117,9 +117,13 @@ fi # Determine which ports to use for this system test. eval "$(cd "${srcdir}" && ./get_ports.sh -p "$baseport" -t "$systest")" +# Start all servers used by the system test. Ensure all log files written +# during a system test (tests.sh + potentially multiple *.py scripts) are +# retained for each run by calling start.pl with the --restart command-line +# option for all invocations except the first one. start_servers() { echoinfo "I:$systest:starting servers" - if $restart; then + if $restart || [ "$run" -gt 0 ]; then restart_opt="--restart" fi if ! $PERL start.pl ${restart_opt} --port "$PORT" "$systest"; then From 17fbf25676fd2fd0713b599518fe35cb36366a51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Wed, 26 Jan 2022 15:18:43 +0100 Subject: [PATCH 2/3] Do not strip leading whitespace from test output The echo_*() and cat_*() functions in bin/tests/system/conf.sh.common call the "read" builtin command without specifying the field separator to use. This results in leading whitespace getting stripped from each line of the texts passed to those functions, which mangles e.g. pytest output, hindering test failure troubleshooting. Address by setting IFS to an empty value for the "read" calls used in the aforementioned helper functions. (cherry picked from commit fb8702211532c4f68f2d180aac23d983d5e17fc7) --- bin/tests/system/conf.sh.common | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bin/tests/system/conf.sh.common b/bin/tests/system/conf.sh.common index 3809d74aa8..ce7b923b23 100644 --- a/bin/tests/system/conf.sh.common +++ b/bin/tests/system/conf.sh.common @@ -181,19 +181,19 @@ then printf "${COLOR_END}%s${COLOR_NONE}\n" "$*" } echo_i() { - printf '%s\n' "$*" | while read -r __LINE ; do + printf '%s\n' "$*" | while IFS= read -r __LINE ; do echoinfo "I:$SYSTESTDIR:$__LINE" done } echo_ic() { - printf '%s\n' "$*" | while read -r __LINE ; do + printf '%s\n' "$*" | while IFS= read -r __LINE ; do echoinfo "I:$SYSTESTDIR: $__LINE" done } echo_d() { - printf '%s\n' "$*" | while read -r __LINE ; do + printf '%s\n' "$*" | while IFS= read -r __LINE ; do echoinfo "D:$SYSTESTDIR:$__LINE" done } @@ -218,32 +218,32 @@ else } echo_i() { - echo "$@" | while read -r __LINE ; do + echo "$@" | while IFS= read -r __LINE ; do echoinfo "I:$SYSTESTDIR:$__LINE" done } echo_ic() { - echo "$@" | while read -r __LINE ; do + echo "$@" | while IFS= read -r __LINE ; do echoinfo "I:$SYSTESTDIR: $__LINE" done } echo_d() { - echo "$@" | while read -r __LINE ; do + echo "$@" | while IFS= read -r __LINE ; do echoinfo "D:$SYSTESTDIR:$__LINE" done } fi cat_i() { - while read -r __LINE ; do + while IFS= read -r __LINE ; do echoinfo "I:$SYSTESTDIR:$__LINE" done } cat_d() { - while read -r __LINE ; do + while IFS= read -r __LINE ; do echoinfo "D:$SYSTESTDIR:$__LINE" done } From 7d7199f18c66dc394281585b6b29e7dc4ff24b9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Wed, 26 Jan 2022 15:18:43 +0100 Subject: [PATCH 3/3] Fix waiting for lock file removal upon exit Commit c787a539d2a931ba9023677c1c269ed191455512 fixed a certain class of intermittent system test failures caused by named instances unable to restart. The root cause was bin/tests/system/stop.pl returning without waiting for a named instance to remove its lock file. Later on, it turned out that the above change causes other issues on Windows due to the way named handles signals on that platform. Commit 761ba4514f7eceab8019d71dc9cabd269d274597 intended to address those issues by making the server_lock_file() subroutine in bin/tests/system/stop.pl return an empty value on Windows, in order to prevent the script for waiting for lock file cleanup on that platform. Note, however, that Windows detection in that subroutine is limited to checking whether the CYGWIN environment variable is set. While that environment variable was not set on Unix-like systems before commit 761ba4514f7eceab8019d71dc9cabd269d274597, another commit (a33237f070c95480f581d85b0169f41ce5a12017, merged a few weeks later) changed that by setting the CYGWIN environment variable to an empty value on Unix-like systems. This made the defined($ENV{'CYGWIN'}) check in server_lock_file() return true, inadvertently preventing bin/tests/system/stop.pl from waiting for lock file removal before exiting on Unix-like systems and therefore reintroducing the original issue. Fix by making server_lock_file() only return an empty value when the CYGWIN environment variable is set to a non-empty value (which is what bin/tests/system/conf.sh.win32 does). Adjust a similar check in the pid_file_exists() subroutine in the same way for consistency. (cherry picked from commit a938db2170ef78058ff596f86c04c4bbe75cda33) --- bin/tests/system/stop.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/tests/system/stop.pl b/bin/tests/system/stop.pl index 4332ac83cc..57cb752343 100644 --- a/bin/tests/system/stop.pl +++ b/bin/tests/system/stop.pl @@ -134,7 +134,7 @@ exit($errors); sub server_lock_file { my ( $server ) = @_; - return if (defined($ENV{'CYGWIN'})); + return if (defined($ENV{'CYGWIN'}) && $ENV{'CYGWIN'}); return $testdir . "/" . $server . "/named.lock" if ($server =~ /^ns/); return if ($server =~ /^ans/); @@ -255,7 +255,7 @@ sub pid_file_exists { if (send_signal(0, $pid) == 0) { # XXX: on windows this is likely to result in a # false positive, so don't bother reporting the error. - if (!defined($ENV{'CYGWIN'})) { + if (!defined($ENV{'CYGWIN'}) || !$ENV{'CYGWIN'}) { print "I:$test:$server crashed on shutdown\n"; $errors = 1; }