mirror of
https://github.com/postgres/postgres.git
synced 2026-05-26 19:28:27 -04:00
Test improvements for online checksums
This includes a number of smaller fixups to the online checksums test module which were found during post-commit review and stabilization work. * Fix scope increase for PG_TEST_EXTRA: The online checksums tests have two levels of PG_TEST_EXTRA, checksum and checksums_extended for extra test runs and test runs with increased randomization. The logic for increasing the number of test iterations was, however, backwards. * Change stopmode for PITR test: The pitr suite used immediate stop mode which caused problems on slower machines where SIGQUIT would interrupt archive commands, leaving partial WAL files behind. This would then prevent restart. Fix by using fast mode which is the appropriate mode for the test at hand. Also increase timeouts to help slower test systems since an expired timeout will incur the same effect as an immediate standby with a partial WAL left behind. This issue was observed when running the test suites on a Raspberry Pi 4 machine. * Improve logging: The test suite for data checksums uses a set of helper functions in a Perl module to avoid repeating code; this change makes sure that the helper functions do a better job of logging their test output to make debugging easier. * Remove unused code: wait_for_cluster_crash was used during the development of online checksums but was never used in any test which shipped, so remove the function. * Standby fixes: Ensure no vacuum on pgbench init on standby with -n to avoid bogus error message in the log, and enable hot_standby_feedback to prevent queries from getting cancelled due to recovery on slower systems. Author: Daniel Gustafsson <daniel@yesql.se> Author: Tomas Vondra <tomas@vondra.me> Reviewed-by: Ayush Tiwari <ayushtiwari.slg01@gmail.com> Reviewed-by: SATYANARAYANA NARLAPURAM <satyanarlapuram@gmail.com> Discussion: https://postgr.es/m/9197F930-DDEB-4CAC-82A2-16FEC715CCE8@yesql.se
This commit is contained in:
parent
b120358c61
commit
a0d8f4c1ae
3 changed files with 20 additions and 50 deletions
|
|
@ -49,8 +49,8 @@ my $node_standby_loglocation = 0;
|
|||
# of tests performed and the wall time taken is non-deterministic as the test
|
||||
# performs a lot of randomized actions, but 5 iterations will be a long test
|
||||
# run regardless.
|
||||
my $TEST_ITERATIONS = 5;
|
||||
$TEST_ITERATIONS = 1 if ($extended);
|
||||
my $TEST_ITERATIONS = 1;
|
||||
$TEST_ITERATIONS = 5 if ($extended);
|
||||
|
||||
# Variables which record the current state of the cluster
|
||||
my $data_checksum_state = 'off';
|
||||
|
|
@ -83,6 +83,7 @@ sub background_pgbench
|
|||
push(@cmd, '-C') if ($extended && cointoss());
|
||||
# If we run on a standby it needs to be a read-only benchmark
|
||||
push(@cmd, '-S') if ($standby);
|
||||
push(@cmd, '-n') if ($standby);
|
||||
# Finally add the database name to use
|
||||
push(@cmd, 'postgres');
|
||||
|
||||
|
|
@ -146,8 +147,10 @@ sub flip_data_checksums
|
|||
. "FROM pg_catalog.pg_settings "
|
||||
. "WHERE name = 'data_checksums';");
|
||||
|
||||
is(($result eq 'inprogress-on' || $result eq 'on'),
|
||||
1, 'ensure checksums are on, or in progress, on standby_1');
|
||||
is( ($result eq 'inprogress-on' || $result eq 'on'),
|
||||
1,
|
||||
'ensure checksums are on, or in progress, on standby_1, got: '
|
||||
. $result);
|
||||
|
||||
# Wait for checksums enabled on the primary and standby
|
||||
wait_for_checksum_state($node_primary, 'on');
|
||||
|
|
@ -210,6 +213,7 @@ $node_primary->append_conf(
|
|||
qq[
|
||||
max_connections = 30
|
||||
log_statement = none
|
||||
hot_standby_feedback = on
|
||||
]);
|
||||
$node_primary->start;
|
||||
$node_primary->safe_psql('postgres', 'CREATE EXTENSION test_checksums;');
|
||||
|
|
|
|||
|
|
@ -124,11 +124,14 @@ $node_primary->init(
|
|||
has_archiving => 1,
|
||||
allows_streaming => 1,
|
||||
no_data_checksums => 1);
|
||||
my $timeout_unit = 's';
|
||||
$node_primary->append_conf(
|
||||
'postgresql.conf',
|
||||
qq[
|
||||
max_connections = 100
|
||||
log_statement = none
|
||||
wal_sender_timeout = $PostgreSQL::Test::Utils::timeout_default$timeout_unit
|
||||
wal_receiver_timeout = $PostgreSQL::Test::Utils::timeout_default$timeout_unit
|
||||
]);
|
||||
$node_primary->start;
|
||||
|
||||
|
|
@ -154,7 +157,7 @@ my ($pre_lsn, $post_lsn) = flip_data_checksums();
|
|||
$node_primary->safe_psql('postgres', "UPDATE t SET a = a + 1;");
|
||||
$node_primary->safe_psql('postgres', "SELECT pg_create_restore_point('a');");
|
||||
$node_primary->safe_psql('postgres', "UPDATE t SET a = a + 1;");
|
||||
$node_primary->stop('immediate');
|
||||
$node_primary->stop('fast');
|
||||
|
||||
my $node_pitr = PostgreSQL::Test::Cluster->new('pitr_backup');
|
||||
$node_pitr->init_from_backup(
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ our @EXPORT = qw(
|
|||
stopmode
|
||||
test_checksum_state
|
||||
wait_for_checksum_state
|
||||
wait_for_cluster_crash
|
||||
);
|
||||
|
||||
=pod
|
||||
|
|
@ -67,7 +66,10 @@ sub test_checksum_state
|
|||
my $result = $postgresnode->safe_psql('postgres',
|
||||
"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';"
|
||||
);
|
||||
is($result, $state, 'ensure checksums are set to ' . $state);
|
||||
is($result, $state,
|
||||
'ensure checksums are set to '
|
||||
. $state . ' on '
|
||||
. $postgresnode->name());
|
||||
return $result eq $state;
|
||||
}
|
||||
|
||||
|
|
@ -89,52 +91,13 @@ sub wait_for_checksum_state
|
|||
'postgres',
|
||||
"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';",
|
||||
$state);
|
||||
is($res, 1, 'ensure data checksums are transitioned to ' . $state);
|
||||
is($res, 1,
|
||||
'ensure data checksums are transitioned to '
|
||||
. $state . ' on '
|
||||
. $postgresnode->name());
|
||||
return $res == 1;
|
||||
}
|
||||
|
||||
=item wait_for_cluster_crash(node, params)
|
||||
|
||||
Repeatedly test if the cluster running at B<node> responds to connections
|
||||
and return when it no longer does so, or when it times out. Processing will
|
||||
run for $PostgreSQL::Test::Utils::timeout_default seconds unless a timeout
|
||||
value is specified as a parameter. Returns True if the cluster crashed, else
|
||||
False if the process timed out.
|
||||
|
||||
=over
|
||||
|
||||
=item timeout
|
||||
|
||||
Approximate number of seconds to wait for cluster to crash, default is
|
||||
$PostgreSQL::Test::Utils::timeout_default. There are no real-time guarantees
|
||||
that the total process time won't exceed the timeout.
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
sub wait_for_cluster_crash
|
||||
{
|
||||
my $postgresnode = shift;
|
||||
my %params = @_;
|
||||
my $crash = 0;
|
||||
|
||||
$params{timeout} = $PostgreSQL::Test::Utils::timeout_default
|
||||
unless (defined($params{timeout}));
|
||||
|
||||
for (my $naps = 0; $naps < $params{timeout}; $naps++)
|
||||
{
|
||||
if (!$postgresnode->is_alive)
|
||||
{
|
||||
$crash = 1;
|
||||
last;
|
||||
}
|
||||
sleep(1);
|
||||
}
|
||||
|
||||
return $crash == 1;
|
||||
}
|
||||
|
||||
=item enable_data_checksums($node, %params)
|
||||
|
||||
Function for enabling data checksums in the cluster running at B<node>.
|
||||
|
|
|
|||
Loading…
Reference in a new issue