diff --git a/src/backend/postmaster/datachecksum_state.c b/src/backend/postmaster/datachecksum_state.c index 430e6e9a3c9..1243949eacb 100644 --- a/src/backend/postmaster/datachecksum_state.c +++ b/src/backend/postmaster/datachecksum_state.c @@ -1216,8 +1216,15 @@ ProcessAllDatabases(void) result = ProcessDatabase(db); +#ifdef USE_INJECTION_POINTS /* Allow a test process to alter the result of the operation */ - INJECTION_POINT("datachecksumsworker-modify-db-result", &result); + if (IS_INJECTION_POINT_ATTACHED("datachecksumsworker-fail-db-result")) + { + result = DATACHECKSUMSWORKER_FAILED; + INJECTION_POINT_CACHED("datachecksumsworker-fail-db-result", + db->dbname); + } +#endif pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_DBS_DONE, ++cumulative_total); @@ -1451,6 +1458,9 @@ DataChecksumsWorkerMain(Datum arg) BufferAccessStrategy strategy; bool aborted = false; int64 rels_done; +#ifdef USE_INJECTION_POINTS + bool retried = false; +#endif operation = ENABLE_DATACHECKSUMS; @@ -1566,7 +1576,19 @@ DataChecksumsWorkerMain(Datum arg) } list_free(CurrentTempTables); - INJECTION_POINT("datachecksumsworker-fake-temptable-wait", &numleft); +#ifdef USE_INJECTION_POINTS + if (IS_INJECTION_POINT_ATTACHED("datachecksumsworker-fake-temptable-wait")) + { + /* Make sure to just cause one retry */ + if (!retried && numleft == 0) + { + numleft = 1; + retried = true; + + INJECTION_POINT_CACHED("datachecksumsworker-fake-temptable-wait", NULL); + } + } +#endif if (numleft == 0) break; diff --git a/src/test/modules/test_checksums/t/001_basic.pl b/src/test/modules/test_checksums/t/001_basic.pl index 5933c730da1..a78118320d5 100644 --- a/src/test/modules/test_checksums/t/001_basic.pl +++ b/src/test/modules/test_checksums/t/001_basic.pl @@ -35,11 +35,7 @@ my $result = is($result, '9999', 'ensure checksummed pages can be read back'); # Enable data checksums again which should be a no-op so we explicitly don't -# wait for any state transition as none should happen here. Make sure to let -# any running launcher finish in case it's still wrapping up. -$result = $node->poll_query_until('postgres', - "SELECT count(*) = 0 FROM pg_catalog.pg_stat_activity WHERE backend_type = 'datachecksum launcher';" -); +# wait for any state transition as none should happen here. enable_data_checksums($node); test_checksum_state($node, 'on'); # ..and make sure we can still read/write data diff --git a/src/test/modules/test_checksums/t/003_standby_restarts.pl b/src/test/modules/test_checksums/t/003_standby_restarts.pl index 6b016925651..7ad11417ca6 100644 --- a/src/test/modules/test_checksums/t/003_standby_restarts.pl +++ b/src/test/modules/test_checksums/t/003_standby_restarts.pl @@ -41,7 +41,7 @@ $node_standby->start; $node_primary->safe_psql('postgres', "CREATE TABLE t AS SELECT generate_series(1,10000) AS a;"); -# Wait for standbys to catch up +# Wait for standby to catch up $node_primary->wait_for_catchup($node_standby, 'replay', $node_primary->lsn('insert')); @@ -54,8 +54,14 @@ test_checksum_state($node_standby, 'off'); # standby change state. # -# Ensure that the primary switches to "inprogress-on" -enable_data_checksums($node_primary, wait => 'inprogress-on'); +# Initiate enabling of checksums and ensure that the primary switches to +# either "inprogress-on" or "on" +enable_data_checksums($node_primary); +my $result = $node_primary->poll_query_until( + 'postgres', + "SELECT setting = 'off' FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", + 'f'); +is($result, 1, 'ensure primary has transitioned from off'); # Wait for checksum enable to be replayed $node_primary->wait_for_catchup($node_standby, 'replay'); @@ -63,7 +69,7 @@ $node_primary->wait_for_catchup($node_standby, 'replay'); # would be "inprogress-on", but it is theoretically possible for the primary to # complete the checksum enabling *and* have the standby replay that record # before we reach the check below. -my $result = $node_standby->poll_query_until( +$result = $node_standby->poll_query_until( 'postgres', "SELECT setting = 'off' FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", 'f'); diff --git a/src/test/modules/test_checksums/t/004_offline.pl b/src/test/modules/test_checksums/t/004_offline.pl index f1972bddff1..73c279e75e0 100644 --- a/src/test/modules/test_checksums/t/004_offline.pl +++ b/src/test/modules/test_checksums/t/004_offline.pl @@ -62,11 +62,15 @@ $result = $node->safe_psql('postgres', "SELECT relpersistence FROM pg_catalog.pg_class WHERE relname = 'tt';"); is($result, 't', 'ensure we can see the temporary table'); +# Enable, but stop waiting at inprogress-on since it will sit there until the +# above temporary table is removed. enable_data_checksums($node, wait => 'inprogress-on'); -# Turn the cluster off and enable checksums offline, then start back up +# Turn the cluster off and enable checksums offline, then start back up. +# Stop the cluster before exiting the background session since otherwise +# checksums might have time to get enabled before shutting down the cluster. +$node->stop('fast'); $bsession->quit; -$node->stop; $node->checksum_enable_offline; $node->start; diff --git a/src/test/modules/test_checksums/t/005_injection.pl b/src/test/modules/test_checksums/t/005_injection.pl index 897f282a1f2..a37a24dbbad 100644 --- a/src/test/modules/test_checksums/t/005_injection.pl +++ b/src/test/modules/test_checksums/t/005_injection.pl @@ -32,6 +32,7 @@ $node->start; # Set up test environment $node->safe_psql('postgres', 'CREATE EXTENSION test_checksums;'); +$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;'); # --------------------------------------------------------------------------- # Inducing failures and crashes in processing @@ -39,9 +40,12 @@ $node->safe_psql('postgres', 'CREATE EXTENSION test_checksums;'); # Force enabling checksums to fail by marking one of the databases as having # failed in processing. disable_data_checksums($node, wait => 1); -$node->safe_psql('postgres', 'SELECT dcw_inject_fail_database(true);'); +$node->safe_psql('postgres', + "SELECT injection_points_attach('datachecksumsworker-fail-db-result','notice');" +); enable_data_checksums($node, wait => 'off'); -$node->safe_psql('postgres', 'SELECT dcw_inject_fail_database(false);'); +$node->safe_psql('postgres', + "SELECT injection_points_detach('datachecksumsworker-fail-db-result');"); # Make sure that disabling after a failure works disable_data_checksums($node); @@ -66,7 +70,9 @@ SKIP: # will force the processing to wait and retry in order to wait for it to # disappear. disable_data_checksums($node, wait => 1); - $node->safe_psql('postgres', 'SELECT dcw_fake_temptable(true);'); + $node->safe_psql('postgres', + "SELECT injection_points_attach('datachecksumsworker-fake-temptable-wait', 'notice');" + ); enable_data_checksums($node, wait => 'on'); } diff --git a/src/test/modules/test_checksums/t/006_pgbench_single.pl b/src/test/modules/test_checksums/t/006_pgbench_single.pl index 0ab5b04b931..f5ccc1b5b08 100644 --- a/src/test/modules/test_checksums/t/006_pgbench_single.pl +++ b/src/test/modules/test_checksums/t/006_pgbench_single.pl @@ -89,14 +89,21 @@ sub background_rw_pgbench # before and after state. sub flip_data_checksums { + my $temptablewait = 0; + # First, make sure the cluster is in the state we expect it to be test_checksum_state($node, $data_checksum_state); if ($data_checksum_state eq 'off') { # Coin-toss to see if we are injecting a retry due to a temptable - $node->safe_psql('postgres', 'SELECT dcw_fake_temptable();') - if cointoss(); + if (cointoss()) + { + $node->safe_psql('postgres', + "SELECT injection_points_attach('datachecksumsworker-fake-temptable-wait', 'notice');" + ); + $temptablewait = 1; + } # log LSN right before we start changing checksums my $result = @@ -117,7 +124,9 @@ sub flip_data_checksums random_sleep() if ($extended); - $node->safe_psql('postgres', 'SELECT dcw_fake_temptable(false);'); + $node->safe_psql('postgres', + "SELECT injection_points_detach('datachecksumsworker-fake-temptable-wait');" + ) if ($temptablewait); $data_checksum_state = 'on'; } elsif ($data_checksum_state eq 'on') @@ -165,6 +174,7 @@ log_statement = none ]); $node->start; $node->safe_psql('postgres', 'CREATE EXTENSION test_checksums;'); +$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;'); # Create some content to have un-checksummed data in the cluster $node->safe_psql('postgres', "CREATE TABLE t AS SELECT generate_series(1, 100000) AS a;"); diff --git a/src/test/modules/test_checksums/t/007_pgbench_standby.pl b/src/test/modules/test_checksums/t/007_pgbench_standby.pl index b0d40d24005..f3611e7ce25 100644 --- a/src/test/modules/test_checksums/t/007_pgbench_standby.pl +++ b/src/test/modules/test_checksums/t/007_pgbench_standby.pl @@ -99,6 +99,8 @@ sub background_pgbench # before and after state. sub flip_data_checksums { + my $temptablewait = 0; + # First, make sure the cluster is in the state we expect it to be test_checksum_state($node_primary, $data_checksum_state); test_checksum_state($node_standby, $data_checksum_state); @@ -106,8 +108,13 @@ sub flip_data_checksums if ($data_checksum_state eq 'off') { # Coin-toss to see if we are injecting a retry due to a temptable - $node_primary->safe_psql('postgres', 'SELECT dcw_fake_temptable();') - if cointoss(); + if (cointoss()) + { + $node_primary->safe_psql('postgres', + "SELECT injection_points_attach('datachecksumsworker-fake-temptable-wait', 'notice');" + ); + $temptablewait = 1; + } # log LSN right before we start changing checksums my $result = @@ -154,7 +161,8 @@ sub flip_data_checksums wait_for_checksum_state($node_standby, 'on'); $node_primary->safe_psql('postgres', - 'SELECT dcw_fake_temptable(false);'); + "SELECT injection_points_detach('datachecksumsworker-fake-temptable-wait');" + ) if ($temptablewait); $data_checksum_state = 'on'; } elsif ($data_checksum_state eq 'on') @@ -170,6 +178,7 @@ sub flip_data_checksums $node_primary->wait_for_catchup($node_standby, 'replay'); # Wait for checksums disabled on the primary and standby + random_sleep() if ($extended); wait_for_checksum_state($node_primary, 'off'); wait_for_checksum_state($node_standby, 'off'); @@ -178,9 +187,6 @@ sub flip_data_checksums $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); note("LSN after disabling: " . $result . "\n"); - random_sleep() if ($extended); - wait_for_checksum_state($node_standby, 'off'); - $data_checksum_state = 'off'; } else @@ -207,6 +213,7 @@ log_statement = none ]); $node_primary->start; $node_primary->safe_psql('postgres', 'CREATE EXTENSION test_checksums;'); +$node_primary->safe_psql('postgres', 'CREATE EXTENSION injection_points;'); # Create some content to have un-checksummed data in the cluster $node_primary->safe_psql('postgres', "CREATE TABLE t AS SELECT generate_series(1, 100000) AS a;"); diff --git a/src/test/modules/test_checksums/t/008_pitr.pl b/src/test/modules/test_checksums/t/008_pitr.pl index b9b89f414ab..e8cb2b0ed96 100644 --- a/src/test/modules/test_checksums/t/008_pitr.pl +++ b/src/test/modules/test_checksums/t/008_pitr.pl @@ -67,15 +67,15 @@ sub flip_data_checksums # log LSN right before we start changing checksums $lsn_pre = $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); + note("LSN before disabling: " . $lsn_pre . "\n"); - disable_data_checksums($node_primary); - - # Wait for checksums disabled on the primary - wait_for_checksum_state($node_primary, 'off'); + # Disable checksums on the primary and wait for completion + disable_data_checksums($node_primary, wait => 1); # log LSN right after the primary flips checksums to "off" $lsn_post = $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); + note("LSN after disabling: " . $lsn_post . "\n"); $data_checksum_state = 'off'; } diff --git a/src/test/modules/test_checksums/t/DataChecksums/Utils.pm b/src/test/modules/test_checksums/t/DataChecksums/Utils.pm index 9a2269e8a92..fb704623a60 100644 --- a/src/test/modules/test_checksums/t/DataChecksums/Utils.pm +++ b/src/test/modules/test_checksums/t/DataChecksums/Utils.pm @@ -175,8 +175,19 @@ EOQ $postgresnode->safe_psql('postgres', sprintf($query, $params{cost_delay}, $params{cost_limit})); - wait_for_checksum_state($postgresnode, $params{wait}) - if (defined($params{wait})); + if (defined($params{wait})) + { + wait_for_checksum_state($postgresnode, $params{wait}); + # If we are tasked with waiting for an end state, also wait for the + # launcher to exit. + if ($params{wait} eq 'on' || $params{wait} eq 'off') + { + $postgresnode->poll_query_until('postgres', + "SELECT count(*) = 0 " + . "FROM pg_catalog.pg_stat_activity " + . "WHERE backend_type = 'datachecksum launcher';"); + } + } } =item disable_data_checksums($node, %params) @@ -204,7 +215,14 @@ sub disable_data_checksums $postgresnode->safe_psql('postgres', 'SELECT pg_disable_data_checksums();'); - wait_for_checksum_state($postgresnode, 'off') if (defined($params{wait})); + if (defined($params{wait})) + { + wait_for_checksum_state($postgresnode, 'off'); + $postgresnode->poll_query_until('postgres', + "SELECT count(*) = 0 " + . "FROM pg_catalog.pg_stat_activity " + . "WHERE backend_type = 'datachecksum launcher';"); + } } =item cointoss diff --git a/src/test/modules/test_checksums/test_checksums--1.0.sql b/src/test/modules/test_checksums/test_checksums--1.0.sql index 90642d247fa..c674fee02bb 100644 --- a/src/test/modules/test_checksums/test_checksums--1.0.sql +++ b/src/test/modules/test_checksums/test_checksums--1.0.sql @@ -14,11 +14,3 @@ CREATE FUNCTION dcw_inject_launcher_delay(attach boolean DEFAULT true) CREATE FUNCTION dcw_inject_startup_delay(attach boolean DEFAULT true) RETURNS pg_catalog.void AS 'MODULE_PATHNAME' LANGUAGE C; - -CREATE FUNCTION dcw_inject_fail_database(attach boolean DEFAULT true) - RETURNS pg_catalog.void - AS 'MODULE_PATHNAME' LANGUAGE C; - -CREATE FUNCTION dcw_fake_temptable(attach boolean DEFAULT true) - RETURNS pg_catalog.void - AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_checksums/test_checksums.c b/src/test/modules/test_checksums/test_checksums.c index e2b91f9c78c..c2eabc2821c 100644 --- a/src/test/modules/test_checksums/test_checksums.c +++ b/src/test/modules/test_checksums/test_checksums.c @@ -25,8 +25,6 @@ extern PGDLLEXPORT void dc_delay_barrier(const char *name, const void *private_d extern PGDLLEXPORT void dc_modify_db_result(const char *name, const void *private_data, void *arg); extern PGDLLEXPORT void dc_fake_temptable(const char *name, const void *private_data, void *arg); -extern PGDLLEXPORT void crash(const char *name, const void *private_data, void *arg); - /* * Test for delaying emission of procsignalbarriers. */ @@ -107,80 +105,3 @@ dcw_inject_startup_delay(PG_FUNCTION_ARGS) #endif PG_RETURN_VOID(); } - -#ifdef USE_INJECTION_POINTS -static uint32 db_fail = DATACHECKSUMSWORKER_FAILED; -#endif - -void -dc_modify_db_result(const char *name, const void *private_data, void *arg) -{ - DataChecksumsWorkerResult *res = (DataChecksumsWorkerResult *) arg; - uint32 new_res = *(uint32 *) private_data; - - *res = new_res; -} - -PG_FUNCTION_INFO_V1(dcw_inject_fail_database); -Datum -dcw_inject_fail_database(PG_FUNCTION_ARGS) -{ -#ifdef USE_INJECTION_POINTS - bool attach = PG_GETARG_BOOL(0); - - if (attach) - InjectionPointAttach("datachecksumsworker-modify-db-result", - "test_checksums", - "dc_modify_db_result", - &db_fail, - sizeof(uint32)); - else - InjectionPointDetach("datachecksumsworker-modify-db-result"); -#else - elog(ERROR, - "test is not working as intended when injection points are disabled"); -#endif - PG_RETURN_VOID(); -} - -/* - * Test to force waiting for existing temptables. - */ -void -dc_fake_temptable(const char *name, const void *private_data, void *arg) -{ - static bool first_pass = true; - int *numleft = (int *) arg; - - if (first_pass) - *numleft = 1; - first_pass = false; -} - -PG_FUNCTION_INFO_V1(dcw_fake_temptable); -Datum -dcw_fake_temptable(PG_FUNCTION_ARGS) -{ -#ifdef USE_INJECTION_POINTS - bool attach = PG_GETARG_BOOL(0); - - if (attach) - InjectionPointAttach("datachecksumsworker-fake-temptable-wait", - "test_checksums", - "dc_fake_temptable", - NULL, - 0); - else - InjectionPointDetach("datachecksumsworker-fake-temptable-wait"); -#else - elog(ERROR, - "test is not working as intended when injection points are disabled"); -#endif - PG_RETURN_VOID(); -} - -void -crash(const char *name, const void *private_data, void *arg) -{ - abort(); -}