mirror of
https://github.com/postgres/postgres.git
synced 2026-05-28 04:35:45 -04:00
Wake standby_write/standby_flush waiters from the WAL replay loop
The startup process only woke STANDBY_REPLAY waiters after replaying each WAL record. STANDBY_WRITE and STANDBY_FLUSH waiters depended only on walreceiver write/flush callbacks. As a result, replay progress alone did not wake those waiters, and in pure archive recovery (where no walreceiver exists) they could sleep until timeout.

Fix by also calling WaitLSNWakeup() for STANDBY_WRITE and STANDBY_FLUSH after each replay. For the replay-floor semantics used by GetCurrentLSNForWaitType(), replay progress is a valid lower bound for both modes: WAL cannot be replayed unless it has already been written and flushed locally.

This works together with the replay-position floor in GetCurrentLSNForWaitType(). The getter ensures that a waiter woken by replay can recheck successfully; the replay-side wakeups ensure that a waiter already asleep is notified when replay reaches its target.

Reported-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/1957514.1775526774%40sss.pgh.pa.us
Author: Xuneng Zhou <xunengzhou@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
This commit is contained in:
parent
cba67b5b87
commit
e7cd592174
2 changed files with 72 additions and 2 deletions
|
|
@@ -1782,11 +1782,17 @@ PerformWalRecovery(void)
|
|||
ApplyWalRecord(xlogreader, record, &replayTLI);
|
||||
|
||||
/*
|
||||
* Wake up processes waiting for standby replay LSN to reach
|
||||
* current replay position.
|
||||
* Wake up processes waiting for standby replay, write, or flush
|
||||
* LSN to reach current replay position. Replay implies that the
|
||||
* WAL was already written and flushed to disk, so write and flush
|
||||
* waiters can be woken at the replay position too.
|
||||
*/
|
||||
WaitLSNWakeup(WAIT_LSN_TYPE_STANDBY_REPLAY,
|
||||
XLogRecoveryCtl->lastReplayedEndRecPtr);
|
||||
WaitLSNWakeup(WAIT_LSN_TYPE_STANDBY_WRITE,
|
||||
XLogRecoveryCtl->lastReplayedEndRecPtr);
|
||||
WaitLSNWakeup(WAIT_LSN_TYPE_STANDBY_FLUSH,
|
||||
XLogRecoveryCtl->lastReplayedEndRecPtr);
|
||||
|
||||
/* Exit loop if we reached inclusive recovery target */
|
||||
if (recoveryStopsAfter(xlogreader))
|
||||
|
|
|
|||
|
|
@@ -744,6 +744,70 @@ $output = $arc_standby->safe_psql(
|
|||
ok($output eq "success",
|
||||
"standby_flush succeeds on archive-only standby (getter fallback)");
|
||||
|
||||
# 9b. Replay waker: standby_write/standby_flush waiters that go to sleep
|
||||
# (target > replay at entry) are woken when replay catches up. This tests
|
||||
# that PerformWalRecovery() calls WaitLSNWakeup for STANDBY_WRITE and
|
||||
# STANDBY_FLUSH, not just STANDBY_REPLAY.
|
||||
#
|
||||
# Pause replay, archive more WAL, start background waiters, then resume
|
||||
# replay and verify the waiters complete.
|
||||
|
||||
$arc_standby->safe_psql('postgres', "SELECT pg_wal_replay_pause()");
|
||||
|
||||
# Generate more WAL and archive it.
|
||||
$arc_primary->safe_psql('postgres',
|
||||
"INSERT INTO arc_test VALUES (generate_series(21, 30))");
|
||||
my $arc_target_lsn2 =
|
||||
$arc_primary->safe_psql('postgres', "SELECT pg_current_wal_insert_lsn()");
|
||||
|
||||
my $arc_segment2 = $arc_primary->safe_psql('postgres',
|
||||
"SELECT pg_walfile_name(pg_current_wal_lsn())");
|
||||
$arc_primary->safe_psql('postgres', "SELECT pg_switch_wal()");
|
||||
$arc_primary->poll_query_until('postgres',
|
||||
qq{SELECT last_archived_wal >= '$arc_segment2' FROM pg_stat_archiver},
|
||||
't')
|
||||
or die "Timed out waiting for WAL archiving on arc_primary (round 2)";
|
||||
|
||||
# Start background waiters. With replay paused, target > replay, so they
|
||||
# will sleep on WaitLatch. They can only be woken by the replay-loop
|
||||
# WaitLSNWakeup calls.
|
||||
my $arc_write_session = $arc_standby->background_psql('postgres');
|
||||
$arc_write_session->query_until(
|
||||
qr/start/, qq[
|
||||
\\echo start
|
||||
WAIT FOR LSN '${arc_target_lsn2}'
|
||||
WITH (MODE 'standby_write', timeout '1d', no_throw);
|
||||
]);
|
||||
|
||||
my $arc_flush_session = $arc_standby->background_psql('postgres');
|
||||
$arc_flush_session->query_until(
|
||||
qr/start/, qq[
|
||||
\\echo start
|
||||
WAIT FOR LSN '${arc_target_lsn2}'
|
||||
WITH (MODE 'standby_flush', timeout '1d', no_throw);
|
||||
]);
|
||||
|
||||
# Verify both waiters are blocked.
|
||||
$arc_standby->poll_query_until('postgres',
|
||||
"SELECT count(*) = 2 FROM pg_stat_activity WHERE wait_event LIKE 'WaitForWal%'"
|
||||
) or die "Timed out waiting for arc_standby waiters to block";
|
||||
|
||||
# Resume replay. The startup process should wake the STANDBY_WRITE and
|
||||
# STANDBY_FLUSH waiters as it replays past arc_target_lsn2.
|
||||
$arc_standby->safe_psql('postgres', "SELECT pg_wal_replay_resume()");
|
||||
|
||||
$arc_write_session->quit;
|
||||
$arc_flush_session->quit;
|
||||
chomp($arc_write_session->{stdout});
|
||||
chomp($arc_flush_session->{stdout});
|
||||
|
||||
is($arc_write_session->{stdout},
|
||||
'success',
|
||||
"standby_write waiter woken by replay on archive-only standby");
|
||||
is($arc_flush_session->{stdout},
|
||||
'success',
|
||||
"standby_flush waiter woken by replay on archive-only standby");
|
||||
|
||||
$arc_standby->stop;
|
||||
$arc_primary->stop;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue