Fix WAIT FOR LSN cleanup on subtransaction abort

WAIT FOR LSN registers the current backend in shared memory before entering an
interruptible wait loop.  Top-level abort and backend exit already call
WaitLSNCleanup(), but subtransaction abort did not.  If an interrupt, such as
statement_timeout, occurred while waiting inside a savepoint, rolling back to
the savepoint left the backend marked as present in the WAIT FOR LSN heap.

Clean up WAIT FOR LSN state from AbortSubTransaction() as well, and add
a TAP test covering reuse of WAIT FOR LSN after a savepoint rollback.

Reported-by: Ayush Tiwari <ayushtiwari.slg01@gmail.com>
Discussion: https://postgr.es/m/CAJTYsWXDRwo-RVRaQgwxVcXgURVFeX8BKnijQrPiPcSCkDDX9A%40mail.gmail.com
Author: Ayush Tiwari <ayushtiwari.slg01@gmail.com>
Author: Xuneng Zhou <xunengzhou@gmail.com>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
This commit is contained in:
Alexander Korotkov 2026-05-06 13:32:18 +03:00
parent 486b9a9b9e
commit 5cdec42319
3 changed files with 54 additions and 1 deletions

View file

@ -5289,6 +5289,11 @@ AbortSubTransaction(void)
*/
LWLockReleaseAll();
/*
* Cleanup waiting for LSN if any.
*/
WaitLSNCleanup();
pgstat_report_wait_end();
pgstat_progress_end_command();

View file

@ -360,7 +360,7 @@ WaitLSNWakeup(WaitLSNType lsnType, XLogRecPtr currentLSN)
}
/*
* Clean up LSN waiters for exiting process
* Clean up any LSN wait state for the current process.
*/
void
WaitLSNCleanup(void)

View file

@ -213,6 +213,54 @@ $output = $node_standby->safe_psql(
WAIT FOR LSN '${lsn3}' WITH (timeout '10ms', no_throw);]);
ok($output eq "timeout", "WAIT FOR returns correct status after timeout");
# 4a. Check that aborting a subtransaction during WAIT FOR LSN cleans up the
# shared wait-state.  The sequence is:
#   - start a WAIT FOR LSN inside a savepoint in a background session;
#   - poll pg_stat_activity until that backend reports the WaitForWalFlush
#     wait event, to ensure that it has registered itself in the waiters
#     heap before it gets canceled;
#   - cancel the statement, after which the session rolls back to the
#     savepoint;
#   - run a second WAIT FOR in the same backend, which must be able to
#     register itself again and is expected to simply time out.
#
# The target LSN is the current WAL insert position plus a large offset, so
# neither WAIT FOR below is expected to be satisfied by itself.
my $subxact_lsn = $node_primary->safe_psql('postgres',
"SELECT pg_current_wal_insert_lsn() + 10000000000");
# Distinct application_name, used to locate this backend in pg_stat_activity.
my $subxact_appname = 'wait_for_lsn_subxact_cleanup';
# on_error_stop => 0: the script must keep going after the canceled statement
# fails, so that ROLLBACK TO and the second WAIT FOR still get executed.
my $subxact_session =
$node_primary->background_psql('postgres', on_error_stop => 0);
# Feed the whole script to the session and wait only for the "start" marker
# echoed right before the first WAIT FOR.  That first WAIT FOR has no timeout,
# so it keeps waiting until it is canceled below; the statements following it
# run only once it has ended.
$subxact_session->query_until(
qr/start/, qq[
SET application_name = '$subxact_appname';
BEGIN;
SAVEPOINT wait_cleanup;
\\echo start
WAIT FOR LSN '${subxact_lsn}' WITH (MODE 'primary_flush');
ROLLBACK TO wait_cleanup;
WAIT FOR LSN '${subxact_lsn}'
WITH (MODE 'primary_flush', timeout '10ms', no_throw);
COMMIT;
]);
# Do not cancel before exactly one backend with our application_name reports
# the WaitForWalFlush wait event; give up if that never happens.
$node_primary->poll_query_until(
'postgres',
"SELECT count(*) = 1 FROM pg_stat_activity
WHERE application_name = '$subxact_appname'
AND wait_event = 'WaitForWalFlush'"
) or die "WAIT FOR LSN did not enter the primary_flush wait path";
# Cancel the waiting statement.  The same filter as above is used, so a single
# 't' from pg_cancel_backend() is the expected result.
my $subxact_cancelled = $node_primary->safe_psql(
'postgres',
"SELECT pg_cancel_backend(pid) FROM pg_stat_activity
WHERE application_name = '$subxact_appname'
AND wait_event = 'WaitForWalFlush'"
);
is($subxact_cancelled, 't', "canceled WAIT FOR LSN in subtransaction");
# End the session, then inspect what it left on stdout and stderr.  The
# trailing newline is stripped so stdout can be compared to an exact string.
$subxact_session->quit;
chomp($subxact_session->{stdout});
# The first WAIT FOR must have been interrupted by the cancel request.
like(
$subxact_session->{stderr},
qr/canceling statement due to user request/,
"query cancel interrupted WAIT FOR LSN in subtransaction");
# The second WAIT FOR (10ms timeout, no_throw) must have run after the
# savepoint rollback and reported its status; "timeout" is expected to be the
# only thing on stdout.
is($subxact_session->{stdout},
"timeout", "second WAIT FOR LSN timed out after savepoint rollback");
# Finally, make sure the connection was not lost along the way.
unlike(
$subxact_session->{stderr},
qr/server closed the connection unexpectedly/,
"WAIT FOR LSN after savepoint rollback did not disconnect");
# 5. Check mode validation: standby modes error on primary, primary mode errors
# on standby, and primary_flush works on primary. Also check that WAIT FOR
# triggers an error if called within a function, procedure, anonymous DO block,