Add errdetail() reporting the PID and UID of the sender of a termination signal.

When a backend is terminated via pg_terminate_backend() or an external
SIGTERM, the error message now includes the sender's PID and UID as
errdetail, making it easier to identify the source of unexpected
terminations in multi-user environments.

On platforms that support SA_SIGINFO (Linux, FreeBSD, and most modern
Unix systems), the signal handler captures si_pid and si_uid from the
siginfo_t structure.  On platforms without SA_SIGINFO, the detail is
simply omitted.

Author: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Andrew Dunstan <andrew@dunslane.net>
Reviewed-by: Chao Li <1356863904@qq.com>
Discussion: https://postgr.es/m/CAKZiRmyrOWovZSdixpLd3PGMQXuQL_zw2Ght5XhHCkQ1uDsxjw@mail.gmail.com
This commit is contained in:
Andrew Dunstan 2026-04-06 12:39:14 -04:00
parent c10edb102a
commit 55890a9194
10 changed files with 146 additions and 15 deletions

42
configure vendored
View file

@ -15797,6 +15797,48 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
fi fi
fi fi
# Check for SA_SIGINFO extended signal handler availability
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SA_SIGINFO" >&5
$as_echo_n "checking for SA_SIGINFO... " >&6; }
if ${ac_cv_have_sa_siginfo+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <signal.h>
#include <stddef.h>
int
main ()
{
struct sigaction sa;
sa.sa_flags = SA_SIGINFO;
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
ac_cv_have_sa_siginfo=yes
else
ac_cv_have_sa_siginfo=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sa_siginfo" >&5
$as_echo "$ac_cv_have_sa_siginfo" >&6; }
if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
$as_echo "#define HAVE_SA_SIGINFO 1" >>confdefs.h
fi
## ##
## Functions, global variables ## Functions, global variables

View file

@ -1817,6 +1817,24 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
fi fi
fi fi
# Check for SA_SIGINFO extended signal handler availability
AC_CACHE_CHECK([for SA_SIGINFO], [ac_cv_have_sa_siginfo], [
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([[
#include <signal.h>
#include <stddef.h>
]], [[
struct sigaction sa;
sa.sa_flags = SA_SIGINFO;
]])
],
[ac_cv_have_sa_siginfo=yes],
[ac_cv_have_sa_siginfo=no])
])
if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
AC_DEFINE([HAVE_SA_SIGINFO], 1, [Define to 1 if you have SA_SIGINFO available.])
fi
## ##
## Functions, global variables ## Functions, global variables

View file

@ -2985,6 +2985,10 @@ if cc.has_member('struct sockaddr', 'sa_len',
cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1) cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1)
endif endif
# Define HAVE_SA_SIGINFO only when <signal.h> provides the SA_SIGINFO symbol.
if cc.has_header_symbol('signal.h', 'SA_SIGINFO')
cdata.set('HAVE_SA_SIGINFO', 1)
endif
if cc.has_member('struct tm', 'tm_zone', if cc.has_member('struct tm', 'tm_zone',
args: test_c_args, include_directories: postgres_inc, args: test_c_args, include_directories: postgres_inc,
prefix: ''' prefix: '''

View file

@ -300,10 +300,22 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
*/ */
if (ProcDiePending) if (ProcDiePending)
{ {
/*
* ProcDieSenderPid/Uid are read directly from the globals here
* rather than copied to locals first; a second SIGTERM could
* change them between reads, but that is harmless because the
* process is about to die anyway. The signal sender detail is
* inlined rather than using a separate errdetail() call because
* it must be appended to the existing detail message.
*/
ereport(WARNING, ereport(WARNING,
(errcode(ERRCODE_ADMIN_SHUTDOWN), (errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"), errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
errdetail("The transaction has already committed locally, but might not have been replicated to the standby."))); errdetail("The transaction has already committed locally, but might not have been replicated to the standby.%s",
ProcDieSenderPid == 0 ? "" :
psprintf("\nSignal sent by PID %d, UID %d.",
(int) ProcDieSenderPid,
(int) ProcDieSenderUid))));
whereToSendOutput = DestNone; whereToSendOutput = DestNone;
SyncRepCancelWait(); SyncRepCancelWait();
break; break;

View file

@ -109,6 +109,14 @@ int client_connection_check_interval = 0;
/* flags for non-system relation kinds to restrict use */ /* flags for non-system relation kinds to restrict use */
int restrict_nonsystem_relation_kind; int restrict_nonsystem_relation_kind;
/*
 * Optional errdetail() naming the process that sent the signal.
 *
 * Callers pass pid/uid values they have already copied out of the globals.
 * A pid of 0 is treated as "no sender information": in that case the macro
 * evaluates to plain 0 and errdetail() is not called at all.
 */
#define ERRDETAIL_SIGNAL_SENDER(pid, uid) \
	((pid) != 0 \
	 ? errdetail("Signal sent by PID %d, UID %d.", (int) (pid), (int) (uid)) \
	 : 0)
/* ---------------- /* ----------------
* private typedefs etc * private typedefs etc
* ---------------- * ----------------
@ -3347,7 +3355,12 @@ ProcessInterrupts(void)
if (ProcDiePending) if (ProcDiePending)
{ {
int sender_pid = ProcDieSenderPid;
int sender_uid = ProcDieSenderUid;
ProcDiePending = false; ProcDiePending = false;
ProcDieSenderPid = 0;
ProcDieSenderUid = 0;
QueryCancelPending = false; /* ProcDie trumps QueryCancel */ QueryCancelPending = false; /* ProcDie trumps QueryCancel */
LockErrorCleanup(); LockErrorCleanup();
/* As in quickdie, don't risk sending to client during auth */ /* As in quickdie, don't risk sending to client during auth */
@ -3360,15 +3373,18 @@ ProcessInterrupts(void)
else if (AmAutoVacuumWorkerProcess()) else if (AmAutoVacuumWorkerProcess())
ereport(FATAL, ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN), (errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating autovacuum process due to administrator command"))); errmsg("terminating autovacuum process due to administrator command"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (IsLogicalWorker()) else if (IsLogicalWorker())
ereport(FATAL, ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN), (errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating logical replication worker due to administrator command"))); errmsg("terminating logical replication worker due to administrator command"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (IsLogicalLauncher()) else if (IsLogicalLauncher())
{ {
ereport(DEBUG1, ereport(DEBUG1,
(errmsg_internal("logical replication launcher shutting down"))); (errmsg_internal("logical replication launcher shutting down"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
/* /*
* The logical replication launcher can be stopped at any time. * The logical replication launcher can be stopped at any time.
@ -3379,23 +3395,27 @@ ProcessInterrupts(void)
else if (AmWalReceiverProcess()) else if (AmWalReceiverProcess())
ereport(FATAL, ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN), (errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating walreceiver process due to administrator command"))); errmsg("terminating walreceiver process due to administrator command"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (AmBackgroundWorkerProcess()) else if (AmBackgroundWorkerProcess())
ereport(FATAL, ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN), (errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating background worker \"%s\" due to administrator command", errmsg("terminating background worker \"%s\" due to administrator command",
MyBgworkerEntry->bgw_type))); MyBgworkerEntry->bgw_type),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (AmIoWorkerProcess()) else if (AmIoWorkerProcess())
{ {
ereport(DEBUG1, ereport(DEBUG1,
(errmsg_internal("io worker shutting down due to administrator command"))); (errmsg_internal("io worker shutting down due to administrator command"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
proc_exit(0); proc_exit(0);
} }
else else
ereport(FATAL, ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN), (errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating connection due to administrator command"))); errmsg("terminating connection due to administrator command"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
} }
if (CheckClientConnectionPending) if (CheckClientConnectionPending)

View file

@ -43,6 +43,8 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
volatile uint32 InterruptHoldoffCount = 0; volatile uint32 InterruptHoldoffCount = 0;
volatile uint32 QueryCancelHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0;
volatile uint32 CritSectionCount = 0; volatile uint32 CritSectionCount = 0;
/*
 * PID and UID of the sender of the most recently received SIGTERM, as
 * recorded by the signal wrapper when SA_SIGINFO is available.  A PID of 0
 * means no sender information has been captured.
 */
volatile int ProcDieSenderPid = 0;
volatile int ProcDieSenderUid = 0;
int MyProcPid; int MyProcPid;
pg_time_t MyStartTime; pg_time_t MyStartTime;

View file

@ -142,12 +142,11 @@ my ($ret, $out, $err) = $node->psql('postgres',
is($ret, 2, 'server crash: psql exit code'); is($ret, 2, 'server crash: psql exit code');
like($out, qr/before/, 'server crash: output before crash'); like($out, qr/before/, 'server crash: output before crash');
unlike($out, qr/AFTER/, 'server crash: no output after crash'); unlike($out, qr/AFTER/, 'server crash: no output after crash');
is( $err, like( $err, qr/psql:<stdin>:2: FATAL: terminating connection due to administrator command
'psql:<stdin>:2: FATAL: terminating connection due to administrator command (?:DETAIL: Signal sent by PID \d+, UID \d+\.\n)?psql:<stdin>:2: server closed the connection unexpectedly
psql:<stdin>:2: server closed the connection unexpectedly
This probably means the server terminated abnormally This probably means the server terminated abnormally
before or while processing the request. before or while processing the request.
psql:<stdin>:2: error: connection to server was lost', psql:<stdin>:2: error: connection to server was lost/,
'server crash: error message'); 'server crash: error message');
# test \errverbose # test \errverbose

View file

@ -90,6 +90,8 @@
extern PGDLLIMPORT volatile sig_atomic_t InterruptPending; extern PGDLLIMPORT volatile sig_atomic_t InterruptPending;
extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending; extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending;
extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending; extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending;
/* sender of the SIGTERM behind ProcDiePending; a PID of 0 means not known */
extern PGDLLIMPORT volatile int ProcDieSenderPid;
extern PGDLLIMPORT volatile int ProcDieSenderUid;
extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;

View file

@ -354,6 +354,9 @@
/* Define to 1 if you have the `rl_variable_bind' function. */ /* Define to 1 if you have the `rl_variable_bind' function. */
#undef HAVE_RL_VARIABLE_BIND #undef HAVE_RL_VARIABLE_BIND
/* Define to 1 if you have SA_SIGINFO available. */
#undef HAVE_SA_SIGINFO
/* Define to 1 if you have the <security/pam_appl.h> header file. */ /* Define to 1 if you have the <security/pam_appl.h> header file. */
#undef HAVE_SECURITY_PAM_APPL_H #undef HAVE_SECURITY_PAM_APPL_H

View file

@ -82,10 +82,19 @@ static volatile pqsigfunc pqsignal_handlers[PG_NSIG];
* *
* This wrapper also handles restoring the value of errno. * This wrapper also handles restoring the value of errno.
*/ */
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
static void
wrapper_handler(int signo, siginfo_t * info, void *context)
#else
static void static void
wrapper_handler(SIGNAL_ARGS) wrapper_handler(SIGNAL_ARGS)
#endif
{ {
int save_errno = errno; int save_errno = errno;
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
/* SA_SIGINFO signature uses signo, not SIGNAL_ARGS macro */
int postgres_signal_arg = signo;
#endif
Assert(postgres_signal_arg > 0); Assert(postgres_signal_arg > 0);
Assert(postgres_signal_arg < PG_NSIG); Assert(postgres_signal_arg < PG_NSIG);
@ -105,6 +114,14 @@ wrapper_handler(SIGNAL_ARGS)
raise(postgres_signal_arg); raise(postgres_signal_arg);
return; return;
} }
#ifdef HAVE_SA_SIGINFO
if (signo == SIGTERM && info)
{
ProcDieSenderPid = info->si_pid;
ProcDieSenderUid = info->si_uid;
}
#endif
#endif #endif
(*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg); (*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg);
@ -125,6 +142,7 @@ pqsignal(int signo, pqsigfunc func)
#if !(defined(WIN32) && defined(FRONTEND)) #if !(defined(WIN32) && defined(FRONTEND))
struct sigaction act; struct sigaction act;
#endif #endif
bool use_wrapper = false;
Assert(signo > 0); Assert(signo > 0);
Assert(signo < PG_NSIG); Assert(signo < PG_NSIG);
@ -132,13 +150,24 @@ pqsignal(int signo, pqsigfunc func)
if (func != SIG_IGN && func != SIG_DFL) if (func != SIG_IGN && func != SIG_DFL)
{ {
pqsignal_handlers[signo] = func; /* assumed atomic */ pqsignal_handlers[signo] = func; /* assumed atomic */
func = wrapper_handler; use_wrapper = true;
} }
#if !(defined(WIN32) && defined(FRONTEND)) #if !(defined(WIN32) && defined(FRONTEND))
act.sa_handler = func;
sigemptyset(&act.sa_mask); sigemptyset(&act.sa_mask);
act.sa_flags = SA_RESTART; act.sa_flags = SA_RESTART;
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
if (use_wrapper)
{
act.sa_sigaction = wrapper_handler;
act.sa_flags |= SA_SIGINFO;
}
else
act.sa_handler = func;
#else
act.sa_handler = use_wrapper ? wrapper_handler : func;
#endif
#ifdef SA_NOCLDSTOP #ifdef SA_NOCLDSTOP
if (signo == SIGCHLD) if (signo == SIGCHLD)
act.sa_flags |= SA_NOCLDSTOP; act.sa_flags |= SA_NOCLDSTOP;
@ -147,7 +176,7 @@ pqsignal(int signo, pqsigfunc func)
Assert(false); /* probably indicates coding error */ Assert(false); /* probably indicates coding error */
#else #else
/* Forward to Windows native signal system. */ /* Forward to Windows native signal system. */
if (signal(signo, func) == SIG_ERR) if (signal(signo, use_wrapper ? wrapper_handler : func) == SIG_ERR)
Assert(false); /* probably indicates coding error */ Assert(false); /* probably indicates coding error */
#endif #endif
} }