mirror of
https://github.com/postgres/postgres.git
synced 2026-06-11 09:40:25 -04:00
Add errdetail() with PID and UID about source of termination signal.
When a backend is terminated via pg_terminate_backend() or an external SIGTERM, the error message now includes the sender's PID and UID as errdetail, making it easier to identify the source of unexpected terminations in multi-user environments. On platforms that support SA_SIGINFO (Linux, FreeBSD, and most modern Unix systems), the signal handler captures si_pid and si_uid from the siginfo_t structure. On platforms without SA_SIGINFO, the detail is simply omitted. Author: Jakub Wartak <jakub.wartak@enterprisedb.com> Reviewed-by: Andrew Dunstan <andrew@dunslane.net> Reviewed-by: Chao Li <1356863904@qq.com> Discussion: https://postgr.es/m/CAKZiRmyrOWovZSdixpLd3PGMQXuQL_zw2Ght5XhHCkQ1uDsxjw@mail.gmail.com
This commit is contained in:
parent
c10edb102a
commit
55890a9194
10 changed files with 146 additions and 15 deletions
42
configure
vendored
42
configure
vendored
|
|
@ -15797,6 +15797,48 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Check for SA_SIGINFO extended signal handler availability
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SA_SIGINFO" >&5
|
||||||
|
$as_echo_n "checking for SA_SIGINFO... " >&6; }
|
||||||
|
if ${ac_cv_have_sa_siginfo+:} false; then :
|
||||||
|
$as_echo_n "(cached) " >&6
|
||||||
|
else
|
||||||
|
|
||||||
|
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
|
||||||
|
struct sigaction sa;
|
||||||
|
sa.sa_flags = SA_SIGINFO;
|
||||||
|
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
_ACEOF
|
||||||
|
if ac_fn_c_try_compile "$LINENO"; then :
|
||||||
|
ac_cv_have_sa_siginfo=yes
|
||||||
|
else
|
||||||
|
ac_cv_have_sa_siginfo=no
|
||||||
|
fi
|
||||||
|
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||||
|
|
||||||
|
fi
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sa_siginfo" >&5
|
||||||
|
$as_echo "$ac_cv_have_sa_siginfo" >&6; }
|
||||||
|
|
||||||
|
if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
|
||||||
|
|
||||||
|
$as_echo "#define HAVE_SA_SIGINFO 1" >>confdefs.h
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
##
|
##
|
||||||
## Functions, global variables
|
## Functions, global variables
|
||||||
|
|
|
||||||
18
configure.ac
18
configure.ac
|
|
@ -1817,6 +1817,24 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Check for SA_SIGINFO extended signal handler availability
|
||||||
|
AC_CACHE_CHECK([for SA_SIGINFO], [ac_cv_have_sa_siginfo], [
|
||||||
|
AC_COMPILE_IFELSE([
|
||||||
|
AC_LANG_PROGRAM([[
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
]], [[
|
||||||
|
struct sigaction sa;
|
||||||
|
sa.sa_flags = SA_SIGINFO;
|
||||||
|
]])
|
||||||
|
],
|
||||||
|
[ac_cv_have_sa_siginfo=yes],
|
||||||
|
[ac_cv_have_sa_siginfo=no])
|
||||||
|
])
|
||||||
|
|
||||||
|
if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
|
||||||
|
AC_DEFINE([HAVE_SA_SIGINFO], 1, [Define to 1 if you have SA_SIGINFO available.])
|
||||||
|
fi
|
||||||
|
|
||||||
##
|
##
|
||||||
## Functions, global variables
|
## Functions, global variables
|
||||||
|
|
|
||||||
|
|
@ -2985,6 +2985,10 @@ if cc.has_member('struct sockaddr', 'sa_len',
|
||||||
cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1)
|
cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
if cc.has_header_symbol('signal.h', 'SA_SIGINFO')
|
||||||
|
cdata.set('HAVE_SA_SIGINFO', 1)
|
||||||
|
endif
|
||||||
|
|
||||||
if cc.has_member('struct tm', 'tm_zone',
|
if cc.has_member('struct tm', 'tm_zone',
|
||||||
args: test_c_args, include_directories: postgres_inc,
|
args: test_c_args, include_directories: postgres_inc,
|
||||||
prefix: '''
|
prefix: '''
|
||||||
|
|
|
||||||
|
|
@ -300,10 +300,22 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
|
||||||
*/
|
*/
|
||||||
if (ProcDiePending)
|
if (ProcDiePending)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* ProcDieSenderPid/Uid are read directly from the globals here
|
||||||
|
* rather than copied to locals first; a second SIGTERM could
|
||||||
|
* change them between reads, but that is harmless because the
|
||||||
|
* process is about to die anyway. The signal sender detail is
|
||||||
|
* inlined rather than using a separate errdetail() call because
|
||||||
|
* it must be appended to the existing detail message.
|
||||||
|
*/
|
||||||
ereport(WARNING,
|
ereport(WARNING,
|
||||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||||
errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
|
errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
|
||||||
errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
|
errdetail("The transaction has already committed locally, but might not have been replicated to the standby.%s",
|
||||||
|
ProcDieSenderPid == 0 ? "" :
|
||||||
|
psprintf("\nSignal sent by PID %d, UID %d.",
|
||||||
|
(int) ProcDieSenderPid,
|
||||||
|
(int) ProcDieSenderUid))));
|
||||||
whereToSendOutput = DestNone;
|
whereToSendOutput = DestNone;
|
||||||
SyncRepCancelWait();
|
SyncRepCancelWait();
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -109,6 +109,14 @@ int client_connection_check_interval = 0;
|
||||||
/* flags for non-system relation kinds to restrict use */
|
/* flags for non-system relation kinds to restrict use */
|
||||||
int restrict_nonsystem_relation_kind;
|
int restrict_nonsystem_relation_kind;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Include signal sender PID/UID as errdetail when available (SA_SIGINFO).
|
||||||
|
* The caller must supply the (already-captured) pid and uid values.
|
||||||
|
* When pid is 0 the macro expands to a plain 0 instead of calling errdetail(), so no detail line is attached.
|
||||||
|
* Note that pid is expanded twice, so pass a side-effect-free expression such as a local copy; both values are cast to int for the %d conversions.
|
||||||
|
*/
|
||||||
|
#define ERRDETAIL_SIGNAL_SENDER(pid, uid) \
|
||||||
|
((pid) == 0 ? 0 : \
|
||||||
|
errdetail("Signal sent by PID %d, UID %d.", (int) (pid), (int) (uid)))
|
||||||
|
|
||||||
/* ----------------
|
/* ----------------
|
||||||
* private typedefs etc
|
* private typedefs etc
|
||||||
* ----------------
|
* ----------------
|
||||||
|
|
@ -3347,7 +3355,12 @@ ProcessInterrupts(void)
|
||||||
|
|
||||||
if (ProcDiePending)
|
if (ProcDiePending)
|
||||||
{
|
{
|
||||||
|
int sender_pid = ProcDieSenderPid;
|
||||||
|
int sender_uid = ProcDieSenderUid;
|
||||||
|
|
||||||
ProcDiePending = false;
|
ProcDiePending = false;
|
||||||
|
ProcDieSenderPid = 0;
|
||||||
|
ProcDieSenderUid = 0;
|
||||||
QueryCancelPending = false; /* ProcDie trumps QueryCancel */
|
QueryCancelPending = false; /* ProcDie trumps QueryCancel */
|
||||||
LockErrorCleanup();
|
LockErrorCleanup();
|
||||||
/* As in quickdie, don't risk sending to client during auth */
|
/* As in quickdie, don't risk sending to client during auth */
|
||||||
|
|
@ -3360,15 +3373,18 @@ ProcessInterrupts(void)
|
||||||
else if (AmAutoVacuumWorkerProcess())
|
else if (AmAutoVacuumWorkerProcess())
|
||||||
ereport(FATAL,
|
ereport(FATAL,
|
||||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||||
errmsg("terminating autovacuum process due to administrator command")));
|
errmsg("terminating autovacuum process due to administrator command"),
|
||||||
|
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||||
else if (IsLogicalWorker())
|
else if (IsLogicalWorker())
|
||||||
ereport(FATAL,
|
ereport(FATAL,
|
||||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||||
errmsg("terminating logical replication worker due to administrator command")));
|
errmsg("terminating logical replication worker due to administrator command"),
|
||||||
|
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||||
else if (IsLogicalLauncher())
|
else if (IsLogicalLauncher())
|
||||||
{
|
{
|
||||||
ereport(DEBUG1,
|
ereport(DEBUG1,
|
||||||
(errmsg_internal("logical replication launcher shutting down")));
|
(errmsg_internal("logical replication launcher shutting down"),
|
||||||
|
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The logical replication launcher can be stopped at any time.
|
* The logical replication launcher can be stopped at any time.
|
||||||
|
|
@ -3379,23 +3395,27 @@ ProcessInterrupts(void)
|
||||||
else if (AmWalReceiverProcess())
|
else if (AmWalReceiverProcess())
|
||||||
ereport(FATAL,
|
ereport(FATAL,
|
||||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||||
errmsg("terminating walreceiver process due to administrator command")));
|
errmsg("terminating walreceiver process due to administrator command"),
|
||||||
|
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||||
else if (AmBackgroundWorkerProcess())
|
else if (AmBackgroundWorkerProcess())
|
||||||
ereport(FATAL,
|
ereport(FATAL,
|
||||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||||
errmsg("terminating background worker \"%s\" due to administrator command",
|
errmsg("terminating background worker \"%s\" due to administrator command",
|
||||||
MyBgworkerEntry->bgw_type)));
|
MyBgworkerEntry->bgw_type),
|
||||||
|
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||||
else if (AmIoWorkerProcess())
|
else if (AmIoWorkerProcess())
|
||||||
{
|
{
|
||||||
ereport(DEBUG1,
|
ereport(DEBUG1,
|
||||||
(errmsg_internal("io worker shutting down due to administrator command")));
|
(errmsg_internal("io worker shutting down due to administrator command"),
|
||||||
|
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||||
|
|
||||||
proc_exit(0);
|
proc_exit(0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
ereport(FATAL,
|
ereport(FATAL,
|
||||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||||
errmsg("terminating connection due to administrator command")));
|
errmsg("terminating connection due to administrator command"),
|
||||||
|
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CheckClientConnectionPending)
|
if (CheckClientConnectionPending)
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,8 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
|
||||||
volatile uint32 InterruptHoldoffCount = 0;
|
volatile uint32 InterruptHoldoffCount = 0;
|
||||||
volatile uint32 QueryCancelHoldoffCount = 0;
|
volatile uint32 QueryCancelHoldoffCount = 0;
|
||||||
volatile uint32 CritSectionCount = 0;
|
volatile uint32 CritSectionCount = 0;
|
||||||
|
volatile int ProcDieSenderPid = 0;
|
||||||
|
volatile int ProcDieSenderUid = 0;
|
||||||
|
|
||||||
int MyProcPid;
|
int MyProcPid;
|
||||||
pg_time_t MyStartTime;
|
pg_time_t MyStartTime;
|
||||||
|
|
|
||||||
|
|
@ -142,12 +142,11 @@ my ($ret, $out, $err) = $node->psql('postgres',
|
||||||
is($ret, 2, 'server crash: psql exit code');
|
is($ret, 2, 'server crash: psql exit code');
|
||||||
like($out, qr/before/, 'server crash: output before crash');
|
like($out, qr/before/, 'server crash: output before crash');
|
||||||
unlike($out, qr/AFTER/, 'server crash: no output after crash');
|
unlike($out, qr/AFTER/, 'server crash: no output after crash');
|
||||||
is( $err,
|
like( $err, qr/psql:<stdin>:2: FATAL: terminating connection due to administrator command
|
||||||
'psql:<stdin>:2: FATAL: terminating connection due to administrator command
|
(?:DETAIL: Signal sent by PID \d+, UID \d+\.\n)?psql:<stdin>:2: server closed the connection unexpectedly
|
||||||
psql:<stdin>:2: server closed the connection unexpectedly
|
|
||||||
This probably means the server terminated abnormally
|
This probably means the server terminated abnormally
|
||||||
before or while processing the request.
|
before or while processing the request.
|
||||||
psql:<stdin>:2: error: connection to server was lost',
|
psql:<stdin>:2: error: connection to server was lost/,
|
||||||
'server crash: error message');
|
'server crash: error message');
|
||||||
|
|
||||||
# test \errverbose
|
# test \errverbose
|
||||||
|
|
|
||||||
|
|
@ -90,6 +90,8 @@
|
||||||
extern PGDLLIMPORT volatile sig_atomic_t InterruptPending;
|
extern PGDLLIMPORT volatile sig_atomic_t InterruptPending;
|
||||||
extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending;
|
extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending;
|
||||||
extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending;
|
extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending;
|
||||||
|
extern PGDLLIMPORT volatile int ProcDieSenderPid;
|
||||||
|
extern PGDLLIMPORT volatile int ProcDieSenderUid;
|
||||||
extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
|
extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
|
||||||
extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending;
|
extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending;
|
||||||
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
|
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
|
||||||
|
|
|
||||||
|
|
@ -354,6 +354,9 @@
|
||||||
/* Define to 1 if you have the `rl_variable_bind' function. */
|
/* Define to 1 if you have the `rl_variable_bind' function. */
|
||||||
#undef HAVE_RL_VARIABLE_BIND
|
#undef HAVE_RL_VARIABLE_BIND
|
||||||
|
|
||||||
|
/* Define to 1 if you have SA_SIGINFO available. */
|
||||||
|
#undef HAVE_SA_SIGINFO
|
||||||
|
|
||||||
/* Define to 1 if you have the <security/pam_appl.h> header file. */
|
/* Define to 1 if you have the <security/pam_appl.h> header file. */
|
||||||
#undef HAVE_SECURITY_PAM_APPL_H
|
#undef HAVE_SECURITY_PAM_APPL_H
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -82,10 +82,19 @@ static volatile pqsigfunc pqsignal_handlers[PG_NSIG];
|
||||||
*
|
*
|
||||||
* This wrapper also handles restoring the value of errno.
|
* This wrapper also handles restoring the value of errno.
|
||||||
*/
|
*/
|
||||||
|
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
|
||||||
|
static void
|
||||||
|
wrapper_handler(int signo, siginfo_t * info, void *context)
|
||||||
|
#else
|
||||||
static void
|
static void
|
||||||
wrapper_handler(SIGNAL_ARGS)
|
wrapper_handler(SIGNAL_ARGS)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
int save_errno = errno;
|
int save_errno = errno;
|
||||||
|
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
|
||||||
|
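/* The SA_SIGINFO handler signature names its first argument signo rather than using the SIGNAL_ARGS macro, so alias it to postgres_signal_arg for the shared code below */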
|
||||||
|
int postgres_signal_arg = signo;
|
||||||
|
#endif
|
||||||
|
|
||||||
Assert(postgres_signal_arg > 0);
|
Assert(postgres_signal_arg > 0);
|
||||||
Assert(postgres_signal_arg < PG_NSIG);
|
Assert(postgres_signal_arg < PG_NSIG);
|
||||||
|
|
@ -105,6 +114,14 @@ wrapper_handler(SIGNAL_ARGS)
|
||||||
raise(postgres_signal_arg);
|
raise(postgres_signal_arg);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_SA_SIGINFO
|
||||||
|
if (signo == SIGTERM && info)
|
||||||
|
{
|
||||||
|
ProcDieSenderPid = info->si_pid;
|
||||||
|
ProcDieSenderUid = info->si_uid;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
(*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg);
|
(*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg);
|
||||||
|
|
@ -125,6 +142,7 @@ pqsignal(int signo, pqsigfunc func)
|
||||||
#if !(defined(WIN32) && defined(FRONTEND))
|
#if !(defined(WIN32) && defined(FRONTEND))
|
||||||
struct sigaction act;
|
struct sigaction act;
|
||||||
#endif
|
#endif
|
||||||
|
bool use_wrapper = false;
|
||||||
|
|
||||||
Assert(signo > 0);
|
Assert(signo > 0);
|
||||||
Assert(signo < PG_NSIG);
|
Assert(signo < PG_NSIG);
|
||||||
|
|
@ -132,13 +150,24 @@ pqsignal(int signo, pqsigfunc func)
|
||||||
if (func != SIG_IGN && func != SIG_DFL)
|
if (func != SIG_IGN && func != SIG_DFL)
|
||||||
{
|
{
|
||||||
pqsignal_handlers[signo] = func; /* assumed atomic */
|
pqsignal_handlers[signo] = func; /* assumed atomic */
|
||||||
func = wrapper_handler;
|
use_wrapper = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !(defined(WIN32) && defined(FRONTEND))
|
#if !(defined(WIN32) && defined(FRONTEND))
|
||||||
act.sa_handler = func;
|
|
||||||
sigemptyset(&act.sa_mask);
|
sigemptyset(&act.sa_mask);
|
||||||
act.sa_flags = SA_RESTART;
|
act.sa_flags = SA_RESTART;
|
||||||
|
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
|
||||||
|
if (use_wrapper)
|
||||||
|
{
|
||||||
|
act.sa_sigaction = wrapper_handler;
|
||||||
|
act.sa_flags |= SA_SIGINFO;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
act.sa_handler = func;
|
||||||
|
#else
|
||||||
|
act.sa_handler = use_wrapper ? wrapper_handler : func;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef SA_NOCLDSTOP
|
#ifdef SA_NOCLDSTOP
|
||||||
if (signo == SIGCHLD)
|
if (signo == SIGCHLD)
|
||||||
act.sa_flags |= SA_NOCLDSTOP;
|
act.sa_flags |= SA_NOCLDSTOP;
|
||||||
|
|
@ -147,7 +176,7 @@ pqsignal(int signo, pqsigfunc func)
|
||||||
Assert(false); /* probably indicates coding error */
|
Assert(false); /* probably indicates coding error */
|
||||||
#else
|
#else
|
||||||
/* Forward to Windows native signal system. */
|
/* Forward to Windows native signal system. */
|
||||||
if (signal(signo, func) == SIG_ERR)
|
if (signal(signo, use_wrapper ? wrapper_handler : func) == SIG_ERR)
|
||||||
Assert(false); /* probably indicates coding error */
|
Assert(false); /* probably indicates coding error */
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue