From d5cc6df608018f4dbf32542fe6bec405d2d770c0 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 27 May 2026 14:49:04 +0900 Subject: [PATCH] Fix race conditions in ProcKill()'s lock-group freelist handling This commit fixes two bugs in ProcKill()'s lock-group teardown freelist publication: * a double push of the leader's PGPROC that corrupts the freelist. * a leak of the last follower's PGPROC slot. ProcKill()'s lock-group teardown had two PGPROC freelist updates scattered through the function, done under two separate freeProcsLock acquisitions: * A follower's push of the leader's PGPROC, done when a follower is the last group member exiting. * Every backend's self-push at the bottom of the function. The two freelist updates were coordinated only by inspecting proc->lockGroupLeader, which a follower could clear as a side effect of pushing the leader. This coordination was broken. For example, with two concurrent backends: * The follower clears leader->lockGroupLeader and pushes the leader's PGPROC under leader_lwlock. * The follower does not clear its own proc->lockGroupLeader, being skipped. * When the leader reaches the bottom of ProcKill(), it sees a NULL proc->lockGroupLeader (the follower cleared it) and pushes itself, causing a second dlist_push_tail() of the same node onto the same freelist. * The follower at the bottom sees its own proc->lockGroupLeader being not NULL (never cleared) and skips its own push, causing its own slot to leak. This commit refactors the freelist manipulation to be done in two distinct phases, each step using its own lock acquisition to ensure that each freelist operation happens in an isolated manner for each backend (follower or leader): - First, under a single leader_lwlock acquisition, check the state of the lock-group. Depending on if we are dealing with a follower and/or a leader, and if the leader has exited before a follower, then set some state booleans that define which actions should be taken with the freelist. - Second, under a single freeProcsLock acquisition, perform the cleanup actions, self-push of a backend and/or push of the leader back to the freelist. This is an old issue, dating back to 9.6 where parallel workers and lock grouping has been added. Author: Vlad Lesin Reviewed-by: Andrey Borodin Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/d2983796-2603-41b7-a66e-fc8489ddb954@gmail.com Backpatch-through: 14 --- src/backend/storage/lmgr/proc.c | 81 ++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 93c4324534b..b68b4010d63 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -803,7 +803,10 @@ static void ProcKill(int code, Datum arg) { PGPROC *proc; + PGPROC *leader; PGPROC *volatile *procgloballist; + bool push_leader; + bool push_self; Assert(MyProc != NULL); @@ -834,36 +837,59 @@ ProcKill(int code, Datum arg) /* Cancel any pending condition variable sleep, too */ ConditionVariableCancelSleep(); + proc = MyProc; + procgloballist = proc->procgloballist; + /* - * Detach from any lock group of which we are a member. If the leader - * exist before all other group members, its PGPROC will remain allocated - * until the last group process exits; that process must return the - * leader's PGPROC to the appropriate list. + * Detach from any lock group of which we are a member, deciding under + * leader_lwlock whether we (via push_self) and/or the leader (via + * push_leader) need to be pushed onto a freelist. The actual pushes + * happen after evaluating if any of these are required, under a single + * ProcGlobal->freeProcsLock. + * + * The decision whether any of the freelists needs to be updated is taken + * under a single leader_lwlock. */ - if (MyProc->lockGroupLeader != NULL) + push_leader = false; + push_self = true; + leader = NULL; + + if (proc->lockGroupLeader != NULL) { - PGPROC *leader = MyProc->lockGroupLeader; - LWLock *leader_lwlock = LockHashPartitionLockByProc(leader); + LWLock *leader_lwlock; + + leader = proc->lockGroupLeader; + leader_lwlock = LockHashPartitionLockByProc(leader); LWLockAcquire(leader_lwlock, LW_EXCLUSIVE); Assert(!dlist_is_empty(&leader->lockGroupMembers)); - dlist_delete(&MyProc->lockGroupLink); + dlist_delete(&proc->lockGroupLink); if (dlist_is_empty(&leader->lockGroupMembers)) { leader->lockGroupLeader = NULL; - if (leader != MyProc) + if (leader != proc) { - procgloballist = leader->procgloballist; - - /* Leader exited first; return its PGPROC. */ - SpinLockAcquire(ProcStructLock); - leader->links.next = (SHM_QUEUE *) *procgloballist; - *procgloballist = leader; - SpinLockRelease(ProcStructLock); + /* + * We are the last follower and the leader exited earlier; its + * PGPROC is still allocated and must be pushed here. + */ + push_leader = true; + proc->lockGroupLeader = NULL; } } - else if (leader != MyProc) - MyProc->lockGroupLeader = NULL; + else if (leader != proc) + { + /* Non-last follower; leader still present in the group. */ + proc->lockGroupLeader = NULL; + } + else + { + /* + * We are the leader and followers remain. Skip our own push; the + * last follower to exit will push us back to the freelist. + */ + push_self = false; + } LWLockRelease(leader_lwlock); } @@ -879,20 +905,21 @@ ProcKill(int code, Datum arg) SwitchBackToLocalLatch(); pgstat_reset_wait_event_storage(); - proc = MyProc; MyProc = NULL; DisownLatch(&proc->procLatch); - procgloballist = proc->procgloballist; SpinLockAcquire(ProcStructLock); - - /* - * If we're still a member of a locking group, that means we're a leader - * which has somehow exited before its children. The last remaining child - * will release our PGPROC. Otherwise, release it now. - */ - if (proc->lockGroupLeader == NULL) + if (push_leader) { + /* Return leader PGPROC (and semaphore) to appropriate freelist */ + PGPROC *volatile *leadergloballist = leader->procgloballist; + + leader->links.next = (SHM_QUEUE *) *leadergloballist; + *leadergloballist = leader; + } + if (push_self) + { + Assert(proc->lockGroupLeader == NULL); /* Since lockGroupLeader is NULL, lockGroupMembers should be empty. */ Assert(dlist_is_empty(&proc->lockGroupMembers));