killpg(2): close a race with fork(2), part1

If the process group member performs fork(), the child could escape signalling from killpg(). Prevent it by introducing an sx process group lock pg_killsx which is taken interruptibly shared around fork. If there is a pending signal, do the trip through userspace with ERESTART to handle signal ASTs. The lock is taken exclusively during killpg(). The lock is also locked exclusive when the process changes group membership, to avoid escaping a signal by this means, by ensuring that the process group is stable during fork. Note that the new lock is before proctree lock, so in some situations we could only do trylocking to obtain it. This relatively simple approach cannot work for REAP_KILL, because process potentially belongs to more than one reaper tree by having sub-reapers. Reported by: dchagin Tested by: dchagin, pho Reviewed by: markj Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Differential revision: https://reviews.freebsd.org/D40493
2026-05-28 04:12:45 -04:00 · 2023-06-12 10:33:43 +03:00 · 2023-06-12 10:33:43 +03:00 · 3360b48525
commit 3360b48525
parent 4b59d1724b
6 changed files with 72 additions and 4 deletions
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@ -495,6 +495,7 @@ proc0_init(void *dummy __unused)
 	LIST_INSERT_HEAD(&allproc, p, p_list);
 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
+	sx_init(&pgrp0.pg_killsx, "killpg racer");
 	p->p_pgrp = &pgrp0;
 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
 	LIST_INIT(&pgrp0.pg_members);
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@ -856,11 +856,13 @@ fork1(struct thread *td, struct fork_req *fr)
 	struct vmspace *vm2;
 	struct ucred *cred;
 	struct file *fp_procdesc;
+	struct pgrp *pg;
 	vm_ooffset_t mem_charged;
 	int error, nprocs_new;
 	static int curfail;
 	static struct timeval lastfail;
 	int flags, pages;
+	bool killsx_locked;

 	flags = fr->fr_flags;
 	pages = fr->fr_pages;
@ -917,6 +919,7 @@ fork1(struct thread *td, struct fork_req *fr)
 	fp_procdesc = NULL;
 	newproc = NULL;
 	vm2 = NULL;
+	killsx_locked = false;

 	/*
 	 * Increment the nprocs resource before allocations occur.
@ -946,6 +949,28 @@ fork1(struct thread *td, struct fork_req *fr)
 		}
 	}

+	/*
+	 * Atomically check for signals and block threads from sending
+	 * a signal to our process group until the child is visible.
+	 */
+	pg = p1->p_pgrp;
+	if (sx_slock_sig(&pg->pg_killsx) != 0) {
+		error = ERESTART;
+		goto fail2;
+	} else if (__predict_false(p1->p_pgrp != pg || sig_intr() != 0)) {
+		/*
+		 * Either the process was moved to other process
+		 * group, or there is pending signal.  sx_slock_sig()
+		 * does not check for signals if not sleeping for the
+		 * lock.
+		 */
+		sx_sunlock(&pg->pg_killsx);
+		error = ERESTART;
+		goto fail2;
+	} else {
+		killsx_locked = true;
+	}
+
 	/*
 	 * If required, create a process descriptor in the parent first; we
 	 * will abandon it if something goes wrong. We don't finit() until
@ -1037,6 +1062,7 @@ fork1(struct thread *td, struct fork_req *fr)
 	}

 	do_fork(td, fr, newproc, td2, vm2, fp_procdesc);
+	sx_sunlock(&pg->pg_killsx);
 	return (0);
 fail0:
 	error = EAGAIN;
@ -1055,6 +1081,8 @@ fail2:
 		fdrop(fp_procdesc, td);
 	}
 	atomic_add_int(&nprocs, -1);
+	if (killsx_locked)
+		sx_sunlock(&pg->pg_killsx);
 	pause("fork", hz / 2);
 	return (error);
 }
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@ -310,6 +310,7 @@ pgrp_init(void *mem, int size, int flags)

 	pg = mem;
 	mtx_init(&pg->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
+	sx_init(&pg->pg_killsx, "killpg racer");
 	return (0);
 }

@ -573,6 +574,7 @@ errout:
 int
 enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)
 {
+	struct pgrp *old_pgrp;

 	sx_assert(&proctree_lock, SX_XLOCKED);

@ -584,6 +586,11 @@ enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)
 	KASSERT(!SESS_LEADER(p),
 	    ("enterpgrp: session leader attempted setpgrp"));

+	old_pgrp = p->p_pgrp;
+	if (!sx_try_xlock(&old_pgrp->pg_killsx))
+		return (ERESTART);
+	MPASS(old_pgrp == p->p_pgrp);
+
 	if (sess != NULL) {
 		/*
 		 * new session
@ -625,6 +632,7 @@ enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)

 	doenterpgrp(p, pgrp);

+	sx_xunlock(&old_pgrp->pg_killsx);
 	return (0);
 }

@ -634,6 +642,7 @@ enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)
 int
 enterthispgrp(struct proc *p, struct pgrp *pgrp)
 {
+	struct pgrp *old_pgrp;

 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
@ -646,8 +655,19 @@ enterthispgrp(struct proc *p, struct pgrp *pgrp)
 	KASSERT(pgrp != p->p_pgrp,
 	    ("%s: p %p belongs to pgrp %p", __func__, p, pgrp));

+	old_pgrp = p->p_pgrp;
+	if (!sx_try_xlock(&old_pgrp->pg_killsx))
+		return (ERESTART);
+	MPASS(old_pgrp == p->p_pgrp);
+	if (!sx_try_xlock(&pgrp->pg_killsx)) {
+		sx_xunlock(&old_pgrp->pg_killsx);
+		return (ERESTART);
+	}
+
 	doenterpgrp(p, pgrp);

+	sx_xunlock(&pgrp->pg_killsx);
+	sx_xunlock(&old_pgrp->pg_killsx);
 	return (0);
 }

--- a/sys/kern/kern_prot.c
+++ b/sys/kern/kern_prot.c
@ -332,12 +332,13 @@ sys_setsid(struct thread *td, struct setsid_args *uap)
 	struct pgrp *newpgrp;
 	struct session *newsess;

-	error = 0;
 	pgrp = NULL;

 	newpgrp = uma_zalloc(pgrp_zone, M_WAITOK);
 	newsess = malloc(sizeof(struct session), M_SESSION, M_WAITOK | M_ZERO);

+again:
+	error = 0;
 	sx_xlock(&proctree_lock);

 	if (p->p_pgid == p->p_pid || (pgrp = pgfind(p->p_pid)) != NULL) {
@ -345,7 +346,12 @@ sys_setsid(struct thread *td, struct setsid_args *uap)
 			PGRP_UNLOCK(pgrp);
 		error = EPERM;
 	} else {
-		(void)enterpgrp(p, p->p_pid, newpgrp, newsess);
+		error = enterpgrp(p, p->p_pid, newpgrp, newsess);
+		if (error == ERESTART) {
+			sx_xunlock(&proctree_lock);
+			goto again;
+		}
+		MPASS(error == 0);
 		td->td_retval[0] = p->p_pid;
 		newpgrp = NULL;
 		newsess = NULL;
@ -391,10 +397,11 @@ sys_setpgid(struct thread *td, struct setpgid_args *uap)
 	if (uap->pgid < 0)
 		return (EINVAL);

-	error = 0;
-
 	newpgrp = uma_zalloc(pgrp_zone, M_WAITOK);

+again:
+	error = 0;
+
 	sx_xlock(&proctree_lock);
 	if (uap->pid != 0 && uap->pid != curp->p_pid) {
 		if ((targp = pfind(uap->pid)) == NULL) {
@ -456,6 +463,8 @@ done:
 	sx_xunlock(&proctree_lock);
 	KASSERT((error == 0) || (newpgrp != NULL),
 	    ("setpgid failed and newpgrp is NULL"));
+	if (error == ERESTART)
+		goto again;
 	uma_zfree(pgrp_zone, newpgrp);
 	return (error);
 }
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@ -1847,6 +1847,7 @@ killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi)
 		prison_proc_iterate(td->td_ucred->cr_prison,
 		    kill_processes_prison_cb, &arg);
 	} else {
+again:
 		sx_slock(&proctree_lock);
 		if (pgid == 0) {
 			/*
@ -1862,10 +1863,17 @@ killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi)
 			}
 		}
 		sx_sunlock(&proctree_lock);
+		if (!sx_try_xlock(&pgrp->pg_killsx)) {
+			PGRP_UNLOCK(pgrp);
+			sx_xlock(&pgrp->pg_killsx);
+			sx_xunlock(&pgrp->pg_killsx);
+			goto again;
+		}
 		LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
 			killpg1_sendsig(p, false, &arg);
 		}
 		PGRP_UNLOCK(pgrp);
+		sx_xunlock(&pgrp->pg_killsx);
 	}
 	MPASS(arg.ret != 0 || arg.found || !arg.sent);
 	if (arg.ret == 0 && !arg.sent)
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@ -113,6 +113,8 @@ struct pgrp {
 	pid_t		pg_id;		/* (c) Process group id. */
 	struct mtx	pg_mtx;		/* Mutex to protect members */
 	int		pg_flags;	/* (m) PGRP_ flags */
+	struct sx	pg_killsx;	/* Mutual exclusion between group member
+					 * fork() and killpg() */
 };

 #define	PGRP_ORPHANED	0x00000001	/* Group is orphaned */