From 8e2e767b1feda90240ccc2eed1fe8f687f47315f Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Fri, 26 Oct 2001 08:12:54 +0000 Subject: [PATCH] Add a per-thread ucred reference for syscalls and synchronous traps from userland. The per-thread ucred reference is immutable and thus needs no locks to be read. However, until all the proc locking associated with writes to p_ucred is completed, it is still not safe to use the per-thread reference. Tested on: x86 (SMP), alpha, sparc64 --- sys/alpha/alpha/trap.c | 21 +++++++++++-- sys/amd64/amd64/trap.c | 63 ++++++++++++++++++++++---------------- sys/i386/i386/trap.c | 63 ++++++++++++++++++++++---------------- sys/ia64/ia64/trap.c | 18 +++++++++++ sys/kern/init_main.c | 1 + sys/kern/kern_exit.c | 10 ++++++ sys/kern/kern_fork.c | 5 +++ sys/kern/subr_trap.c | 14 +++++++-- sys/sparc64/sparc64/trap.c | 29 +++++++++++++++--- sys/sys/proc.h | 1 + 10 files changed, 164 insertions(+), 61 deletions(-) diff --git a/sys/alpha/alpha/trap.c b/sys/alpha/alpha/trap.c index d82f4555189..b86c6ed1572 100644 --- a/sys/alpha/alpha/trap.c +++ b/sys/alpha/alpha/trap.c @@ -293,8 +293,14 @@ trap(a0, a1, a2, entry, framep) if (user) { sticks = td->td_kse->ke_sticks; td->td_frame = framep; + KASSERT(td->td_ucred == NULL, ("already have a ucred")); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); } else { sticks = 0; /* XXX bogus -Wuninitialized warning */ + KASSERT(cold || td->td_ucred != NULL, + ("kernel trap doesn't have ucred")); } #ifdef DIAGNOSTIC @@ -615,8 +621,11 @@ out: if (user) { framep->tf_regs[FRAME_SP] = alpha_pal_rdusp(); userret(td, framep, sticks); - if (mtx_owned(&Giant)) - mtx_unlock(&Giant); + mtx_assert(&Giant, MA_NOTOWNED); + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; } return; @@ -685,6 +694,10 @@ syscall(code, framep) td->td_frame = framep; opc = framep->tf_regs[FRAME_PC] - 4; sticks = td->td_kse->ke_sticks; + KASSERT(td->td_ucred == NULL, ("already have a 
ucred")); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); #ifdef DIAGNOSTIC alpha_fpstate_check(p); @@ -805,6 +818,10 @@ syscall(code, framep) */ STOPEVENT(p, S_SCX, code); + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; #ifdef WITNESS if (witness_list(td)) { panic("system call %s returning with mutex(s) held\n", diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index fd4c8fe9080..714f3f2f4f4 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -216,6 +216,17 @@ trap(frame) eva = 0; type = frame.tf_trapno; code = frame.tf_err; + if (type == T_PAGEFLT) { + /* + * For some Cyrix CPUs, %cr2 is clobbered by + * interrupts. This problem is worked around by using + * an interrupt gate for the pagefault handler. We + * are finally ready to read %cr2 and then must + * reenable interrupts. + */ + eva = rcr2(); + enable_intr(); + } if ((ISPL(frame.tf_cs) == SEL_UPL) || ((frame.tf_eflags & PSL_VM) && !in_vm86call)) { @@ -223,6 +234,10 @@ trap(frame) sticks = td->td_kse->ke_sticks; td->td_frame = &frame; + KASSERT(td->td_ucred == NULL, ("already have a ucred")); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ @@ -240,7 +255,7 @@ trap(frame) #ifdef DEV_NPX ucode = npxtrap(); if (ucode == -1) - return; + goto userout; #else ucode = code; #endif @@ -273,15 +288,6 @@ trap(frame) break; case T_PAGEFLT: /* page fault */ - /* - * For some Cyrix CPUs, %cr2 is clobbered by - * interrupts. This problem is worked around by using - * an interrupt gate for the pagefault handler. We - * are finally ready to read %cr2 and then must - * reenable interrupts. 
- */ - eva = rcr2(); - enable_intr(); i = trap_pfault(&frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) { @@ -299,7 +305,7 @@ trap(frame) } #endif if (i == -1) - goto out; + goto userout; if (i == 0) goto user; @@ -324,7 +330,7 @@ trap(frame) lastalert = time_second; } mtx_unlock(&Giant); - goto out; + goto userout; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ /* XXX Giant */ @@ -339,7 +345,7 @@ trap(frame) kdb_trap (type, 0, &frame); } #endif /* DDB */ - goto out; + goto userout; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; @@ -360,7 +366,7 @@ trap(frame) #ifdef DEV_NPX /* transparent fault (due to context switch "late") */ if (npxdna()) - goto out; + goto userout; #endif if (!pmath_emulate) { i = SIGFPE; @@ -372,7 +378,7 @@ trap(frame) mtx_unlock(&Giant); if (i == 0) { if (!(frame.tf_eflags & PSL_T)) - goto out; + goto userout; frame.tf_eflags &= ~PSL_T; i = SIGTRAP; } @@ -392,17 +398,10 @@ trap(frame) } else { /* kernel trap */ + KASSERT(cold || td->td_ucred != NULL, + ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ - /* - * For some Cyrix CPUs, %cr2 is clobbered by - * interrupts. This problem is worked around by using - * an interrupt gate for the pagefault handler. We - * are finally ready to read %cr2 and then must - * reenable interrupts. - */ - eva = rcr2(); - enable_intr(); (void) trap_pfault(&frame, FALSE, eva); goto out; @@ -622,8 +621,12 @@ trap(frame) user: userret(td, &frame, sticks); - if (mtx_owned(&Giant)) /* XXX why would Giant be owned here? 
*/ - mtx_unlock(&Giant); + mtx_assert(&Giant, MA_NOTOWNED); +userout: + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; out: return; } @@ -1046,6 +1049,10 @@ syscall(frame) sticks = td->td_kse->ke_sticks; td->td_frame = &frame; + KASSERT(td->td_ucred == NULL, ("already have a ucred")); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; @@ -1187,6 +1194,10 @@ bad: */ STOPEVENT(p, S_SCX, code); + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; #ifdef WITNESS if (witness_list(td)) { panic("system call %s returning with mutex(s) held\n", diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index fd4c8fe9080..714f3f2f4f4 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -216,6 +216,17 @@ trap(frame) eva = 0; type = frame.tf_trapno; code = frame.tf_err; + if (type == T_PAGEFLT) { + /* + * For some Cyrix CPUs, %cr2 is clobbered by + * interrupts. This problem is worked around by using + * an interrupt gate for the pagefault handler. We + * are finally ready to read %cr2 and then must + * reenable interrupts. + */ + eva = rcr2(); + enable_intr(); + } if ((ISPL(frame.tf_cs) == SEL_UPL) || ((frame.tf_eflags & PSL_VM) && !in_vm86call)) { @@ -223,6 +234,10 @@ trap(frame) sticks = td->td_kse->ke_sticks; td->td_frame = &frame; + KASSERT(td->td_ucred == NULL, ("already have a ucred")); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ @@ -240,7 +255,7 @@ trap(frame) #ifdef DEV_NPX ucode = npxtrap(); if (ucode == -1) - return; + goto userout; #else ucode = code; #endif @@ -273,15 +288,6 @@ trap(frame) break; case T_PAGEFLT: /* page fault */ - /* - * For some Cyrix CPUs, %cr2 is clobbered by - * interrupts. This problem is worked around by using - * an interrupt gate for the pagefault handler. 
We - * are finally ready to read %cr2 and then must - * reenable interrupts. - */ - eva = rcr2(); - enable_intr(); i = trap_pfault(&frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) { @@ -299,7 +305,7 @@ trap(frame) } #endif if (i == -1) - goto out; + goto userout; if (i == 0) goto user; @@ -324,7 +330,7 @@ trap(frame) lastalert = time_second; } mtx_unlock(&Giant); - goto out; + goto userout; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ /* XXX Giant */ @@ -339,7 +345,7 @@ trap(frame) kdb_trap (type, 0, &frame); } #endif /* DDB */ - goto out; + goto userout; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; @@ -360,7 +366,7 @@ trap(frame) #ifdef DEV_NPX /* transparent fault (due to context switch "late") */ if (npxdna()) - goto out; + goto userout; #endif if (!pmath_emulate) { i = SIGFPE; @@ -372,7 +378,7 @@ trap(frame) mtx_unlock(&Giant); if (i == 0) { if (!(frame.tf_eflags & PSL_T)) - goto out; + goto userout; frame.tf_eflags &= ~PSL_T; i = SIGTRAP; } @@ -392,17 +398,10 @@ trap(frame) } else { /* kernel trap */ + KASSERT(cold || td->td_ucred != NULL, + ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ - /* - * For some Cyrix CPUs, %cr2 is clobbered by - * interrupts. This problem is worked around by using - * an interrupt gate for the pagefault handler. We - * are finally ready to read %cr2 and then must - * reenable interrupts. - */ - eva = rcr2(); - enable_intr(); (void) trap_pfault(&frame, FALSE, eva); goto out; @@ -622,8 +621,12 @@ trap(frame) user: userret(td, &frame, sticks); - if (mtx_owned(&Giant)) /* XXX why would Giant be owned here? 
*/ - mtx_unlock(&Giant); + mtx_assert(&Giant, MA_NOTOWNED); +userout: + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; out: return; } @@ -1046,6 +1049,10 @@ syscall(frame) sticks = td->td_kse->ke_sticks; td->td_frame = &frame; + KASSERT(td->td_ucred == NULL, ("already have a ucred")); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; @@ -1187,6 +1194,10 @@ bad: */ STOPEVENT(p, S_SCX, code); + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; #ifdef WITNESS if (witness_list(td)) { panic("system call %s returning with mutex(s) held\n", diff --git a/sys/ia64/ia64/trap.c b/sys/ia64/ia64/trap.c index 18093066fef..3b024ed1819 100644 --- a/sys/ia64/ia64/trap.c +++ b/sys/ia64/ia64/trap.c @@ -295,8 +295,14 @@ trap(int vector, int imm, struct trapframe *framep) if (user) { sticks = td->td_kse->ke_sticks; td->td_frame = framep; + KASSERT(td->td_ucred == NULL, ("already have a ucred")); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); } else { sticks = 0; /* XXX bogus -Wuninitialized warning */ + KASSERT(cold || td->td_ucred != NULL, + ("kernel trap doesn't have ucred")); } switch (vector) { @@ -552,6 +558,10 @@ out: if (user) { userret(td, framep, sticks); mtx_assert(&Giant, MA_NOTOWNED); + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; } return; @@ -596,6 +606,10 @@ syscall(int code, u_int64_t *args, struct trapframe *framep) td->td_frame = framep; sticks = td->td_kse->ke_sticks; + KASSERT(td->td_ucred == NULL, ("already have a ucred")); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); /* * Skip past the break instruction. 
Remember old address in case @@ -700,6 +714,10 @@ syscall(int code, u_int64_t *args, struct trapframe *framep) */ STOPEVENT(p, S_SCX, code); + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; #ifdef WITNESS if (witness_list(td)) { panic("system call %s returning with mutex(s) held\n", diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 5491cef509c..438f2f912f8 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -347,6 +347,7 @@ proc0_init(void *dummy __unused) p->p_ucred->cr_uidinfo = uifind(0); p->p_ucred->cr_ruidinfo = uifind(0); p->p_ucred->cr_prison = NULL; /* Don't jail it. */ + td->td_ucred = crhold(p->p_ucred); /* Create procsig. */ p->p_procsig = &procsig0; diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 07935035cac..20a862f9b16 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -387,6 +387,16 @@ exit1(td, rv) p->p_limit = NULL; } + /* + * Release this thread's reference to the ucred. The actual proc + * reference will stay around until the proc is harvested by + * wait(). At this point the ucred is immutable (no other threads + * from this proc are around that can change it) so we leave the + * per-thread ucred pointer intact in case it is needed although + * in theory nothing should be using it at this point. + */ + crfree(td->td_ucred); + /* * Finally, call machine-dependent code to release the remaining * resources including address space, the kernel stack and pcb. 
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 2d1ce60c31d..28f0970fe2d 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -472,6 +472,7 @@ again: */ PROC_LOCK(p1); p2->p_ucred = crhold(p1->p_ucred); + p2->p_thread.td_ucred = crhold(p2->p_ucred); /* XXXKSE */ if (p2->p_args) p2->p_args->ar_ref++; @@ -797,6 +798,10 @@ fork_exit(callout, arg, frame) kthread_exit(0); } PROC_UNLOCK(p); + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; mtx_assert(&Giant, MA_NOTOWNED); } diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 908e8f1421e..a8995766324 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -135,6 +135,7 @@ ast(framep) #endif KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode")); + KASSERT(td->td_ucred == NULL, ("leaked ucred")); #ifdef WITNESS if (witness_list(td)) panic("Returning to user mode with mutex(s) held"); @@ -161,10 +162,13 @@ ast(framep) if (flags & KEF_OWEUPC) { prticks = p->p_stats->p_prof.pr_ticks; p->p_stats->p_prof.pr_ticks = 0; - mtx_unlock_spin(&sched_lock); + } + mtx_unlock_spin(&sched_lock); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); + if (flags & KEF_OWEUPC) addupc_task(ke, p->p_stats->p_prof.pr_addr, prticks); - } else - mtx_unlock_spin(&sched_lock); if (sflag & PS_ALRMPEND) { PROC_LOCK(p); psignal(p, SIGVTALRM); @@ -187,6 +191,10 @@ ast(framep) } userret(td, framep, sticks); + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; s = critical_enter(); } mtx_assert(&Giant, MA_NOTOWNED); diff --git a/sys/sparc64/sparc64/trap.c b/sys/sparc64/sparc64/trap.c index 8498b878721..57ee4439b35 100644 --- a/sys/sparc64/sparc64/trap.c +++ b/sys/sparc64/sparc64/trap.c @@ -150,7 +150,6 @@ trap(struct trapframe *tf) error = 0; type = tf->tf_type; ucode = type; /* XXX */ - sticks = 0; CTR5(KTR_TRAP, "trap: %s type=%s (%s) ws=%#lx ow=%#lx", p->p_comm, trap_msg[type & ~T_KERNEL], @@ -160,6 +159,14 @@ 
trap(struct trapframe *tf) if ((type & T_KERNEL) == 0) { sticks = td->td_kse->ke_sticks; td->td_frame = tf; + KASSERT(td->td_ucred == NULL, ("already have a ucred")); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); + } else { + sticks = 0; + KASSERT(cold || td->td_ucred != NULL, + ("kernel trap doesn't have ucred")); } switch (type) { @@ -206,14 +213,14 @@ trap(struct trapframe *tf) sigexit(td, SIGILL); /* Not reached. */ } - goto out; + goto userout; case T_FILL_RET: if (rwindow_load(td, tf, 1)) { PROC_LOCK(p); sigexit(td, SIGILL); /* Not reached. */ } - goto out; + goto userout; case T_INSN_ILLEGAL: sig = SIGILL; goto trapsig; @@ -230,7 +237,7 @@ trap(struct trapframe *tf) sigexit(td, SIGILL); /* Not reached. */ } - goto out; + goto userout; case T_TAG_OVFLW: sig = SIGEMT; goto trapsig; @@ -322,6 +329,12 @@ trapsig: trapsignal(p, sig, ucode); user: userret(td, tf, sticks); +userout: + mtx_assert(&Giant, MA_NOTOWNED); + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; out: CTR1(KTR_TRAP, "trap: td=%p return", td); return; @@ -496,6 +509,10 @@ syscall(struct trapframe *tf) sticks = td->td_kse->ke_sticks; td->td_frame = tf; + KASSERT(td->td_ucred == NULL, ("already have a ucred")); + PROC_LOCK(p); + td->td_ucred = crhold(p->p_ucred); + PROC_UNLOCK(p); code = tf->tf_global[1]; /* @@ -631,6 +648,10 @@ bad: */ STOPEVENT(p, S_SCX, code); + mtx_lock(&Giant); + crfree(td->td_ucred); + mtx_unlock(&Giant); + td->td_ucred = NULL; #ifdef WITNESS if (witness_list(td)) { panic("system call %s returning with mutex(s) held\n", diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 6e5158694c5..40088482b31 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -266,6 +266,7 @@ struct thread { register_t td_retval[2]; /* (k) Syscall aux returns. */ #define td_endcopy td_pcb + struct ucred *td_ucred; /* (k) Reference to credentials. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. 
*/ struct callout td_slpcallout; /* (h) Callout for sleep. */ struct trapframe *td_frame; /* (k) */