From 7ef5ed2bb1d91393b145c27d6ade971da2439980 Mon Sep 17 00:00:00 2001 From: Joseph Koshy Date: Sat, 27 Aug 2005 16:03:40 +0000 Subject: [PATCH] - Special-case NMI handling on the AMD64. On entry to or exit from the kernel the 'alltraps' and 'doreti' code used by normal traps disables interrupts to protect the critical sections where it is setting up %gs. This protection is insufficient in the presence of NMIs since NMIs can be taken even when the processor has disabled normal interrupts. Thus the NMI handler needs to actually read MSR_GSBASE on entry to the kernel to determine whether a swap of %gs using 'swapgs' is needed. However, reads of MSRs are expensive and integrating this check into the 'alltraps'/'doreti' path would penalize normal interrupts. - Teach DDB about the 'nmi_calltrap' symbol. Reviewed by: bde, peter (older versions of this change) --- sys/amd64/amd64/db_trace.c | 3 +- sys/amd64/amd64/exception.S | 78 ++++++++++++++++++++++++++++++++++++- sys/amd64/amd64/genassym.c | 2 + sys/amd64/amd64/trap.c | 3 +- 4 files changed, 82 insertions(+), 4 deletions(-) diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c index 264daaa1d1f..afffe2d671b 100644 --- a/sys/amd64/amd64/db_trace.c +++ b/sys/amd64/amd64/db_trace.c @@ -317,7 +317,8 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td) db_symbol_values(sym, &name, NULL); if (name != NULL) { if (strcmp(name, "calltrap") == 0 || - strcmp(name, "fork_trampoline") == 0) + strcmp(name, "fork_trampoline") == 0 || + strcmp(name, "nmi_calltrap") == 0) frame_type = TRAP; else if (strncmp(name, "Xatpic_intr", 11) == 0 || strncmp(name, "Xatpic_fastintr", 15) == 0 || diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 91610448d49..9b87cee04bc 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -93,8 +93,6 @@ IDTVEC(bpt) jmp alltraps IDTVEC(div) TRAP(T_DIVIDE) -IDTVEC(nmi) - TRAP_NOEN(T_NMI) IDTVEC(ofl) TRAP(T_OFLOW) IDTVEC(bnd) 
@@ -313,6 +311,82 @@ IDTVEC(fast_syscall) IDTVEC(fast_syscall32) sysret +/* + * NMI handling is special. + * + * First, NMIs do not respect the state of the processor's RFLAGS.IF + * bit and the NMI handler may be invoked at any time, including when + * the processor is in a critical section with RFLAGS.IF == 0. In + * particular, this means that the processor's GS.base values could be + * inconsistent on entry to the handler, and so we need to read + * MSR_GSBASE to determine if a 'swapgs' is needed. We use '%ebx', a + * C-preserved register, to remember whether to swap GS back on the + * exit path. + * + * Second, the processor treats NMIs specially, blocking further NMIs + * until an 'iretq' instruction is executed. We therefore need to + * execute the NMI handler with interrupts disabled to prevent a + * nested interrupt from executing an 'iretq' instruction and + * inadvertently taking the processor out of NMI mode. + */ + +IDTVEC(nmi) + subq $TF_RIP,%rsp /* reserve room for the software-saved frame fields */ + movq $(T_NMI),TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) + movq $0,TF_ERR(%rsp) + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + xorl %ebx,%ebx /* %ebx == 0: no swapgs to undo on exit */ + testb $SEL_RPL_MASK,TF_CS(%rsp) + jnz nmi_needswapgs /* we came from userland */ + movl $MSR_GSBASE,%ecx /* kernel-mode entry: inspect the current GS.base */ + rdmsr /* upper 32 bits of the MSR are returned in %edx */ + cmpl $VM_MAXUSER_ADDRESS >> 32,%edx + jae nmi_calltrap /* GS.base holds a kernel VA */ +nmi_needswapgs: + incl %ebx /* %ebx != 0: swap GS back on the exit path */ + swapgs +/* Note: this label is also used by ddb and gdb: */ +nmi_calltrap: + FAKE_MCOUNT(TF_RIP(%rsp)) + call trap + MEXITCOUNT + testl %ebx,%ebx /* did we execute swapgs on entry? */ + jz nmi_restoreregs + swapgs /* undo the swapgs done on entry */ +nmi_restoreregs: + movq TF_RDI(%rsp),%rdi + movq TF_RSI(%rsp),%rsi + movq TF_RDX(%rsp),%rdx + movq TF_RCX(%rsp),%rcx + movq TF_R8(%rsp),%r8 + 
movq TF_R9(%rsp),%r9 + movq TF_RAX(%rsp),%rax + movq TF_RBX(%rsp),%rbx + movq TF_RBP(%rsp),%rbp + movq TF_R10(%rsp),%r10 + movq TF_R11(%rsp),%r11 + movq TF_R12(%rsp),%r12 + movq TF_R13(%rsp),%r13 + movq TF_R14(%rsp),%r14 + movq TF_R15(%rsp),%r15 + addq $TF_RIP,%rsp /* release the space reserved on entry */ + iretq + ENTRY(fork_trampoline) movq %r12, %rdi /* function */ movq %rbx, %rsi /* arg1 */ diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 3a51e6f8bc9..6157348d0b1 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -209,3 +209,5 @@ ASSYM(SEL_RPL_MASK, SEL_RPL_MASK); ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock)); ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse)); + +ASSYM(MSR_GSBASE, MSR_GSBASE); diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index cb79c58266e..c70dc0b73cb 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -210,7 +210,8 @@ trap(frame) printf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curproc->p_comm, type); - else if (type != T_BPTFLT && type != T_TRCTRAP) { + else if (type != T_NMI && type != T_BPTFLT && + type != T_TRCTRAP) { /* * XXX not quite right, since this may be for a * multiple fault in user mode.